304 files changed, 34897 insertions, 3526 deletions
@@ -63,6 +63,7 @@ Dengcheng Zhu <[email protected]> <[email protected]> Dengcheng Zhu <[email protected]> <[email protected]> Dengcheng Zhu <[email protected]> <[email protected]> Dengcheng Zhu <[email protected]> <[email protected]> Dmitry Eremin-Solenikov <[email protected]> Dmitry Safonov <[email protected]> <[email protected]> Dmitry Safonov <[email protected]> <[email protected]> diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt index c388f7d9feb1..27e1b4cebfbd 100644 --- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt +++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt @@ -10,8 +10,10 @@ Required properties: - #size-cells: 0 - spi-max-frequency: Maximum frequency of the SPI bus the chip can operate at should be less than or equal to 18 MHz. - - data-ready-gpios: Interrupt GPIO for data and error reporting. - device-wake-gpios: Wake up GPIO to wake up the TCAN device. + - interrupt-parent: the phandle to the interrupt controller which provides + the interrupt. + - interrupts: interrupt specification for data-ready. See Documentation/devicetree/bindings/net/can/m_can.txt for additional required property details. @@ -30,7 +32,8 @@ tcan4x5x: tcan4x5x@0 { #size-cells = <1>; spi-max-frequency = <10000000>; bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>; - data-ready-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; + interrupt-parent = <&gpio1>; + interrupts = <14 GPIO_ACTIVE_LOW>; device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>; reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; diff --git a/Documentation/devicetree/bindings/net/dsa/mt7530.txt b/Documentation/devicetree/bindings/net/dsa/mt7530.txt index 47aa205ee0bd..c5ed5d25f642 100644 --- a/Documentation/devicetree/bindings/net/dsa/mt7530.txt +++ b/Documentation/devicetree/bindings/net/dsa/mt7530.txt @@ -35,6 +35,42 @@ Required properties for the child nodes within ports container: - phy-mode: String, must be either "trgmii" or "rgmii" for port labeled "cpu". +Port 5 of the switch is muxed between: +1. GMAC5: GMAC5 can interface with another external MAC or PHY. +2. PHY of port 0 or port 4: PHY interfaces with an external MAC like 2nd GMAC + of the SOC. Used in many setups where port 0/4 becomes the WAN port. + Note: On a MT7621 SOC with integrated switch: 2nd GMAC can only connected to + GMAC5 when the gpios for RGMII2 (GPIO 22-33) are not used and not + connected to external component! + +Port 5 modes/configurations: +1. Port 5 is disabled and isolated: An external phy can interface to the 2nd + GMAC of the SOC. + In the case of a build-in MT7530 switch, port 5 shares the RGMII bus with 2nd + GMAC and an optional external phy. Mind the GPIO/pinctl settings of the SOC! +2. Port 5 is muxed to PHY of port 0/4: Port 0/4 interfaces with 2nd GMAC. + It is a simple MAC to PHY interface, port 5 needs to be setup for xMII mode + and RGMII delay. +3. Port 5 is muxed to GMAC5 and can interface to an external phy. + Port 5 becomes an extra switch port. + Only works on platform where external phy TX<->RX lines are swapped. + Like in the Ubiquiti ER-X-SFP. +4. Port 5 is muxed to GMAC5 and interfaces with the 2nd GAMC as 2nd CPU port. + Currently a 2nd CPU port is not supported by DSA code. + +Depending on how the external PHY is wired: +1. normal: The PHY can only connect to 2nd GMAC but not to the switch +2. swapped: RGMII TX, RX are swapped; external phy interface with the switch as + a ethernet port. 
But can't interface to the 2nd GMAC. + +Based on the DT the port 5 mode is configured. + +Driver tries to lookup the phy-handle of the 2nd GMAC of the master device. +When phy-handle matches PHY of port 0 or 4 then port 5 set-up as mode 2. +phy-mode must be set, see also example 2 below! + * mt7621: phy-mode = "rgmii-txid"; + * mt7623: phy-mode = "rgmii"; + See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional required, optional properties and how the integrated switch subnodes must be specified. @@ -94,3 +130,181 @@ Example: }; }; }; + +Example 2: MT7621: Port 4 is WAN port: 2nd GMAC -> Port 5 -> PHY port 4. + +ð { + gmac0: mac@0 { + compatible = "mediatek,eth-mac"; + reg = <0>; + phy-mode = "rgmii"; + + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + + gmac1: mac@1 { + compatible = "mediatek,eth-mac"; + reg = <1>; + phy-mode = "rgmii-txid"; + phy-handle = <&phy4>; + }; + + mdio: mdio-bus { + #address-cells = <1>; + #size-cells = <0>; + + /* Internal phy */ + phy4: ethernet-phy@4 { + reg = <4>; + }; + + mt7530: switch@1f { + compatible = "mediatek,mt7621"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x1f>; + pinctrl-names = "default"; + mediatek,mcm; + + resets = <&rstctrl 2>; + reset-names = "mcm"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "lan0"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + }; + + port@2 { + reg = <2>; + label = "lan2"; + }; + + port@3 { + reg = <3>; + label = "lan3"; + }; + +/* Commented out. Port 4 is handled by 2nd GMAC. + port@4 { + reg = <4>; + label = "lan4"; + }; +*/ + + cpu_port0: port@6 { + reg = <6>; + label = "cpu"; + ethernet = <&gmac0>; + phy-mode = "rgmii"; + + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + }; + }; + }; +}; + +Example 3: MT7621: Port 5 is connected to external PHY: Port 5 -> external PHY. + +ð { + gmac0: mac@0 { + compatible = "mediatek,eth-mac"; + reg = <0>; + phy-mode = "rgmii"; + + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + + mdio: mdio-bus { + #address-cells = <1>; + #size-cells = <0>; + + /* External phy */ + ephy5: ethernet-phy@7 { + reg = <7>; + }; + + mt7530: switch@1f { + compatible = "mediatek,mt7621"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x1f>; + pinctrl-names = "default"; + mediatek,mcm; + + resets = <&rstctrl 2>; + reset-names = "mcm"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "lan0"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + }; + + port@2 { + reg = <2>; + label = "lan2"; + }; + + port@3 { + reg = <3>; + label = "lan3"; + }; + + port@4 { + reg = <4>; + label = "lan4"; + }; + + port@5 { + reg = <5>; + label = "lan5"; + phy-mode = "rgmii"; + phy-handle = <&ephy5>; + }; + + cpu_port0: port@6 { + reg = <6>; + label = "cpu"; + ethernet = <&gmac0>; + phy-mode = "rgmii"; + + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + }; + }; + }; +}; diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst index eeedc2e826aa..83f7ae5fc045 100644 --- a/Documentation/networking/af_xdp.rst +++ b/Documentation/networking/af_xdp.rst @@ -153,10 +153,12 @@ an example, if the UMEM is 64k and each chunk is 4k, then the UMEM has Frames passed to the kernel are used for the ingress path (RX rings). -The user application produces UMEM addrs to this ring. Note that the -kernel will mask the incoming addr. E.g. 
for a chunk size of 2k, the -log2(2048) LSB of the addr will be masked off, meaning that 2048, 2050 -and 3000 refers to the same chunk. +The user application produces UMEM addrs to this ring. Note that, if +running the application with aligned chunk mode, the kernel will mask +the incoming addr. E.g. for a chunk size of 2k, the log2(2048) LSB of +the addr will be masked off, meaning that 2048, 2050 and 3000 refers +to the same chunk. If the user application is run in the unaligned +chunks mode, then the incoming addr will be left untouched. UMEM Completion Ring diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst index 2b7fefe72351..57fec66c5419 100644 --- a/Documentation/networking/device_drivers/index.rst +++ b/Documentation/networking/device_drivers/index.rst @@ -23,6 +23,7 @@ Contents: intel/ice google/gve mellanox/mlx5 + pensando/ionic .. only:: subproject diff --git a/Documentation/networking/device_drivers/mellanox/mlx5.rst b/Documentation/networking/device_drivers/mellanox/mlx5.rst index b30a63dbf4b7..d071c6b49e1f 100644 --- a/Documentation/networking/device_drivers/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/mellanox/mlx5.rst @@ -11,6 +11,7 @@ Contents - `Enabling the driver and kconfig options`_ - `Devlink info`_ +- `Devlink parameters`_ - `Devlink health reporters`_ - `mlx5 tracepoints`_ @@ -122,6 +123,38 @@ User command example:: stored: fw.version 16.26.0100 +Devlink parameters +================== + +flow_steering_mode: Device flow steering mode +--------------------------------------------- +The flow steering mode parameter controls the flow steering mode of the driver. +Two modes are supported: +1. 'dmfs' - Device managed flow steering. +2. 'smfs - Software/Driver managed flow steering. + +In DMFS mode, the HW steering entities are created and managed through the +Firmware. +In SMFS mode, the HW steering entities are created and managed though by +the driver directly into Hardware without firmware intervention. + +SMFS mode is faster and provides better rule inserstion rate compared to default DMFS mode. + +User command examples: + +- Set SMFS flow steering mode:: + + $ devlink dev param set pci/0000:06:00.0 name flow_steering_mode value "smfs" cmode runtime + +- Read device flow steering mode:: + + $ devlink dev param show pci/0000:06:00.0 name flow_steering_mode + pci/0000:06:00.0: + name flow_steering_mode type driver-specific + values: + cmode runtime value smfs + + Devlink health reporters ======================== diff --git a/Documentation/networking/device_drivers/pensando/ionic.rst b/Documentation/networking/device_drivers/pensando/ionic.rst new file mode 100644 index 000000000000..67b6839d516b --- /dev/null +++ b/Documentation/networking/device_drivers/pensando/ionic.rst @@ -0,0 +1,43 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +========================================================== +Linux* Driver for the Pensando(R) Ethernet adapter family +========================================================== + +Pensando Linux Ethernet driver. 
+Copyright(c) 2019 Pensando Systems, Inc + +Contents +======== + +- Identifying the Adapter +- Support + +Identifying the Adapter +======================= + +To find if one or more Pensando PCI Ethernet devices are installed on the +host, check for the PCI devices:: + + $ lspci -d 1dd8: + b5:00.0 Ethernet controller: Device 1dd8:1002 + b6:00.0 Ethernet controller: Device 1dd8:1002 + +If such devices are listed as above, then the ionic.ko driver should find +and configure them for use. There should be log entries in the kernel +messages such as these:: + + $ dmesg | grep ionic + ionic Pensando Ethernet NIC Driver, ver 0.15.0-k + ionic 0000:b5:00.0 enp181s0: renamed from eth0 + ionic 0000:b6:00.0 enp182s0: renamed from eth0 + +Support +======= +For general Linux networking support, please use the netdev mailing +list, which is monitored by Pensando personnel:: + +For more specific support needs, please use the Pensando driver support +email:: diff --git a/Documentation/networking/devlink-info-versions.rst b/Documentation/networking/devlink-info-versions.rst index 4316342b7746..4914f581b1fd 100644 --- a/Documentation/networking/devlink-info-versions.rst +++ b/Documentation/networking/devlink-info-versions.rst @@ -14,11 +14,27 @@ board.rev Board design revision. +asic.id +======= + +ASIC design identifier. + +asic.rev +======== + +ASIC design revision. + board.manufacture ================= An identifier of the company or the facility which produced the part. +fw +== + +Overall firmware version, often representing the collection of +fw.mgmt, fw.app, etc. + fw.mgmt ======= diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 37eabc17894c..0481d0ffebed 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -17,6 +17,7 @@ Contents: devlink-trap devlink-trap-netdevsim ieee802154 + j1939 kapi z8530book msg_zerocopy diff --git a/Documentation/networking/j1939.rst b/Documentation/networking/j1939.rst new file mode 100644 index 000000000000..ce7e7a044e08 --- /dev/null +++ b/Documentation/networking/j1939.rst @@ -0,0 +1,422 @@ +.. SPDX-License-Identifier: (GPL-2.0 OR MIT) + +=================== +J1939 Documentation +=================== + +Overview / What Is J1939 +======================== + +SAE J1939 defines a higher layer protocol on CAN. It implements a more +sophisticated addressing scheme and extends the maximum packet size above 8 +bytes. Several derived specifications exist, which differ from the original +J1939 on the application level, like MilCAN A, NMEA2000 and especially +ISO-11783 (ISOBUS). This last one specifies the so-called ETP (Extended +Transport Protocol) which is has been included in this implementation. This +results in a maximum packet size of ((2 ^ 24) - 1) * 7 bytes == 111 MiB. + +Specifications used +------------------- + +* SAE J1939-21 : data link layer +* SAE J1939-81 : network management +* ISO 11783-6 : Virtual Terminal (Extended Transport Protocol) + +.. _j1939-motivation: + +Motivation +========== + +Given the fact there's something like SocketCAN with an API similar to BSD +sockets, we found some reasons to justify a kernel implementation for the +addressing and transport methods used by J1939. + +* **Addressing:** when a process on an ECU communicates via J1939, it should + not necessarily know its source address. Although at least one process per + ECU should know the source address. Other processes should be able to reuse + that address. 
This way, address parameters for different processes + cooperating for the same ECU, are not duplicated. This way of working is + closely related to the UNIX concept where programs do just one thing, and do + it well. + +* **Dynamic addressing:** Address Claiming in J1939 is time critical. + Furthermore data transport should be handled properly during the address + negotiation. Putting this functionality in the kernel eliminates it as a + requirement for _every_ user space process that communicates via J1939. This + results in a consistent J1939 bus with proper addressing. + +* **Transport:** both TP & ETP reuse some PGNs to relay big packets over them. + Different processes may thus use the same TP & ETP PGNs without actually + knowing it. The individual TP & ETP sessions _must_ be serialized + (synchronized) between different processes. The kernel solves this problem + properly and eliminates the serialization (synchronization) as a requirement + for _every_ user space process that communicates via J1939. + +J1939 defines some other features (relaying, gateway, fast packet transport, +...). In-kernel code for these would not contribute to protocol stability. +Therefore, these parts are left to user space. + +The J1939 sockets operate on CAN network devices (see SocketCAN). Any J1939 +user space library operating on CAN raw sockets will still operate properly. +Since such library does not communicate with the in-kernel implementation, care +must be taken that these two do not interfere. In practice, this means they +cannot share ECU addresses. A single ECU (or virtual ECU) address is used by +the library exclusively, or by the in-kernel system exclusively. + +J1939 concepts +============== + +PGN +--- + +The PGN (Parameter Group Number) is a number to identify a packet. The PGN +is composed as follows: +1 bit : Reserved Bit +1 bit : Data Page +8 bits : PF (PDU Format) +8 bits : PS (PDU Specific) + +In J1939-21 distinction is made between PDU1 format (where PF < 240) and PDU2 +format (where PF >= 240). Furthermore, when using PDU2 format, the PS-field +contains a so-called Group Extension, which is part of the PGN. When using PDU2 +format, the Group Extension is set in the PS-field. + +On the other hand, when using PDU1 format, the PS-field contains a so-called +Destination Address, which is _not_ part of the PGN. When communicating a PGN +from user space to kernel (or visa versa) and PDU2 format is used, the PS-field +of the PGN shall be set to zero. The Destination Address shall be set +elsewhere. + +Regarding PGN mapping to 29-bit CAN identifier, the Destination Address shall +be get/set from/to the appropriate bits of the identifier by the kernel. + + +Addressing +---------- + +Both static and dynamic addressing methods can be used. + +For static addresses, no extra checks are made by the kernel, and provided +addresses are considered right. This responsibility is for the OEM or system +integrator. + +For dynamic addressing, so-called Address Claiming, extra support is foreseen +in the kernel. In J1939 any ECU is known by it's 64-bit NAME. At the moment of +a successful address claim, the kernel keeps track of both NAME and source +address being claimed. This serves as a base for filter schemes. By default, +packets with a destination that is not locally, will be rejected. + +Mixed mode packets (from a static to a dynamic address or vice versa) are +allowed. The BSD sockets define separate API calls for getting/setting the +local & remote address and are applicable for J1939 sockets. 
+ +Filtering +--------- + +J1939 defines white list filters per socket that a user can set in order to +receive a subset of the J1939 traffic. Filtering can be based on: + +* SA +* SOURCE_NAME +* PGN + +When multiple filters are in place for a single socket, and a packet comes in +that matches several of those filters, the packet is only received once for +that socket. + +How to Use J1939 +================ + +API Calls +--------- + +On CAN, you first need to open a socket for communicating over a CAN network. +To use J1939, #include <linux/can/j1939.h>. From there, <linux/can.h> will be +included too. To open a socket, use: + +.. code-block:: C + + s = socket(PF_CAN, SOCK_DGRAM, CAN_J1939); + +J1939 does use SOCK_DGRAM sockets. In the J1939 specification, connections are +mentioned in the context of transport protocol sessions. These still deliver +packets to the other end (using several CAN packets). SOCK_STREAM is not +supported. + +After the successful creation of the socket, you would normally use the bind(2) +and/or connect(2) system call to bind the socket to a CAN interface. After +binding and/or connecting the socket, you can read(2) and write(2) from/to the +socket or use send(2), sendto(2), sendmsg(2) and the recv*() counterpart +operations on the socket as usual. There are also J1939 specific socket options +described below. + +In order to send data, a bind(2) must have been successful. bind(2) assigns a +local address to a socket. + +Different from CAN is that the payload data is just the data that get send, +without it's header info. The header info is derived from the sockaddr supplied +to bind(2), connect(2), sendto(2) and recvfrom(2). A write(2) with size 4 will +result in a packet with 4 bytes. + +The sockaddr structure has extensions for use with J1939 as specified below: + +.. code-block:: C + + struct sockaddr_can { + sa_family_t can_family; + int can_ifindex; + union { + struct { + __u64 name; + /* pgn: + * 8 bit: PS in PDU2 case, else 0 + * 8 bit: PF + * 1 bit: DP + * 1 bit: reserved + */ + __u32 pgn; + __u8 addr; + } j1939; + } can_addr; + } + +can_family & can_ifindex serve the same purpose as for other SocketCAN sockets. + +can_addr.j1939.pgn specifies the PGN (max 0x3ffff). Individual bits are +specified above. + +can_addr.j1939.name contains the 64-bit J1939 NAME. + +can_addr.j1939.addr contains the address. + +The bind(2) system call assigns the local address, i.e. the source address when +sending packages. If a PGN during bind(2) is set, it's used as a RX filter. +I.e. only packets with a matching PGN are received. If an ADDR or NAME is set +it is used as a receive filter, too. It will match the destination NAME or ADDR +of the incoming packet. The NAME filter will work only if appropriate Address +Claiming for this name was done on the CAN bus and registered/cached by the +kernel. + +On the other hand connect(2) assigns the remote address, i.e. the destination +address. The PGN from connect(2) is used as the default PGN when sending +packets. If ADDR or NAME is set it will be used as the default destination ADDR +or NAME. Further a set ADDR or NAME during connect(2) is used as a receive +filter. It will match the source NAME or ADDR of the incoming packet. + +Both write(2) and send(2) will send a packet with local address from bind(2) and +the remote address from connect(2). Use sendto(2) to overwrite the destination +address. + +If can_addr.j1939.name is set (!= 0) the NAME is looked up by the kernel and +the corresponding ADDR is used. 
If can_addr.j1939.name is not set (== 0), +can_addr.j1939.addr is used. + +When creating a socket, reasonable defaults are set. Some options can be +modified with setsockopt(2) & getsockopt(2). + +RX path related options: + +- SO_J1939_FILTER - configure array of filters +- SO_J1939_PROMISC - disable filters set by bind(2) and connect(2) + +By default no broadcast packets can be send or received. To enable sending or +receiving broadcast packets use the socket option SO_BROADCAST: + +.. code-block:: C + + int value = 1; + setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &value, sizeof(value)); + +The following diagram illustrates the RX path: + +.. code:: + + +--------------------+ + | incoming packet | + +--------------------+ + | + V + +--------------------+ + | SO_J1939_PROMISC? | + +--------------------+ + | | + no | | yes + | | + .---------' `---------. + | | + +---------------------------+ | + | bind() + connect() + | | + | SOCK_BROADCAST filter | | + +---------------------------+ | + | | + |<---------------------' + V + +---------------------------+ + | SO_J1939_FILTER | + +---------------------------+ + | + V + +---------------------------+ + | socket recv() | + +---------------------------+ + +TX path related options: +SO_J1939_SEND_PRIO - change default send priority for the socket + +Message Flags during send() and Related System Calls +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +send(2), sendto(2) and sendmsg(2) take a 'flags' argument. Currently +supported flags are: + +* MSG_DONTWAIT, i.e. non-blocking operation. + +recvmsg(2) +^^^^^^^^^ + +In most cases recvmsg(2) is needed if you want to extract more information than +recvfrom(2) can provide. For example package priority and timestamp. The +Destination Address, name and packet priority (if applicable) are attached to +the msghdr in the recvmsg(2) call. They can be extracted using cmsg(3) macros, +with cmsg_level == SOL_J1939 && cmsg_type == SCM_J1939_DEST_ADDR, +SCM_J1939_DEST_NAME or SCM_J1939_PRIO. The returned data is a uint8_t for +priority and dst_addr, and uint64_t for dst_name. + +.. code-block:: C + + uint8_t priority, dst_addr; + uint64_t dst_name; + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + switch (cmsg->cmsg_level) { + case SOL_CAN_J1939: + if (cmsg->cmsg_type == SCM_J1939_DEST_ADDR) + dst_addr = *CMSG_DATA(cmsg); + else if (cmsg->cmsg_type == SCM_J1939_DEST_NAME) + memcpy(&dst_name, CMSG_DATA(cmsg), cmsg->cmsg_len - CMSG_LEN(0)); + else if (cmsg->cmsg_type == SCM_J1939_PRIO) + priority = *CMSG_DATA(cmsg); + break; + } + } + +Dynamic Addressing +------------------ + +Distinction has to be made between using the claimed address and doing an +address claim. To use an already claimed address, one has to fill in the +j1939.name member and provide it to bind(2). If the name had claimed an address +earlier, all further messages being sent will use that address. And the +j1939.addr member will be ignored. + +An exception on this is PGN 0x0ee00. This is the "Address Claim/Cannot Claim +Address" message and the kernel will use the j1939.addr member for that PGN if +necessary. + +To claim an address following code example can be used: + +.. 
code-block:: C + + struct sockaddr_can baddr = { + .can_family = AF_CAN, + .can_addr.j1939 = { + .name = name, + .addr = J1939_IDLE_ADDR, + .pgn = J1939_NO_PGN, /* to disable bind() rx filter for PGN */ + }, + .can_ifindex = if_nametoindex("can0"), + }; + + bind(sock, (struct sockaddr *)&baddr, sizeof(baddr)); + + /* for Address Claiming broadcast must be allowed */ + int value = 1; + setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &value, sizeof(value)); + + /* configured advanced RX filter with PGN needed for Address Claiming */ + const struct j1939_filter filt[] = { + { + .pgn = J1939_PGN_ADDRESS_CLAIMED, + .pgn_mask = J1939_PGN_PDU1_MAX, + }, { + .pgn = J1939_PGN_ADDRESS_REQUEST, + .pgn_mask = J1939_PGN_PDU1_MAX, + }, { + .pgn = J1939_PGN_ADDRESS_COMMANDED, + .pgn_mask = J1939_PGN_MAX, + }, + }; + + setsockopt(sock, SOL_CAN_J1939, SO_J1939_FILTER, &filt, sizeof(filt)); + + uint64_t dat = htole64(name); + const struct sockaddr_can saddr = { + .can_family = AF_CAN, + .can_addr.j1939 = { + .pgn = J1939_PGN_ADDRESS_CLAIMED, + .addr = J1939_NO_ADDR, + }, + }; + + /* Afterwards do a sendto(2) with data set to the NAME (Little Endian). If the + * NAME provided, does not match the j1939.name provided to bind(2), EPROTO + * will be returned. + */ + sendto(sock, dat, sizeof(dat), 0, (const struct sockaddr *)&saddr, sizeof(saddr)); + +If no-one else contests the address claim within 250ms after transmission, the +kernel marks the NAME-SA assignment as valid. The valid assignment will be kept +among other valid NAME-SA assignments. From that point, any socket bound to the +NAME can send packets. + +If another ECU claims the address, the kernel will mark the NAME-SA expired. +No socket bound to the NAME can send packets (other than address claims). To +claim another address, some socket bound to NAME, must bind(2) again, but with +only j1939.addr changed to the new SA, and must then send a valid address claim +packet. This restarts the state machine in the kernel (and any other +participant on the bus) for this NAME. + +can-utils also include the jacd tool, so it can be used as code example or as +default Address Claiming daemon. + +Send Examples +------------- + +Static Addressing +^^^^^^^^^^^^^^^^^ + +This example will send a PGN (0x12300) from SA 0x20 to DA 0x30. + +Bind: + +.. code-block:: C + + struct sockaddr_can baddr = { + .can_family = AF_CAN, + .can_addr.j1939 = { + .name = J1939_NO_NAME, + .addr = 0x20, + .pgn = J1939_NO_PGN, + }, + .can_ifindex = if_nametoindex("can0"), + }; + + bind(sock, (struct sockaddr *)&baddr, sizeof(baddr)); + +Now, the socket 'sock' is bound to the SA 0x20. Since no connect(2) was called, +at this point we can use only sendto(2) or sendmsg(2). + +Send: + +.. 
code-block:: C + + const struct sockaddr_can saddr = { + .can_family = AF_CAN, + .can_addr.j1939 = { + .name = J1939_NO_NAME; + .pgn = 0x30, + .addr = 0x12300, + }, + }; + + sendto(sock, dat, sizeof(dat), 0, (const struct sockaddr *)&saddr, sizeof(saddr)); diff --git a/MAINTAINERS b/MAINTAINERS index a081c477d1d1..84bb34727f81 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3669,6 +3669,16 @@ F: include/uapi/linux/can/bcm.h F: include/uapi/linux/can/raw.h F: include/uapi/linux/can/gw.h +CAN-J1939 NETWORK LAYER +M: Robin van der Gracht <[email protected]> +M: Oleksij Rempel <[email protected]> +R: Pengutronix Kernel Team <[email protected]> +S: Maintained +F: Documentation/networking/j1939.txt +F: net/can/j1939/ +F: include/uapi/linux/can/j1939.h + CAPABILITIES M: Serge Hallyn <[email protected]> @@ -12608,6 +12618,14 @@ L: [email protected] S: Maintained F: drivers/platform/x86/peaq-wmi.c +PENSANDO ETHERNET DRIVERS +M: Shannon Nelson <[email protected]> +M: Pensando Drivers <[email protected]> +S: Supported +F: Documentation/networking/device_drivers/pensando/ionic.rst +F: drivers/net/ethernet/pensando/ + PER-CPU MEMORY ALLOCATOR M: Dennis Zhou <[email protected]> M: Tejun Heo <[email protected]> diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index cb7ab50b7657..eb73f9f72c46 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -171,6 +171,9 @@ /* Rd = Ra + Rn * Rm */ #define A64_MADD(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \ A64_VARIANT(sf), AARCH64_INSN_DATA3_MADD) +/* Rd = Ra - Rn * Rm */ +#define A64_MSUB(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \ + A64_VARIANT(sf), AARCH64_INSN_DATA3_MSUB) /* Rd = Rn * Rm */ #define A64_MUL(sf, Rd, Rn, Rm) A64_MADD(sf, Rd, A64_ZR, Rn, Rm) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index f5b437f8a22b..cdc79de0c794 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -409,8 +409,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, break; case BPF_MOD: emit(A64_UDIV(is64, tmp, dst, src), ctx); - emit(A64_MUL(is64, tmp, tmp, src), ctx); - emit(A64_SUB(is64, dst, dst, tmp), ctx); + emit(A64_MSUB(is64, dst, dst, tmp, src), ctx); break; } break; @@ -516,8 +515,7 @@ emit_bswap_uxt: case BPF_ALU64 | BPF_MOD | BPF_K: emit_a64_mov_i(is64, tmp2, imm, ctx); emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); - emit(A64_MUL(is64, tmp, tmp, tmp2), ctx); - emit(A64_SUB(is64, dst, dst, tmp), ctx); + emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx); break; case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU64 | BPF_LSH | BPF_K: diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 955eb355c2fd..ce88211b9c6c 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -502,7 +502,8 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of * stack space for the large switch statement. 
*/ -static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) +static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, + int i, bool extra_pass) { struct bpf_insn *insn = &fp->insnsi[i]; int jmp_off, last, insn_count = 1; @@ -1011,10 +1012,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i */ case BPF_JMP | BPF_CALL: { - /* - * b0 = (__bpf_call_base + imm)(b1, b2, b3, b4, b5) - */ - const u64 func = (u64)__bpf_call_base + imm; + u64 func; + bool func_addr_fixed; + int ret; + + ret = bpf_jit_get_func_addr(fp, insn, extra_pass, + &func, &func_addr_fixed); + if (ret < 0) + return -1; REG_SET_SEEN(BPF_REG_5); jit->seen |= SEEN_FUNC; @@ -1283,7 +1288,8 @@ branch_oc: /* * Compile eBPF program into s390x code */ -static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) +static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, + bool extra_pass) { int i, insn_count; @@ -1292,7 +1298,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) bpf_jit_prologue(jit, fp->aux->stack_depth); for (i = 0; i < fp->len; i += insn_count) { - insn_count = bpf_jit_insn(jit, fp, i); + insn_count = bpf_jit_insn(jit, fp, i, extra_pass); if (insn_count < 0) return -1; /* Next instruction address */ @@ -1311,6 +1317,12 @@ bool bpf_jit_needs_zext(void) return true; } +struct s390_jit_data { + struct bpf_binary_header *header; + struct bpf_jit ctx; + int pass; +}; + /* * Compile eBPF program "fp" */ @@ -1318,7 +1330,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { struct bpf_prog *tmp, *orig_fp = fp; struct bpf_binary_header *header; + struct s390_jit_data *jit_data; bool tmp_blinded = false; + bool extra_pass = false; struct bpf_jit jit; int pass; @@ -1337,6 +1351,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) fp = tmp; } + jit_data = fp->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + fp = orig_fp; + goto out; + } + fp->aux->jit_data = jit_data; + } + if (jit_data->ctx.addrs) { + jit = jit_data->ctx; + header = jit_data->header; + extra_pass = true; + pass = jit_data->pass + 1; + goto skip_init_ctx; + } + memset(&jit, 0, sizeof(jit)); jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); if (jit.addrs == NULL) { @@ -1349,7 +1380,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * - 3: Calculate program size and addrs arrray */ for (pass = 1; pass <= 3; pass++) { - if (bpf_jit_prog(&jit, fp)) { + if (bpf_jit_prog(&jit, fp, extra_pass)) { fp = orig_fp; goto free_addrs; } @@ -1361,12 +1392,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) fp = orig_fp; goto free_addrs; } + header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole); if (!header) { fp = orig_fp; goto free_addrs; } - if (bpf_jit_prog(&jit, fp)) { +skip_init_ctx: + if (bpf_jit_prog(&jit, fp, extra_pass)) { bpf_jit_binary_free(header); fp = orig_fp; goto free_addrs; @@ -1375,12 +1408,24 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf); print_fn_code(jit.prg_buf, jit.size_prg); } - bpf_jit_binary_lock_ro(header); + if (!fp->is_func || extra_pass) { + bpf_jit_binary_lock_ro(header); + } else { + jit_data->header = header; + jit_data->ctx = jit; + jit_data->pass = pass; + } fp->bpf_func = (void *) jit.prg_buf; fp->jited = 1; fp->jited_len = jit.size; + + if (!fp->is_func || extra_pass) { + bpf_prog_fill_jited_linfo(fp, jit.addrs + 1); free_addrs: - 
kfree(jit.addrs); + kfree(jit.addrs); + kfree(jit_data); + fp->aux->jit_data = NULL; + } out: if (tmp_blinded) bpf_jit_prog_release_other(fp, fp == orig_fp ? diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c index 635bb4b447fb..e6df5b95ed47 100644 --- a/drivers/crypto/chelsio/chtls/chtls_main.c +++ b/drivers/crypto/chelsio/chtls/chtls_main.c @@ -474,7 +474,8 @@ static int chtls_getsockopt(struct sock *sk, int level, int optname, struct tls_context *ctx = tls_get_ctx(sk); if (level != SOL_TLS) - return ctx->getsockopt(sk, level, optname, optval, optlen); + return ctx->sk_proto->getsockopt(sk, level, + optname, optval, optlen); return do_chtls_getsockopt(sk, optval, optlen); } @@ -541,7 +542,8 @@ static int chtls_setsockopt(struct sock *sk, int level, int optname, struct tls_context *ctx = tls_get_ctx(sk); if (level != SOL_TLS) - return ctx->setsockopt(sk, level, optname, optval, optlen); + return ctx->sk_proto->setsockopt(sk, level, + optname, optval, optlen); return do_chtls_setsockopt(sk, optname, optval, optlen); } diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 6c8645033102..4937947400cd 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -186,136 +186,6 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) return err; } -int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, - u16 uid, phys_addr_t *addr, u32 *obj_id) -{ - struct mlx5_core_dev *dev = dm->dev; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; - u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {}; - unsigned long *block_map; - u64 icm_start_addr; - u32 log_icm_size; - u32 num_blocks; - u32 max_blocks; - u64 block_idx; - void *sw_icm; - int ret; - - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, - MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); - MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); - - switch (type) { - case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: - icm_start_addr = MLX5_CAP64_DEV_MEM(dev, - steering_sw_icm_start_address); - log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size); - block_map = dm->steering_sw_icm_alloc_blocks; - break; - case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: - icm_start_addr = MLX5_CAP64_DEV_MEM(dev, - header_modify_sw_icm_start_address); - log_icm_size = MLX5_CAP_DEV_MEM(dev, - log_header_modify_sw_icm_size); - block_map = dm->header_modify_sw_icm_alloc_blocks; - break; - default: - return -EINVAL; - } - - num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >> - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); - max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); - spin_lock(&dm->lock); - block_idx = bitmap_find_next_zero_area(block_map, - max_blocks, - 0, - num_blocks, 0); - - if (block_idx < max_blocks) - bitmap_set(block_map, - block_idx, num_blocks); - - spin_unlock(&dm->lock); - - if (block_idx >= max_blocks) - return -ENOMEM; - - sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm); - icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); - MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr, - icm_start_addr); - MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length)); - - ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - if (ret) { - spin_lock(&dm->lock); - bitmap_clear(block_map, - block_idx, num_blocks); - spin_unlock(&dm->lock); - - return ret; - } - - *addr = icm_start_addr; - *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); - 
- return 0; -} - -int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, - u16 uid, phys_addr_t addr, u32 obj_id) -{ - struct mlx5_core_dev *dev = dm->dev; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; - u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; - unsigned long *block_map; - u32 num_blocks; - u64 start_idx; - int err; - - num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >> - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); - - switch (type) { - case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: - start_idx = - (addr - MLX5_CAP64_DEV_MEM( - dev, steering_sw_icm_start_address)) >> - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); - block_map = dm->steering_sw_icm_alloc_blocks; - break; - case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: - start_idx = - (addr - - MLX5_CAP64_DEV_MEM( - dev, header_modify_sw_icm_start_address)) >> - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); - block_map = dm->header_modify_sw_icm_alloc_blocks; - break; - default: - return -EINVAL; - } - - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, - MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); - MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); - - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - if (err) - return err; - - spin_lock(&dm->lock); - bitmap_clear(block_map, - start_idx, num_blocks); - spin_unlock(&dm->lock); - - return 0; -} - int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) { u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 0572dcba6eae..169cab4915e3 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -65,8 +65,4 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, u16 uid); int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port); -int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, - u16 uid, phys_addr_t *addr, u32 *obj_id); -int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, - u16 uid, phys_addr_t addr, u32 obj_id); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index b8841355fcd5..1c8f04abee0c 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -322,11 +322,11 @@ void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) switch (maction->flow_action_raw.sub_type) { case MLX5_IB_FLOW_ACTION_MODIFY_HEADER: mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev, - maction->flow_action_raw.action_id); + maction->flow_action_raw.modify_hdr); break; case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT: mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev, - maction->flow_action_raw.action_id); + maction->flow_action_raw.pkt_reformat); break; case MLX5_IB_FLOW_ACTION_DECAP: break; @@ -352,10 +352,11 @@ mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev, if (!maction) return ERR_PTR(-ENOMEM); - ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in, - &maction->flow_action_raw.action_id); + maction->flow_action_raw.modify_hdr = + mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in); - if (ret) { + if (IS_ERR(maction->flow_action_raw.modify_hdr)) { + ret = PTR_ERR(maction->flow_action_raw.modify_hdr); kfree(maction); return ERR_PTR(ret); } @@ -479,11 +480,13 @@ static int 
mlx5_ib_flow_action_create_packet_reformat_ctx( if (ret) return ret; - ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, - in, namespace, - &maction->flow_action_raw.action_id); - if (ret) + maction->flow_action_raw.pkt_reformat = + mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, + in, namespace); + if (IS_ERR(maction->flow_action_raw.pkt_reformat)) { + ret = PTR_ERR(maction->flow_action_raw.pkt_reformat); return ret; + } maction->flow_action_raw.sub_type = MLX5_IB_FLOW_ACTION_PACKET_REFORMAT; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0569bcab02d4..4e9f1507ffd9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2280,6 +2280,7 @@ static inline int check_dm_type_support(struct mlx5_ib_dev *dev, return -EOPNOTSUPP; break; case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: if (!capable(CAP_SYS_RAWIO) || !capable(CAP_NET_RAW)) return -EPERM; @@ -2344,20 +2345,20 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, struct uverbs_attr_bundle *attrs, int type) { - struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm; + struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev; u64 act_size; int err; /* Allocation size must a multiple of the basic block size * and a power of 2. */ - act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev)); + act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev)); act_size = roundup_pow_of_two(act_size); dm->size = act_size; - err = mlx5_cmd_alloc_sw_icm(dm_db, type, act_size, - to_mucontext(ctx)->devx_uid, &dm->dev_addr, - &dm->icm_dm.obj_id); + err = mlx5_dm_sw_icm_alloc(dev, type, act_size, + to_mucontext(ctx)->devx_uid, &dm->dev_addr, + &dm->icm_dm.obj_id); if (err) return err; @@ -2365,9 +2366,9 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, &dm->dev_addr, sizeof(dm->dev_addr)); if (err) - mlx5_cmd_dealloc_sw_icm(dm_db, type, dm->size, - to_mucontext(ctx)->devx_uid, - dm->dev_addr, dm->icm_dm.obj_id); + mlx5_dm_sw_icm_dealloc(dev, type, dm->size, + to_mucontext(ctx)->devx_uid, dm->dev_addr, + dm->icm_dm.obj_id); return err; } @@ -2407,8 +2408,14 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, attrs); break; case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + err = handle_alloc_dm_sw_icm(context, dm, + attr, attrs, + MLX5_SW_ICM_TYPE_STEERING); + break; case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: - err = handle_alloc_dm_sw_icm(context, dm, attr, attrs, type); + err = handle_alloc_dm_sw_icm(context, dm, + attr, attrs, + MLX5_SW_ICM_TYPE_HEADER_MODIFY); break; default: err = -EOPNOTSUPP; @@ -2428,6 +2435,7 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context( &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev; struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm; struct mlx5_ib_dm *dm = to_mdm(ibdm); u32 page_idx; @@ -2439,19 +2447,23 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs) if (ret) return ret; - page_idx = (dm->dev_addr - - pci_resource_start(dm_db->dev->pdev, 0) - - MLX5_CAP64_DEV_MEM(dm_db->dev, - memic_bar_start_addr)) >> - PAGE_SHIFT; + page_idx = (dm->dev_addr - pci_resource_start(dev->pdev, 0) - + MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr)) >> + PAGE_SHIFT; bitmap_clear(ctx->dm_pages, page_idx, DIV_ROUND_UP(dm->size, PAGE_SIZE)); break; case 
MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING, + dm->size, ctx->devx_uid, dm->dev_addr, + dm->icm_dm.obj_id); + if (ret) + return ret; + break; case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: - ret = mlx5_cmd_dealloc_sw_icm(dm_db, dm->type, dm->size, - ctx->devx_uid, dm->dev_addr, - dm->icm_dm.obj_id); + ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_HEADER_MODIFY, + dm->size, ctx->devx_uid, dm->dev_addr, + dm->icm_dm.obj_id); if (ret) return ret; break; @@ -2646,7 +2658,8 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) return -EINVAL; action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - action->modify_id = maction->flow_action_raw.action_id; + action->modify_hdr = + maction->flow_action_raw.modify_hdr; return 0; } if (maction->flow_action_raw.sub_type == @@ -2663,8 +2676,8 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, return -EINVAL; action->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - action->reformat_id = - maction->flow_action_raw.action_id; + action->pkt_reformat = + maction->flow_action_raw.pkt_reformat; return 0; } /* fall through */ @@ -6096,8 +6109,6 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { - struct mlx5_core_dev *mdev = dev->mdev; - mlx5_ib_cleanup_multiport_master(dev); if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { srcu_barrier(&dev->mr_srcu); @@ -6105,29 +6116,11 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) } WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); - - WARN_ON(dev->dm.steering_sw_icm_alloc_blocks && - !bitmap_empty( - dev->dm.steering_sw_icm_alloc_blocks, - BIT(MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size) - - MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)))); - - kfree(dev->dm.steering_sw_icm_alloc_blocks); - - WARN_ON(dev->dm.header_modify_sw_icm_alloc_blocks && - !bitmap_empty(dev->dm.header_modify_sw_icm_alloc_blocks, - BIT(MLX5_CAP_DEV_MEM( - mdev, log_header_modify_sw_icm_size) - - MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)))); - - kfree(dev->dm.header_modify_sw_icm_alloc_blocks); } static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; - u64 header_modify_icm_blocks = 0; - u64 steering_icm_blocks = 0; int err; int i; @@ -6174,51 +6167,17 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); - if (MLX5_CAP_GEN_64(mdev, general_obj_types) & - MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) { - if (MLX5_CAP64_DEV_MEM(mdev, steering_sw_icm_start_address)) { - steering_icm_blocks = - BIT(MLX5_CAP_DEV_MEM(mdev, - log_steering_sw_icm_size) - - MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)); - - dev->dm.steering_sw_icm_alloc_blocks = - kcalloc(BITS_TO_LONGS(steering_icm_blocks), - sizeof(unsigned long), GFP_KERNEL); - if (!dev->dm.steering_sw_icm_alloc_blocks) - goto err_mp; - } - - if (MLX5_CAP64_DEV_MEM(mdev, - header_modify_sw_icm_start_address)) { - header_modify_icm_blocks = BIT( - MLX5_CAP_DEV_MEM( - mdev, log_header_modify_sw_icm_size) - - MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)); - - dev->dm.header_modify_sw_icm_alloc_blocks = - kcalloc(BITS_TO_LONGS(header_modify_icm_blocks), - sizeof(unsigned long), GFP_KERNEL); - if (!dev->dm.header_modify_sw_icm_alloc_blocks) - goto err_dm; - } - } - spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; if 
(IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { err = init_srcu_struct(&dev->mr_srcu); if (err) - goto err_dm; + goto err_mp; } return 0; -err_dm: - kfree(dev->dm.steering_sw_icm_alloc_blocks); - kfree(dev->dm.header_modify_sw_icm_alloc_blocks); - err_mp: mlx5_ib_cleanup_multiport_master(dev); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 9ae587b74b12..125a507c10ed 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -868,7 +868,10 @@ struct mlx5_ib_flow_action { struct { struct mlx5_ib_dev *dev; u32 sub_type; - u32 action_id; + union { + struct mlx5_modify_hdr *modify_hdr; + struct mlx5_pkt_reformat *pkt_reformat; + }; } flow_action_raw; }; }; @@ -881,8 +884,6 @@ struct mlx5_dm { */ spinlock_t lock; DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES); - unsigned long *steering_sw_icm_alloc_blocks; - unsigned long *header_modify_sw_icm_alloc_blocks; }; struct mlx5_read_counters_attr { diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 483d270664cc..ac86be52b461 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2005 Marc Kleine-Budde, Pengutronix +/* Copyright (C) 2005 Marc Kleine-Budde, Pengutronix * Copyright (C) 2006 Andrey Volkov, Varma Electronics * Copyright (C) 2008-2009 Wolfgang Grandegger <[email protected]> */ @@ -12,6 +11,7 @@ #include <linux/if_arp.h> #include <linux/workqueue.h> #include <linux/can.h> +#include <linux/can/can-ml.h> #include <linux/can/dev.h> #include <linux/can/skb.h> #include <linux/can/netlink.h> @@ -62,8 +62,7 @@ EXPORT_SYMBOL_GPL(can_len2dlc); #define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */ #define CAN_CALC_SYNC_SEG 1 -/* - * Bit-timing calculation derived from: +/* Bit-timing calculation derived from: * * Code based on LinCAN sources and H8S2638 project * Copyright 2004-2006 Pavel Pisa - DCE FELK CVUT cz @@ -75,10 +74,11 @@ EXPORT_SYMBOL_GPL(can_len2dlc); * registers of the CAN controller. You can find more information * in the header file linux/can/netlink.h. 
*/ -static int can_update_sample_point(const struct can_bittiming_const *btc, - unsigned int sample_point_nominal, unsigned int tseg, - unsigned int *tseg1_ptr, unsigned int *tseg2_ptr, - unsigned int *sample_point_error_ptr) +static int +can_update_sample_point(const struct can_bittiming_const *btc, + unsigned int sample_point_nominal, unsigned int tseg, + unsigned int *tseg1_ptr, unsigned int *tseg2_ptr, + unsigned int *sample_point_error_ptr) { unsigned int sample_point_error, best_sample_point_error = UINT_MAX; unsigned int sample_point, best_sample_point = 0; @@ -86,7 +86,9 @@ static int can_update_sample_point(const struct can_bittiming_const *btc, int i; for (i = 0; i <= 1; i++) { - tseg2 = tseg + CAN_CALC_SYNC_SEG - (sample_point_nominal * (tseg + CAN_CALC_SYNC_SEG)) / 1000 - i; + tseg2 = tseg + CAN_CALC_SYNC_SEG - + (sample_point_nominal * (tseg + CAN_CALC_SYNC_SEG)) / + 1000 - i; tseg2 = clamp(tseg2, btc->tseg2_min, btc->tseg2_max); tseg1 = tseg - tseg2; if (tseg1 > btc->tseg1_max) { @@ -94,10 +96,12 @@ static int can_update_sample_point(const struct can_bittiming_const *btc, tseg2 = tseg - tseg1; } - sample_point = 1000 * (tseg + CAN_CALC_SYNC_SEG - tseg2) / (tseg + CAN_CALC_SYNC_SEG); + sample_point = 1000 * (tseg + CAN_CALC_SYNC_SEG - tseg2) / + (tseg + CAN_CALC_SYNC_SEG); sample_point_error = abs(sample_point_nominal - sample_point); - if ((sample_point <= sample_point_nominal) && (sample_point_error < best_sample_point_error)) { + if (sample_point <= sample_point_nominal && + sample_point_error < best_sample_point_error) { best_sample_point = sample_point; best_sample_point_error = sample_point_error; *tseg1_ptr = tseg1; @@ -148,7 +152,7 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, /* choose brp step which is possible in system */ brp = (brp / btc->brp_inc) * btc->brp_inc; - if ((brp < btc->brp_min) || (brp > btc->brp_max)) + if (brp < btc->brp_min || brp > btc->brp_max) continue; bitrate = priv->clock.freq / (brp * tsegall); @@ -162,7 +166,8 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, if (bitrate_error < best_bitrate_error) best_sample_point_error = UINT_MAX; - can_update_sample_point(btc, sample_point_nominal, tseg / 2, &tseg1, &tseg2, &sample_point_error); + can_update_sample_point(btc, sample_point_nominal, tseg / 2, + &tseg1, &tseg2, &sample_point_error); if (sample_point_error > best_sample_point_error) continue; @@ -191,8 +196,9 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, } /* real sample point */ - bt->sample_point = can_update_sample_point(btc, sample_point_nominal, best_tseg, - &tseg1, &tseg2, NULL); + bt->sample_point = can_update_sample_point(btc, sample_point_nominal, + best_tseg, &tseg1, &tseg2, + NULL); v64 = (u64)best_brp * 1000 * 1000 * 1000; do_div(v64, priv->clock.freq); @@ -216,7 +222,8 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, bt->brp = best_brp; /* real bitrate */ - bt->bitrate = priv->clock.freq / (bt->brp * (CAN_CALC_SYNC_SEG + tseg1 + tseg2)); + bt->bitrate = priv->clock.freq / + (bt->brp * (CAN_CALC_SYNC_SEG + tseg1 + tseg2)); return 0; } @@ -229,8 +236,7 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, } #endif /* CONFIG_CAN_CALC_BITTIMING */ -/* - * Checks the validity of the specified bit-timing parameters prop_seg, +/* Checks the validity of the specified bit-timing parameters prop_seg, * phase_seg1, phase_seg2 and sjw and tries to determine the bitrate * 
prescaler value brp. You can find more information in the header * file linux/can/netlink.h. @@ -270,9 +276,10 @@ static int can_fixup_bittiming(struct net_device *dev, struct can_bittiming *bt, } /* Checks the validity of predefined bitrate settings */ -static int can_validate_bitrate(struct net_device *dev, struct can_bittiming *bt, - const u32 *bitrate_const, - const unsigned int bitrate_const_cnt) +static int +can_validate_bitrate(struct net_device *dev, struct can_bittiming *bt, + const u32 *bitrate_const, + const unsigned int bitrate_const_cnt) { struct can_priv *priv = netdev_priv(dev); unsigned int i; @@ -295,8 +302,7 @@ static int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt, { int err; - /* - * Depending on the given can_bittiming parameter structure the CAN + /* Depending on the given can_bittiming parameter structure the CAN * timing parameters are calculated based on the provided bitrate OR * alternatively the CAN timing parameters (tq, prop_seg, etc.) are * provided directly which are then checked and fixed up. @@ -397,8 +403,7 @@ void can_change_state(struct net_device *dev, struct can_frame *cf, } EXPORT_SYMBOL_GPL(can_change_state); -/* - * Local echo of CAN messages +/* Local echo of CAN messages * * CAN network devices *should* support a local echo functionality * (see Documentation/networking/can.rst). To test the handling of CAN @@ -423,8 +428,7 @@ static void can_flush_echo_skb(struct net_device *dev) } } -/* - * Put the skb on the stack to be looped backed locally lateron +/* Put the skb on the stack to be looped backed locally lateron * * The function is typically called in the start_xmit function * of the device driver. The driver must protect access to @@ -446,7 +450,6 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, } if (!priv->echo_skb[idx]) { - skb = can_create_echo_skb(skb); if (!skb) return; @@ -466,7 +469,8 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, } EXPORT_SYMBOL_GPL(can_put_echo_skb); -struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) +struct sk_buff * +__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) { struct can_priv *priv = netdev_priv(dev); @@ -493,8 +497,7 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 return NULL; } -/* - * Get the skb from the stack and loop it back locally +/* Get the skb from the stack and loop it back locally * * The function is typically called when the TX done interrupt * is handled in the device driver. The driver must protect @@ -515,11 +518,10 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) } EXPORT_SYMBOL_GPL(can_get_echo_skb); -/* - * Remove the skb from the stack and free it. - * - * The function is typically called when TX failed. - */ +/* Remove the skb from the stack and free it. + * + * The function is typically called when TX failed. 
+ */ void can_free_echo_skb(struct net_device *dev, unsigned int idx) { struct can_priv *priv = netdev_priv(dev); @@ -533,9 +535,7 @@ void can_free_echo_skb(struct net_device *dev, unsigned int idx) } EXPORT_SYMBOL_GPL(can_free_echo_skb); -/* - * CAN device restart for bus-off recovery - */ +/* CAN device restart for bus-off recovery */ static void can_restart(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); @@ -546,15 +546,14 @@ static void can_restart(struct net_device *dev) BUG_ON(netif_carrier_ok(dev)); - /* - * No synchronization needed because the device is bus-off and + /* No synchronization needed because the device is bus-off and * no messages can come in or go out. */ can_flush_echo_skb(dev); /* send restart message upstream */ skb = alloc_can_err_skb(dev, &cf); - if (skb == NULL) { + if (!skb) { err = -ENOMEM; goto restart; } @@ -580,7 +579,8 @@ restart: static void can_restart_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); - struct can_priv *priv = container_of(dwork, struct can_priv, restart_work); + struct can_priv *priv = container_of(dwork, struct can_priv, + restart_work); can_restart(priv->dev); } @@ -589,8 +589,7 @@ int can_restart_now(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); - /* - * A manual restart is only permitted if automatic restart is + /* A manual restart is only permitted if automatic restart is * disabled and the device is in the bus-off state */ if (priv->restart_ms) @@ -604,8 +603,7 @@ int can_restart_now(struct net_device *dev) return 0; } -/* - * CAN bus-off +/* CAN bus-off * * This functions should be called when the device goes bus-off to * tell the netif layer that no more packets can be sent or received. @@ -708,9 +706,7 @@ struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf) } EXPORT_SYMBOL_GPL(alloc_can_err_skb); -/* - * Allocate and setup space for the CAN network device - */ +/* Allocate and setup space for the CAN network device */ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, unsigned int txqs, unsigned int rxqs) { @@ -718,11 +714,24 @@ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, struct can_priv *priv; int size; + /* We put the driver's priv, the CAN mid layer priv and the + * echo skb into the netdevice's priv. 
The memory layout for + * the netdev_priv is like this: + * + * +-------------------------+ + * | driver's priv | + * +-------------------------+ + * | struct can_ml_priv | + * +-------------------------+ + * | array of struct sk_buff | + * +-------------------------+ + */ + + size = ALIGN(sizeof_priv, NETDEV_ALIGN) + sizeof(struct can_ml_priv); + if (echo_skb_max) - size = ALIGN(sizeof_priv, sizeof(struct sk_buff *)) + + size = ALIGN(size, sizeof(struct sk_buff *)) + echo_skb_max * sizeof(struct sk_buff *); - else - size = sizeof_priv; dev = alloc_netdev_mqs(size, "can%d", NET_NAME_UNKNOWN, can_setup, txqs, rxqs); @@ -732,10 +741,12 @@ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, priv = netdev_priv(dev); priv->dev = dev; + dev->ml_priv = (void *)priv + ALIGN(sizeof_priv, NETDEV_ALIGN); + if (echo_skb_max) { priv->echo_skb_max = echo_skb_max; priv->echo_skb = (void *)priv + - ALIGN(sizeof_priv, sizeof(struct sk_buff *)); + (size - echo_skb_max * sizeof(struct sk_buff *)); } priv->state = CAN_STATE_STOPPED; @@ -746,18 +757,14 @@ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, } EXPORT_SYMBOL_GPL(alloc_candev_mqs); -/* - * Free space of the CAN network device - */ +/* Free space of the CAN network device */ void free_candev(struct net_device *dev) { free_netdev(dev); } EXPORT_SYMBOL_GPL(free_candev); -/* - * changing MTU and control mode for CAN/CANFD devices - */ +/* changing MTU and control mode for CAN/CANFD devices */ int can_change_mtu(struct net_device *dev, int new_mtu) { struct can_priv *priv = netdev_priv(dev); @@ -794,8 +801,7 @@ int can_change_mtu(struct net_device *dev, int new_mtu) } EXPORT_SYMBOL_GPL(can_change_mtu); -/* - * Common open function when the device gets opened. +/* Common open function when the device gets opened. * * This function should be called in the open function of the device * driver. @@ -812,7 +818,7 @@ int open_candev(struct net_device *dev) /* For CAN FD the data bitrate has to be >= the arbitration bitrate */ if ((priv->ctrlmode & CAN_CTRLMODE_FD) && (!priv->data_bittiming.bitrate || - (priv->data_bittiming.bitrate < priv->bittiming.bitrate))) { + priv->data_bittiming.bitrate < priv->bittiming.bitrate)) { netdev_err(dev, "incorrect/missing data bit-timing\n"); return -EINVAL; } @@ -848,8 +854,7 @@ void of_can_transceiver(struct net_device *dev) EXPORT_SYMBOL_GPL(of_can_transceiver); #endif -/* - * Common close function for cleanup before the device gets closed. +/* Common close function for cleanup before the device gets closed. * * This function should be called in the close function of the device * driver. 
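The alloc_candev_mqs() change above packs the driver's private area, struct can_ml_priv and the echo-skb pointer array into a single netdev_priv() allocation. A minimal user-space sketch of that offset arithmetic follows; the 32-byte NETDEV_ALIGN, the 100-byte driver priv and the 16-byte can_ml_priv stand-in are assumptions made for the example, not values taken from the patch.

/* Minimal sketch of the netdev_priv() layout computed above; not part of
 * the patch, compiles stand-alone with any C compiler.
 */
#include <stdio.h>
#include <stddef.h>

#define NETDEV_ALIGN	32			/* assumed, mirrors netdevice.h */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((size_t)(a) - 1))

int main(void)
{
	size_t sizeof_priv = 100;		/* hypothetical driver priv size */
	size_t sizeof_ml_priv = 16;		/* stand-in for struct can_ml_priv */
	size_t echo_skb_max = 4;		/* hypothetical echo skb count */
	size_t size, ml_off, echo_off;

	/* same steps as the patched alloc_candev_mqs() */
	size = ALIGN(sizeof_priv, NETDEV_ALIGN) + sizeof_ml_priv;
	if (echo_skb_max)
		size = ALIGN(size, sizeof(void *)) +
		       echo_skb_max * sizeof(void *);

	ml_off = ALIGN(sizeof_priv, NETDEV_ALIGN);
	echo_off = size - echo_skb_max * sizeof(void *);

	printf("ml_priv at +%zu, echo_skb at +%zu, total priv %zu bytes\n",
	       ml_off, echo_off, size);
	return 0;
}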
@@ -863,9 +868,7 @@ void close_candev(struct net_device *dev) } EXPORT_SYMBOL_GPL(close_candev); -/* - * CAN netlink interface - */ +/* CAN netlink interface */ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { [IFLA_CAN_STATE] = { .type = NLA_U32 }, [IFLA_CAN_CTRLMODE] = { .len = sizeof(struct can_ctrlmode) }, @@ -1209,7 +1212,6 @@ static int can_newlink(struct net *src_net, struct net_device *dev, static void can_dellink(struct net_device *dev, struct list_head *head) { - return; } static struct rtnl_link_ops can_link_ops __read_mostly = { @@ -1227,9 +1229,7 @@ static struct rtnl_link_ops can_link_ops __read_mostly = { .fill_xstats = can_fill_xstats, }; -/* - * Register the CAN network device - */ +/* Register the CAN network device */ int register_candev(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); @@ -1255,22 +1255,19 @@ int register_candev(struct net_device *dev) } EXPORT_SYMBOL_GPL(register_candev); -/* - * Unregister the CAN network device - */ +/* Unregister the CAN network device */ void unregister_candev(struct net_device *dev) { unregister_netdev(dev); } EXPORT_SYMBOL_GPL(unregister_candev); -/* - * Test if a network device is a candev based device +/* Test if a network device is a candev based device * and return the can_priv* if so. */ struct can_priv *safe_candev_priv(struct net_device *dev) { - if ((dev->type != ARPHRD_CAN) || (dev->rtnl_link_ops != &can_link_ops)) + if (dev->type != ARPHRD_CAN || dev->rtnl_link_ops != &can_link_ops) return NULL; return netdev_priv(dev); @@ -1285,7 +1282,7 @@ static __init int can_dev_init(void) err = rtnl_link_register(&can_link_ops); if (!err) - printk(KERN_INFO MOD_DESC "\n"); + pr_info(MOD_DESC "\n"); return err; } diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index f9815fda8840..6f766918211a 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -65,6 +65,7 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices"); #define KVASER_PCIEFD_SYSID_BASE 0x1f020 #define KVASER_PCIEFD_SYSID_VERSION_REG (KVASER_PCIEFD_SYSID_BASE + 0x8) #define KVASER_PCIEFD_SYSID_CANFREQ_REG (KVASER_PCIEFD_SYSID_BASE + 0xc) +#define KVASER_PCIEFD_SYSID_BUSFREQ_REG (KVASER_PCIEFD_SYSID_BASE + 0x10) #define KVASER_PCIEFD_SYSID_BUILD_REG (KVASER_PCIEFD_SYSID_BASE + 0x14) /* Shared receive buffer registers */ #define KVASER_PCIEFD_SRB_BASE 0x1f200 @@ -268,6 +269,7 @@ struct kvaser_pciefd { struct kvaser_pciefd_can *can[KVASER_PCIEFD_MAX_CAN_CHANNELS]; void *dma_data[KVASER_PCIEFD_DMA_COUNT]; u8 nr_channels; + u32 bus_freq; u32 freq; u32 freq_to_ticks_div; }; @@ -666,7 +668,7 @@ static void kvaser_pciefd_pwm_start(struct kvaser_pciefd_can *can) spin_lock_irqsave(&can->lock, irq); /* Set frequency to 500 KHz*/ - top = can->can.clock.freq / (2 * 500000) - 1; + top = can->kv_pcie->bus_freq / (2 * 500000) - 1; pwm_ctrl = top & 0xff; pwm_ctrl |= (top & 0xff) << KVASER_PCIEFD_KCAN_PWM_TOP_SHIFT; @@ -1119,6 +1121,8 @@ static int kvaser_pciefd_setup_board(struct kvaser_pciefd *pcie) return -ENODEV; } + pcie->bus_freq = ioread32(pcie->reg_base + + KVASER_PCIEFD_SYSID_BUSFREQ_REG); pcie->freq = ioread32(pcie->reg_base + KVASER_PCIEFD_SYSID_CANFREQ_REG); pcie->freq_to_ticks_div = pcie->freq / 1000000; if (pcie->freq_to_ticks_div == 0) diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c index a697996d81b4..3db619209fe1 100644 --- a/drivers/net/can/m_can/tcan4x5x.c +++ b/drivers/net/can/m_can/tcan4x5x.c @@ -117,7 +117,6 @@ struct tcan4x5x_priv { 
struct m_can_classdev *mcan_dev; struct gpio_desc *reset_gpio; - struct gpio_desc *interrupt_gpio; struct gpio_desc *device_wake_gpio; struct gpio_desc *device_state_gpio; struct regulator *power; @@ -236,8 +235,6 @@ static u32 tcan4x5x_read_reg(struct m_can_classdev *cdev, int reg) struct tcan4x5x_priv *priv = cdev->device_data; u32 val; - tcan4x5x_check_wake(priv); - regmap_read(priv->regmap, priv->reg_offset + reg, &val); return val; @@ -248,8 +245,6 @@ static u32 tcan4x5x_read_fifo(struct m_can_classdev *cdev, int addr_offset) struct tcan4x5x_priv *priv = cdev->device_data; u32 val; - tcan4x5x_check_wake(priv); - regmap_read(priv->regmap, priv->mram_start + addr_offset, &val); return val; @@ -259,8 +254,6 @@ static int tcan4x5x_write_reg(struct m_can_classdev *cdev, int reg, int val) { struct tcan4x5x_priv *priv = cdev->device_data; - tcan4x5x_check_wake(priv); - return regmap_write(priv->regmap, priv->reg_offset + reg, val); } @@ -269,8 +262,6 @@ static int tcan4x5x_write_fifo(struct m_can_classdev *cdev, { struct tcan4x5x_priv *priv = cdev->device_data; - tcan4x5x_check_wake(priv); - return regmap_write(priv->regmap, priv->mram_start + addr_offset, val); } @@ -290,18 +281,13 @@ static int tcan4x5x_write_tcan_reg(struct m_can_classdev *cdev, { struct tcan4x5x_priv *priv = cdev->device_data; - tcan4x5x_check_wake(priv); - return regmap_write(priv->regmap, reg, val); } static int tcan4x5x_clear_interrupts(struct m_can_classdev *cdev) { - struct tcan4x5x_priv *tcan4x5x = cdev->device_data; int ret; - tcan4x5x_check_wake(tcan4x5x); - ret = tcan4x5x_write_tcan_reg(cdev, TCAN4X5X_STATUS, TCAN4X5X_CLEAR_ALL_INT); if (ret) @@ -356,13 +342,6 @@ static int tcan4x5x_parse_config(struct m_can_classdev *cdev) { struct tcan4x5x_priv *tcan4x5x = cdev->device_data; - tcan4x5x->interrupt_gpio = devm_gpiod_get(cdev->dev, "data-ready", - GPIOD_IN); - if (IS_ERR(tcan4x5x->interrupt_gpio)) { - dev_err(cdev->dev, "data-ready gpio not defined\n"); - return -EINVAL; - } - tcan4x5x->device_wake_gpio = devm_gpiod_get(cdev->dev, "device-wake", GPIOD_OUT_HIGH); if (IS_ERR(tcan4x5x->device_wake_gpio)) { @@ -381,8 +360,6 @@ static int tcan4x5x_parse_config(struct m_can_classdev *cdev) if (IS_ERR(tcan4x5x->device_state_gpio)) tcan4x5x->device_state_gpio = NULL; - cdev->net->irq = gpiod_to_irq(tcan4x5x->interrupt_gpio); - tcan4x5x->power = devm_regulator_get_optional(cdev->dev, "vsup"); if (PTR_ERR(tcan4x5x->power) == -EPROBE_DEFER) @@ -447,6 +424,7 @@ static int tcan4x5x_can_probe(struct spi_device *spi) mcan_class->is_peripheral = true; mcan_class->bit_timing = &tcan4x5x_bittiming_const; mcan_class->data_timing = &tcan4x5x_data_bittiming_const; + mcan_class->net->irq = spi->irq; spi_set_drvdata(spi, priv); diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index aa97dbc797b6..bb6032211043 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -55,6 +55,7 @@ #include <linux/workqueue.h> #include <linux/can.h> #include <linux/can/skb.h> +#include <linux/can/can-ml.h> MODULE_ALIAS_LDISC(N_SLCAN); MODULE_DESCRIPTION("serial line CAN interface"); @@ -514,6 +515,7 @@ static struct slcan *slc_alloc(void) char name[IFNAMSIZ]; struct net_device *dev = NULL; struct slcan *sl; + int size; for (i = 0; i < maxdev; i++) { dev = slcan_devs[i]; @@ -527,12 +529,14 @@ static struct slcan *slc_alloc(void) return NULL; sprintf(name, "slcan%d", i); - dev = alloc_netdev(sizeof(*sl), name, NET_NAME_UNKNOWN, slc_setup); + size = ALIGN(sizeof(*sl), NETDEV_ALIGN) + sizeof(struct can_ml_priv); + dev = 
alloc_netdev(size, name, NET_NAME_UNKNOWN, slc_setup); if (!dev) return NULL; dev->base_addr = i; sl = netdev_priv(dev); + dev->ml_priv = (void *)sl + ALIGN(sizeof(*sl), NETDEV_ALIGN); /* Initialize channel control data */ sl->magic = SLCAN_MAGIC; diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index 58992fd61cb9..bee9f7b8dad6 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -17,26 +17,6 @@ * - Sascha Hauer, Marc Kleine-Budde, Pengutronix * - Simon Kallweit, intefo AG * Copyright 2007 - * - * Your platform definition file should specify something like: - * - * static struct mcp251x_platform_data mcp251x_info = { - * .oscillator_frequency = 8000000, - * }; - * - * static struct spi_board_info spi_board_info[] = { - * { - * .modalias = "mcp2510", - * // "mcp2515" or "mcp25625" depending on your controller - * .platform_data = &mcp251x_info, - * .irq = IRQ_EINT13, - * .max_speed_hz = 2*1000*1000, - * .chip_select = 2, - * }, - * }; - * - * Please see mcp251x.h for a description of the fields in - * struct mcp251x_platform_data. */ #include <linux/can/core.h> @@ -53,8 +33,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/netdevice.h> -#include <linux/of.h> -#include <linux/of_device.h> +#include <linux/property.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/spi/spi.h> @@ -914,7 +893,7 @@ static int mcp251x_open(struct net_device *net) priv->tx_skb = NULL; priv->tx_len = 0; - if (!spi->dev.of_node) + if (!dev_fwnode(&spi->dev)) flags = IRQF_TRIGGER_FALLING; ret = request_threaded_irq(spi->irq, NULL, mcp251x_can_ist, @@ -1006,23 +985,20 @@ MODULE_DEVICE_TABLE(spi, mcp251x_id_table); static int mcp251x_can_probe(struct spi_device *spi) { - const struct of_device_id *of_id = of_match_device(mcp251x_of_match, - &spi->dev); + const void *match = device_get_match_data(&spi->dev); struct mcp251x_platform_data *pdata = dev_get_platdata(&spi->dev); struct net_device *net; struct mcp251x_priv *priv; struct clk *clk; int freq, ret; - clk = devm_clk_get(&spi->dev, NULL); - if (IS_ERR(clk)) { - if (pdata) - freq = pdata->oscillator_frequency; - else - return PTR_ERR(clk); - } else { - freq = clk_get_rate(clk); - } + clk = devm_clk_get_optional(&spi->dev, NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + freq = clk_get_rate(clk); + if (freq == 0 && pdata) + freq = pdata->oscillator_frequency; /* Sanity check */ if (freq < 1000000 || freq > 25000000) @@ -1033,11 +1009,9 @@ static int mcp251x_can_probe(struct spi_device *spi) if (!net) return -ENOMEM; - if (!IS_ERR(clk)) { - ret = clk_prepare_enable(clk); - if (ret) - goto out_free; - } + ret = clk_prepare_enable(clk); + if (ret) + goto out_free; net->netdev_ops = &mcp251x_netdev_ops; net->flags |= IFF_ECHO; @@ -1048,8 +1022,8 @@ static int mcp251x_can_probe(struct spi_device *spi) priv->can.clock.freq = freq / 2; priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY; - if (of_id) - priv->model = (enum mcp251x_model)of_id->data; + if (match) + priv->model = (enum mcp251x_model)match; else priv->model = spi_get_device_id(spi)->driver_data; priv->net = net; @@ -1122,8 +1096,7 @@ error_probe: mcp251x_power_enable(priv->power, 0); out_clk: - if (!IS_ERR(clk)) - clk_disable_unprepare(clk); + clk_disable_unprepare(clk); out_free: free_candev(net); @@ -1141,8 +1114,7 @@ static int mcp251x_can_remove(struct spi_device *spi) mcp251x_power_enable(priv->power, 0); - if (!IS_ERR(priv->clk)) - 
clk_disable_unprepare(priv->clk); + clk_disable_unprepare(priv->clk); free_candev(net); @@ -1170,10 +1142,8 @@ static int __maybe_unused mcp251x_can_suspend(struct device *dev) priv->after_suspend = AFTER_SUSPEND_DOWN; } - if (!IS_ERR_OR_NULL(priv->power)) { - regulator_disable(priv->power); - priv->after_suspend |= AFTER_SUSPEND_POWER; - } + mcp251x_power_enable(priv->power, 0); + priv->after_suspend |= AFTER_SUSPEND_POWER; return 0; } diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index daf27133887b..39ca14b0585d 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -46,6 +46,7 @@ #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/can.h> +#include <linux/can/can-ml.h> #include <linux/can/dev.h> #include <linux/can/skb.h> #include <linux/slab.h> @@ -152,6 +153,7 @@ static void vcan_setup(struct net_device *dev) dev->addr_len = 0; dev->tx_queue_len = 0; dev->flags = IFF_NOARP; + dev->ml_priv = netdev_priv(dev); /* set flags according to driver capabilities */ if (echo) @@ -162,8 +164,9 @@ static void vcan_setup(struct net_device *dev) } static struct rtnl_link_ops vcan_link_ops __read_mostly = { - .kind = DRV_NAME, - .setup = vcan_setup, + .kind = DRV_NAME, + .priv_size = sizeof(struct can_ml_priv), + .setup = vcan_setup, }; static __init int vcan_init_module(void) diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index b2106292230e..d6ba9426be4d 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -18,6 +18,7 @@ #include <linux/can/dev.h> #include <linux/can/skb.h> #include <linux/can/vxcan.h> +#include <linux/can/can-ml.h> #include <linux/slab.h> #include <net/rtnetlink.h> @@ -146,6 +147,7 @@ static void vxcan_setup(struct net_device *dev) dev->flags = (IFF_NOARP|IFF_ECHO); dev->netdev_ops = &vxcan_netdev_ops; dev->needs_free_netdev = true; + dev->ml_priv = netdev_priv(dev) + ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN); } /* forward declaration for rtnl_create_link() */ @@ -281,7 +283,7 @@ static struct net *vxcan_get_link_net(const struct net_device *dev) static struct rtnl_link_ops vxcan_link_ops = { .kind = DRV_NAME, - .priv_size = sizeof(struct vxcan_priv), + .priv_size = ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN) + sizeof(struct can_ml_priv), .setup = vxcan_setup, .newlink = vxcan_newlink, .dellink = vxcan_dellink, diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index c48e29486b10..1d8d36de4d20 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -13,7 +13,7 @@ #include <linux/of_mdio.h> #include <linux/of_net.h> #include <linux/of_platform.h> -#include <linux/phy.h> +#include <linux/phylink.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> #include <linux/reset.h> @@ -633,61 +633,75 @@ mt7530_get_sset_count(struct dsa_switch *ds, int port, int sset) return ARRAY_SIZE(mt7530_mib); } -static void mt7530_adjust_link(struct dsa_switch *ds, int port, - struct phy_device *phydev) +static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface) { struct mt7530_priv *priv = ds->priv; + u8 tx_delay = 0; + int val; - if (phy_is_pseudo_fixed_link(phydev)) { - dev_dbg(priv->dev, "phy-mode for master device = %x\n", - phydev->interface); + mutex_lock(&priv->reg_mutex); - /* Setup TX circuit incluing relevant PAD and driving */ - mt7530_pad_clk_setup(ds, phydev->interface); + val = mt7530_read(priv, MT7530_MHWTRAP); - if (priv->id == ID_MT7530) { - /* Setup RX circuit, relevant PAD and driving on the - * host which must be placed after the 
setup on the - * device side is all finished. - */ - mt7623_pad_clk_setup(ds); - } - } else { - u16 lcl_adv = 0, rmt_adv = 0; - u8 flowctrl; - u32 mcr = PMCR_USERP_LINK | PMCR_FORCE_MODE; + val |= MHWTRAP_MANUAL | MHWTRAP_P5_MAC_SEL | MHWTRAP_P5_DIS; + val &= ~MHWTRAP_P5_RGMII_MODE & ~MHWTRAP_PHY0_SEL; - switch (phydev->speed) { - case SPEED_1000: - mcr |= PMCR_FORCE_SPEED_1000; - break; - case SPEED_100: - mcr |= PMCR_FORCE_SPEED_100; - break; - } + switch (priv->p5_intf_sel) { + case P5_INTF_SEL_PHY_P0: + /* MT7530_P5_MODE_GPHY_P0: 2nd GMAC -> P5 -> P0 */ + val |= MHWTRAP_PHY0_SEL; + /* fall through */ + case P5_INTF_SEL_PHY_P4: + /* MT7530_P5_MODE_GPHY_P4: 2nd GMAC -> P5 -> P4 */ + val &= ~MHWTRAP_P5_MAC_SEL & ~MHWTRAP_P5_DIS; + + /* Setup the MAC by default for the cpu port */ + mt7530_write(priv, MT7530_PMCR_P(5), 0x56300); + break; + case P5_INTF_SEL_GMAC5: + /* MT7530_P5_MODE_GMAC: P5 -> External phy or 2nd GMAC */ + val &= ~MHWTRAP_P5_DIS; + break; + case P5_DISABLED: + interface = PHY_INTERFACE_MODE_NA; + break; + default: + dev_err(ds->dev, "Unsupported p5_intf_sel %d\n", + priv->p5_intf_sel); + goto unlock_exit; + } - if (phydev->link) - mcr |= PMCR_FORCE_LNK; + /* Setup RGMII settings */ + if (phy_interface_mode_is_rgmii(interface)) { + val |= MHWTRAP_P5_RGMII_MODE; - if (phydev->duplex) { - mcr |= PMCR_FORCE_FDX; + /* P5 RGMII RX Clock Control: delay setting for 1000M */ + mt7530_write(priv, MT7530_P5RGMIIRXCR, CSR_RGMII_EDGE_ALIGN); - if (phydev->pause) - rmt_adv = LPA_PAUSE_CAP; - if (phydev->asym_pause) - rmt_adv |= LPA_PAUSE_ASYM; + /* Don't set delay in DSA mode */ + if (!dsa_is_dsa_port(priv->ds, 5) && + (interface == PHY_INTERFACE_MODE_RGMII_TXID || + interface == PHY_INTERFACE_MODE_RGMII_ID)) + tx_delay = 4; /* n * 0.5 ns */ - lcl_adv = linkmode_adv_to_lcl_adv_t( - phydev->advertising); - flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); + /* P5 RGMII TX Clock Control: delay x */ + mt7530_write(priv, MT7530_P5RGMIITXCR, + CSR_RGMII_TXC_CFG(0x10 + tx_delay)); - if (flowctrl & FLOW_CTRL_TX) - mcr |= PMCR_TX_FC_EN; - if (flowctrl & FLOW_CTRL_RX) - mcr |= PMCR_RX_FC_EN; - } - mt7530_write(priv, MT7530_PMCR_P(port), mcr); + /* reduce P5 RGMII Tx driving, 8mA */ + mt7530_write(priv, MT7530_IO_DRV_CR, + P5_IO_CLK_DRV(1) | P5_IO_DATA_DRV(1)); } + + mt7530_write(priv, MT7530_MHWTRAP, val); + + dev_dbg(ds->dev, "Setup P5, HWTRAP=0x%x, intf_sel=%s, phy-mode=%s\n", + val, p5_intf_modes(priv->p5_intf_sel), phy_modes(interface)); + + priv->p5_interface = interface; + +unlock_exit: + mutex_unlock(&priv->reg_mutex); } static int @@ -698,9 +712,6 @@ mt7530_cpu_port_enable(struct mt7530_priv *priv, mt7530_write(priv, MT7530_PVC_P(port), PORT_SPEC_TAG); - /* Setup the MAC by default for the cpu port */ - mt7530_write(priv, MT7530_PMCR_P(port), PMCR_CPUP_LINK); - /* Disable auto learning on the cpu port */ mt7530_set(priv, MT7530_PSC_P(port), SA_DIS); @@ -731,9 +742,6 @@ mt7530_port_enable(struct dsa_switch *ds, int port, mutex_lock(&priv->reg_mutex); - /* Setup the MAC for the user port */ - mt7530_write(priv, MT7530_PMCR_P(port), PMCR_USERP_LINK); - /* Allow the user port gets connected to the cpu port and also * restore the port matrix if the port is the member of a certain * bridge. 
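In the mt7530_setup_port5() hunk above, the RGMII TX clock delay is programmed in 0.5 ns steps, per the "n * 0.5 ns" comment. Purely as an illustration of that arithmetic, assuming nothing beyond the values visible in the hunk:

/* Illustration only: the delay step size comes from the "n * 0.5 ns"
 * comment above; this is not part of the patch.
 */
#include <stdio.h>

int main(void)
{
	unsigned int tx_delay = 4;	/* steps set for RGMII_ID/RGMII_TXID */
	unsigned int step_ps = 500;	/* one step = 0.5 ns */

	printf("TX clock delay: %u ps\n", tx_delay * step_ps);	/* 2000 ps */
	printf("value passed to CSR_RGMII_TXC_CFG(): 0x%x\n", 0x10 + tx_delay);
	return 0;
}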
@@ -742,7 +750,7 @@ mt7530_port_enable(struct dsa_switch *ds, int port, priv->ports[port].enable = true; mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK, priv->ports[port].pm); - mt7530_port_set_status(priv, port, 1); + mt7530_port_set_status(priv, port, 0); mutex_unlock(&priv->reg_mutex); @@ -1232,10 +1240,13 @@ static int mt7530_setup(struct dsa_switch *ds) { struct mt7530_priv *priv = ds->priv; - int ret, i; - u32 id, val; - struct device_node *dn; + struct device_node *phy_node; + struct device_node *mac_np; struct mt7530_dummy_poll p; + phy_interface_t interface; + struct device_node *dn; + u32 id, val; + int ret, i; /* The parent node of master netdev which holds the common system * controller also is the container for two GMACs nodes representing @@ -1305,6 +1316,8 @@ mt7530_setup(struct dsa_switch *ds) val |= MHWTRAP_MANUAL; mt7530_write(priv, MT7530_MHWTRAP, val); + priv->p6_interface = PHY_INTERFACE_MODE_NA; + /* Enable and reset MIB counters */ mt7530_mib_reset(ds); @@ -1321,6 +1334,40 @@ mt7530_setup(struct dsa_switch *ds) mt7530_port_disable(ds, i); } + /* Setup port 5 */ + priv->p5_intf_sel = P5_DISABLED; + interface = PHY_INTERFACE_MODE_NA; + + if (!dsa_is_unused_port(ds, 5)) { + priv->p5_intf_sel = P5_INTF_SEL_GMAC5; + interface = of_get_phy_mode(ds->ports[5].dn); + } else { + /* Scan the ethernet nodes. look for GMAC1, lookup used phy */ + for_each_child_of_node(dn, mac_np) { + if (!of_device_is_compatible(mac_np, + "mediatek,eth-mac")) + continue; + + ret = of_property_read_u32(mac_np, "reg", &id); + if (ret < 0 || id != 1) + continue; + + phy_node = of_parse_phandle(mac_np, "phy-handle", 0); + if (phy_node->parent == priv->dev->of_node->parent) { + interface = of_get_phy_mode(mac_np); + id = of_mdio_parse_addr(ds->dev, phy_node); + if (id == 0) + priv->p5_intf_sel = P5_INTF_SEL_PHY_P0; + if (id == 4) + priv->p5_intf_sel = P5_INTF_SEL_PHY_P4; + } + of_node_put(phy_node); + break; + } + } + + mt7530_setup_port5(ds, interface); + /* Flush the FDB table */ ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL); if (ret < 0) @@ -1329,6 +1376,216 @@ mt7530_setup(struct dsa_switch *ds) return 0; } +static void mt7530_phylink_mac_config(struct dsa_switch *ds, int port, + unsigned int mode, + const struct phylink_link_state *state) +{ + struct mt7530_priv *priv = ds->priv; + u32 mcr_cur, mcr_new; + + switch (port) { + case 0: /* Internal phy */ + case 1: + case 2: + case 3: + case 4: + if (state->interface != PHY_INTERFACE_MODE_GMII) + return; + break; + case 5: /* 2nd cpu port with phy of port 0 or 4 / external phy */ + if (priv->p5_interface == state->interface) + break; + if (!phy_interface_mode_is_rgmii(state->interface) && + state->interface != PHY_INTERFACE_MODE_MII && + state->interface != PHY_INTERFACE_MODE_GMII) + return; + + mt7530_setup_port5(ds, state->interface); + break; + case 6: /* 1st cpu port */ + if (priv->p6_interface == state->interface) + break; + + if (state->interface != PHY_INTERFACE_MODE_RGMII && + state->interface != PHY_INTERFACE_MODE_TRGMII) + return; + + /* Setup TX circuit incluing relevant PAD and driving */ + mt7530_pad_clk_setup(ds, state->interface); + + if (priv->id == ID_MT7530) { + /* Setup RX circuit, relevant PAD and driving on the + * host which must be placed after the setup on the + * device side is all finished. 
+ */ + mt7623_pad_clk_setup(ds); + } + + priv->p6_interface = state->interface; + break; + default: + dev_err(ds->dev, "%s: unsupported port: %i\n", __func__, port); + return; + } + + if (phylink_autoneg_inband(mode)) { + dev_err(ds->dev, "%s: in-band negotiation unsupported\n", + __func__); + return; + } + + mcr_cur = mt7530_read(priv, MT7530_PMCR_P(port)); + mcr_new = mcr_cur; + mcr_new &= ~(PMCR_FORCE_SPEED_1000 | PMCR_FORCE_SPEED_100 | + PMCR_FORCE_FDX | PMCR_TX_FC_EN | PMCR_RX_FC_EN); + mcr_new |= PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | PMCR_BACKOFF_EN | + PMCR_BACKPR_EN | PMCR_FORCE_MODE | PMCR_FORCE_LNK; + + /* Are we connected to external phy */ + if (port == 5 && dsa_is_user_port(ds, 5)) + mcr_new |= PMCR_EXT_PHY; + + switch (state->speed) { + case SPEED_1000: + mcr_new |= PMCR_FORCE_SPEED_1000; + break; + case SPEED_100: + mcr_new |= PMCR_FORCE_SPEED_100; + break; + } + if (state->duplex == DUPLEX_FULL) { + mcr_new |= PMCR_FORCE_FDX; + if (state->pause & MLO_PAUSE_TX) + mcr_new |= PMCR_TX_FC_EN; + if (state->pause & MLO_PAUSE_RX) + mcr_new |= PMCR_RX_FC_EN; + } + + if (mcr_new != mcr_cur) + mt7530_write(priv, MT7530_PMCR_P(port), mcr_new); +} + +static void mt7530_phylink_mac_link_down(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface) +{ + struct mt7530_priv *priv = ds->priv; + + mt7530_port_set_status(priv, port, 0); +} + +static void mt7530_phylink_mac_link_up(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface, + struct phy_device *phydev) +{ + struct mt7530_priv *priv = ds->priv; + + mt7530_port_set_status(priv, port, 1); +} + +static void mt7530_phylink_validate(struct dsa_switch *ds, int port, + unsigned long *supported, + struct phylink_link_state *state) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; + + switch (port) { + case 0: /* Internal phy */ + case 1: + case 2: + case 3: + case 4: + if (state->interface != PHY_INTERFACE_MODE_NA && + state->interface != PHY_INTERFACE_MODE_GMII) + goto unsupported; + break; + case 5: /* 2nd cpu port with phy of port 0 or 4 / external phy */ + if (state->interface != PHY_INTERFACE_MODE_NA && + !phy_interface_mode_is_rgmii(state->interface) && + state->interface != PHY_INTERFACE_MODE_MII && + state->interface != PHY_INTERFACE_MODE_GMII) + goto unsupported; + break; + case 6: /* 1st cpu port */ + if (state->interface != PHY_INTERFACE_MODE_NA && + state->interface != PHY_INTERFACE_MODE_RGMII && + state->interface != PHY_INTERFACE_MODE_TRGMII) + goto unsupported; + break; + default: + dev_err(ds->dev, "%s: unsupported port: %i\n", __func__, port); +unsupported: + linkmode_zero(supported); + return; + } + + phylink_set_port_modes(mask); + phylink_set(mask, Autoneg); + + if (state->interface == PHY_INTERFACE_MODE_TRGMII) { + phylink_set(mask, 1000baseT_Full); + } else { + phylink_set(mask, 10baseT_Half); + phylink_set(mask, 10baseT_Full); + phylink_set(mask, 100baseT_Half); + phylink_set(mask, 100baseT_Full); + + if (state->interface != PHY_INTERFACE_MODE_MII) { + phylink_set(mask, 1000baseT_Half); + phylink_set(mask, 1000baseT_Full); + if (port == 5) + phylink_set(mask, 1000baseX_Full); + } + } + + phylink_set(mask, Pause); + phylink_set(mask, Asym_Pause); + + linkmode_and(supported, supported, mask); + linkmode_and(state->advertising, state->advertising, mask); +} + +static int +mt7530_phylink_mac_link_state(struct dsa_switch *ds, int port, + struct phylink_link_state *state) +{ + struct mt7530_priv *priv = ds->priv; + u32 pmsr; + + if (port < 0 || port >= MT7530_NUM_PORTS) 
+ return -EINVAL; + + pmsr = mt7530_read(priv, MT7530_PMSR_P(port)); + + state->link = (pmsr & PMSR_LINK); + state->an_complete = state->link; + state->duplex = !!(pmsr & PMSR_DPX); + + switch (pmsr & PMSR_SPEED_MASK) { + case PMSR_SPEED_10: + state->speed = SPEED_10; + break; + case PMSR_SPEED_100: + state->speed = SPEED_100; + break; + case PMSR_SPEED_1000: + state->speed = SPEED_1000; + break; + default: + state->speed = SPEED_UNKNOWN; + break; + } + + state->pause &= ~(MLO_PAUSE_RX | MLO_PAUSE_TX); + if (pmsr & PMSR_RX_FC) + state->pause |= MLO_PAUSE_RX; + if (pmsr & PMSR_TX_FC) + state->pause |= MLO_PAUSE_TX; + + return 1; +} + static const struct dsa_switch_ops mt7530_switch_ops = { .get_tag_protocol = mtk_get_tag_protocol, .setup = mt7530_setup, @@ -1337,7 +1594,6 @@ static const struct dsa_switch_ops mt7530_switch_ops = { .phy_write = mt7530_phy_write, .get_ethtool_stats = mt7530_get_ethtool_stats, .get_sset_count = mt7530_get_sset_count, - .adjust_link = mt7530_adjust_link, .port_enable = mt7530_port_enable, .port_disable = mt7530_port_disable, .port_stp_state_set = mt7530_stp_state_set, @@ -1350,6 +1606,11 @@ static const struct dsa_switch_ops mt7530_switch_ops = { .port_vlan_prepare = mt7530_port_vlan_prepare, .port_vlan_add = mt7530_port_vlan_add, .port_vlan_del = mt7530_port_vlan_del, + .phylink_validate = mt7530_phylink_validate, + .phylink_mac_link_state = mt7530_phylink_mac_link_state, + .phylink_mac_config = mt7530_phylink_mac_config, + .phylink_mac_link_down = mt7530_phylink_mac_link_down, + .phylink_mac_link_up = mt7530_phylink_mac_link_up, }; static const struct of_device_id mt7530_of_match[] = { diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index bfac90f48102..ccb9da8cad0d 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -186,6 +186,7 @@ enum mt7530_vlan_port_attr { /* Register for port MAC control register */ #define MT7530_PMCR_P(x) (0x3000 + ((x) * 0x100)) #define PMCR_IFG_XMIT(x) (((x) & 0x3) << 18) +#define PMCR_EXT_PHY BIT(17) #define PMCR_MAC_MODE BIT(16) #define PMCR_FORCE_MODE BIT(15) #define PMCR_TX_EN BIT(14) @@ -198,26 +199,20 @@ enum mt7530_vlan_port_attr { #define PMCR_FORCE_SPEED_100 BIT(2) #define PMCR_FORCE_FDX BIT(1) #define PMCR_FORCE_LNK BIT(0) -#define PMCR_COMMON_LINK (PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | \ - PMCR_BACKOFF_EN | PMCR_BACKPR_EN | \ - PMCR_TX_EN | PMCR_RX_EN | \ - PMCR_TX_FC_EN | PMCR_RX_FC_EN) -#define PMCR_CPUP_LINK (PMCR_COMMON_LINK | PMCR_FORCE_MODE | \ - PMCR_FORCE_SPEED_1000 | \ - PMCR_FORCE_FDX | \ - PMCR_FORCE_LNK) -#define PMCR_USERP_LINK PMCR_COMMON_LINK -#define PMCR_FIXED_LINK (PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | \ - PMCR_FORCE_MODE | PMCR_TX_EN | \ - PMCR_RX_EN | PMCR_BACKPR_EN | \ - PMCR_BACKOFF_EN | \ - PMCR_FORCE_SPEED_1000 | \ - PMCR_FORCE_FDX | \ - PMCR_FORCE_LNK) -#define PMCR_FIXED_LINK_FC (PMCR_FIXED_LINK | \ - PMCR_TX_FC_EN | PMCR_RX_FC_EN) +#define PMCR_SPEED_MASK (PMCR_FORCE_SPEED_100 | \ + PMCR_FORCE_SPEED_1000) #define MT7530_PMSR_P(x) (0x3008 + (x) * 0x100) +#define PMSR_EEE1G BIT(7) +#define PMSR_EEE100M BIT(6) +#define PMSR_RX_FC BIT(5) +#define PMSR_TX_FC BIT(4) +#define PMSR_SPEED_1000 BIT(3) +#define PMSR_SPEED_100 BIT(2) +#define PMSR_SPEED_10 0x00 +#define PMSR_SPEED_MASK (PMSR_SPEED_100 | PMSR_SPEED_1000) +#define PMSR_DPX BIT(1) +#define PMSR_LINK BIT(0) /* Register for MIB */ #define MT7530_PORT_MIB_COUNTER(x) (0x4000 + (x) * 0x100) @@ -251,6 +246,7 @@ enum mt7530_vlan_port_attr { /* Register for hw trap modification */ #define MT7530_MHWTRAP 0x7804 +#define 
MHWTRAP_PHY0_SEL BIT(20) #define MHWTRAP_MANUAL BIT(16) #define MHWTRAP_P5_MAC_SEL BIT(13) #define MHWTRAP_P6_DIS BIT(8) @@ -408,6 +404,30 @@ struct mt7530_port { u16 pvid; }; +/* Port 5 interface select definitions */ +enum p5_interface_select { + P5_DISABLED = 0, + P5_INTF_SEL_PHY_P0, + P5_INTF_SEL_PHY_P4, + P5_INTF_SEL_GMAC5, +}; + +static const char *p5_intf_modes(unsigned int p5_interface) +{ + switch (p5_interface) { + case P5_DISABLED: + return "DISABLED"; + case P5_INTF_SEL_PHY_P0: + return "PHY P0"; + case P5_INTF_SEL_PHY_P4: + return "PHY P4"; + case P5_INTF_SEL_GMAC5: + return "GMAC5"; + default: + return "unknown"; + } +} + /* struct mt7530_priv - This is the main data structure for holding the state * of the driver * @dev: The device pointer @@ -423,6 +443,8 @@ struct mt7530_port { * @ports: Holding the state among ports * @reg_mutex: The lock for protecting among process accessing * registers + * @p6_interface Holding the current port 6 interface + * @p5_intf_sel: Holding the current port 5 interface select */ struct mt7530_priv { struct device *dev; @@ -435,6 +457,9 @@ struct mt7530_priv { struct gpio_desc *reset; unsigned int id; bool mcm; + phy_interface_t p6_interface; + phy_interface_t p5_interface; + unsigned int p5_intf_sel; struct mt7530_port ports[MT7530_NUM_PORTS]; /* protect among processes for registers access*/ diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 93a2d4deb27c..2830dc283ce6 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -168,6 +168,7 @@ config ETHOC source "drivers/net/ethernet/packetengines/Kconfig" source "drivers/net/ethernet/pasemi/Kconfig" +source "drivers/net/ethernet/pensando/Kconfig" source "drivers/net/ethernet/qlogic/Kconfig" source "drivers/net/ethernet/qualcomm/Kconfig" source "drivers/net/ethernet/rdc/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index fb9155cffcff..061edd22f507 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -97,3 +97,4 @@ obj-$(CONFIG_NET_VENDOR_WIZNET) += wiznet/ obj-$(CONFIG_NET_VENDOR_XILINX) += xilinx/ obj-$(CONFIG_NET_VENDOR_XIRCOM) += xircom/ obj-$(CONFIG_NET_VENDOR_SYNOPSYS) += synopsys/ +obj-$(CONFIG_NET_VENDOR_PENSANDO) += pensando/ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 5402867272be..162d7d8fb295 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1348,7 +1348,7 @@ static u32 ingress_fq_count(struct dpaa2_eth_priv *priv) return total; } -static void wait_for_fq_empty(struct dpaa2_eth_priv *priv) +static void wait_for_ingress_fq_empty(struct dpaa2_eth_priv *priv) { int retries = 10; u32 pending; @@ -1360,6 +1360,31 @@ static void wait_for_fq_empty(struct dpaa2_eth_priv *priv) } while (pending && --retries); } +#define DPNI_TX_PENDING_VER_MAJOR 7 +#define DPNI_TX_PENDING_VER_MINOR 13 +static void wait_for_egress_fq_empty(struct dpaa2_eth_priv *priv) +{ + union dpni_statistics stats; + int retries = 10; + int err; + + if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_TX_PENDING_VER_MAJOR, + DPNI_TX_PENDING_VER_MINOR) < 0) + goto out; + + do { + err = dpni_get_statistics(priv->mc_io, 0, priv->mc_token, 6, + &stats); + if (err) + goto out; + if (stats.page_6.tx_pending_frames == 0) + return; + } while (--retries); + +out: + msleep(500); +} + static int dpaa2_eth_stop(struct net_device *net_dev) { struct dpaa2_eth_priv *priv = netdev_priv(net_dev); 
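wait_for_egress_fq_empty() above drains Tx with a bounded retry loop and keeps a fixed 500 ms sleep as the fallback when the firmware cannot report pending frames. A rough user-space sketch of that shape follows; get_tx_pending() is only a stand-in for the dpni statistics call, not a real API.

/* Sketch of the bounded-drain pattern used above; not part of the patch. */
#include <stdio.h>
#include <unistd.h>

static int get_tx_pending(void)
{
	static int pending = 3;		/* pretend frames drain over time */

	return pending > 0 ? pending-- : 0;
}

static void wait_for_egress_empty(int fw_supports_pending_stat)
{
	int retries = 10;

	if (!fw_supports_pending_stat)
		goto out;

	do {
		if (get_tx_pending() == 0)
			return;		/* drained, no extra sleep needed */
	} while (--retries);

out:
	usleep(500 * 1000);		/* fixed 500 ms fallback */
}

int main(void)
{
	wait_for_egress_empty(1);
	puts("egress queues drained");
	return 0;
}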
@@ -1379,7 +1404,7 @@ static int dpaa2_eth_stop(struct net_device *net_dev) * on WRIOP. After it finishes, wait until all remaining frames on Rx * and Tx conf queues are consumed on NAPI poll. */ - msleep(500); + wait_for_egress_fq_empty(priv); do { dpni_disable(priv->mc_io, 0, priv->mc_token); @@ -1395,7 +1420,7 @@ static int dpaa2_eth_stop(struct net_device *net_dev) */ } - wait_for_fq_empty(priv); + wait_for_ingress_fq_empty(priv); disable_ch_napi(priv); /* Empty the buffer pool */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index 93076fe01b45..0aa1c34019bb 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -28,6 +28,11 @@ static char dpaa2_ethtool_stats[][ETH_GSTRING_LEN] = { "[hw] rx nobuffer discards", "[hw] tx discarded frames", "[hw] tx confirmed frames", + "[hw] tx dequeued bytes", + "[hw] tx dequeued frames", + "[hw] tx rejected bytes", + "[hw] tx rejected frames", + "[hw] tx pending frames", }; #define DPAA2_ETH_NUM_STATS ARRAY_SIZE(dpaa2_ethtool_stats) @@ -188,27 +193,33 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev, struct dpaa2_eth_priv *priv = netdev_priv(net_dev); struct dpaa2_eth_drv_stats *extras; struct dpaa2_eth_ch_stats *ch_stats; + int dpni_stats_page_size[DPNI_STATISTICS_CNT] = { + sizeof(dpni_stats.page_0), + sizeof(dpni_stats.page_1), + sizeof(dpni_stats.page_2), + sizeof(dpni_stats.page_3), + sizeof(dpni_stats.page_4), + sizeof(dpni_stats.page_5), + sizeof(dpni_stats.page_6), + }; memset(data, 0, sizeof(u64) * (DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS)); /* Print standard counters, from DPNI statistics */ - for (j = 0; j <= 2; j++) { + for (j = 0; j <= 6; j++) { + /* We're not interested in pages 4 & 5 for now */ + if (j == 4 || j == 5) + continue; err = dpni_get_statistics(priv->mc_io, 0, priv->mc_token, j, &dpni_stats); - if (err != 0) + if (err == -EINVAL) + /* Older firmware versions don't support all pages */ + memset(&dpni_stats, 0, sizeof(dpni_stats)); + else netdev_warn(net_dev, "dpni_get_stats(%d) failed\n", j); - switch (j) { - case 0: - num_cnt = sizeof(dpni_stats.page_0) / sizeof(u64); - break; - case 1: - num_cnt = sizeof(dpni_stats.page_1) / sizeof(u64); - break; - case 2: - num_cnt = sizeof(dpni_stats.page_2) / sizeof(u64); - break; - } + + num_cnt = dpni_stats_page_size[j] / sizeof(u64); for (k = 0; k < num_cnt; k++) *(data + i++) = dpni_stats.raw.counter[k]; } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c index 05e30893dee6..dd54e6953aeb 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c @@ -1470,7 +1470,7 @@ int dpni_get_queue(struct fsl_mc_io *mc_io, * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' * @token: Token of DPNI object * @page: Selects the statistics page to retrieve, see - * DPNI_GET_STATISTICS output. Pages are numbered 0 to 2. + * DPNI_GET_STATISTICS output. Pages are numbered 0 to 6. * @stat: Structure containing the statistics * * Return: '0' on Success; Error code otherwise. 
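The dpaa2 ethtool hunk above derives the number of u64 counters in each statistics page from sizeof() on the page struct. The sketch below repeats that arithmetic on a mock union; the member counts are invented for illustration, and the real layout appears in the dpni.h hunk that follows.

/* Mock of the per-page sizeof() arithmetic used above; the page structs
 * are invented examples, not the real dpni_statistics layout.
 */
#include <stdio.h>
#include <stdint.h>

union mock_stats {
	struct { uint64_t a, b, c; } page_0;	/* 3 counters, for example */
	struct { uint64_t a; } page_6;		/* 1 counter, for example */
	struct { uint64_t counter[7]; } raw;
};

int main(void)
{
	printf("page_0 carries %zu counters\n",
	       sizeof(((union mock_stats *)0)->page_0) / sizeof(uint64_t));
	printf("page_6 carries %zu counters\n",
	       sizeof(((union mock_stats *)0)->page_6) / sizeof(uint64_t));
	return 0;
}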
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h index 3e8fc6c7a8da..fd583911b6c0 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h @@ -416,6 +416,26 @@ int dpni_get_tx_data_offset(struct fsl_mc_io *mc_io, * lack of buffers * @page_2.egress_discarded_frames: Egress discarded frame count * @page_2.egress_confirmed_frames: Egress confirmed frame count + * @page3: Page_3 statistics structure + * @page_3.egress_dequeue_bytes: Cumulative count of the number of bytes + * dequeued from egress FQs + * @page_3.egress_dequeue_frames: Cumulative count of the number of frames + * dequeued from egress FQs + * @page_3.egress_reject_bytes: Cumulative count of the number of bytes in + * egress frames whose enqueue was rejected + * @page_3.egress_reject_frames: Cumulative count of the number of egress + * frames whose enqueue was rejected + * @page_4: Page_4 statistics structure: congestion points + * @page_4.cgr_reject_frames: number of rejected frames due to congestion point + * @page_4.cgr_reject_bytes: number of rejected bytes due to congestion point + * @page_5: Page_5 statistics structure: policer + * @page_5.policer_cnt_red: NUmber of red colored frames + * @page_5.policer_cnt_yellow: number of yellow colored frames + * @page_5.policer_cnt_green: number of green colored frames + * @page_5.policer_cnt_re_red: number of recolored red frames + * @page_5.policer_cnt_re_yellow: number of recolored yellow frames + * @page_6: Page_6 statistics structure + * @page_6.tx_pending_frames: total number of frames pending in egress FQs * @raw: raw statistics structure, used to index counters */ union dpni_statistics { @@ -443,6 +463,26 @@ union dpni_statistics { u64 egress_confirmed_frames; } page_2; struct { + u64 egress_dequeue_bytes; + u64 egress_dequeue_frames; + u64 egress_reject_bytes; + u64 egress_reject_frames; + } page_3; + struct { + u64 cgr_reject_frames; + u64 cgr_reject_bytes; + } page_4; + struct { + u64 policer_cnt_red; + u64 policer_cnt_yellow; + u64 policer_cnt_green; + u64 policer_cnt_re_red; + u64 policer_cnt_re_yellow; + } page_5; + struct { + u64 tx_pending_frames; + } page_6; + struct { u64 counter[DPNI_STATISTICS_CNT]; } raw; }; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 412c0340fed9..24bf7f68375f 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -105,43 +105,6 @@ const char gfar_driver_version[] = "2.0"; -static int gfar_enet_open(struct net_device *dev); -static netdev_tx_t gfar_start_xmit(struct sk_buff *skb, struct net_device *dev); -static void gfar_reset_task(struct work_struct *work); -static void gfar_timeout(struct net_device *dev); -static int gfar_close(struct net_device *dev); -static void gfar_alloc_rx_buffs(struct gfar_priv_rx_q *rx_queue, - int alloc_cnt); -static int gfar_set_mac_address(struct net_device *dev); -static int gfar_change_mtu(struct net_device *dev, int new_mtu); -static irqreturn_t gfar_error(int irq, void *dev_id); -static irqreturn_t gfar_transmit(int irq, void *dev_id); -static irqreturn_t gfar_interrupt(int irq, void *dev_id); -static void adjust_link(struct net_device *dev); -static noinline void gfar_update_link_state(struct gfar_private *priv); -static int init_phy(struct net_device *dev); -static int gfar_probe(struct platform_device *ofdev); -static int gfar_remove(struct platform_device *ofdev); -static void 
free_skb_resources(struct gfar_private *priv); -static void gfar_set_multi(struct net_device *dev); -static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr); -static void gfar_configure_serdes(struct net_device *dev); -static int gfar_poll_rx(struct napi_struct *napi, int budget); -static int gfar_poll_tx(struct napi_struct *napi, int budget); -static int gfar_poll_rx_sq(struct napi_struct *napi, int budget); -static int gfar_poll_tx_sq(struct napi_struct *napi, int budget); -#ifdef CONFIG_NET_POLL_CONTROLLER -static void gfar_netpoll(struct net_device *dev); -#endif -int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit); -static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue); -static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb); -static void gfar_halt_nodisable(struct gfar_private *priv); -static void gfar_clear_exact_match(struct net_device *dev); -static void gfar_set_mac_for_addr(struct net_device *dev, int num, - const u8 *addr); -static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); - MODULE_AUTHOR("Freescale Semiconductor, Inc"); MODULE_DESCRIPTION("Gianfar Ethernet Driver"); MODULE_LICENSE("GPL"); @@ -162,138 +125,6 @@ static void gfar_init_rxbdp(struct gfar_priv_rx_q *rx_queue, struct rxbd8 *bdp, bdp->lstatus = cpu_to_be32(lstatus); } -static void gfar_init_bds(struct net_device *ndev) -{ - struct gfar_private *priv = netdev_priv(ndev); - struct gfar __iomem *regs = priv->gfargrp[0].regs; - struct gfar_priv_tx_q *tx_queue = NULL; - struct gfar_priv_rx_q *rx_queue = NULL; - struct txbd8 *txbdp; - u32 __iomem *rfbptr; - int i, j; - - for (i = 0; i < priv->num_tx_queues; i++) { - tx_queue = priv->tx_queue[i]; - /* Initialize some variables in our dev structure */ - tx_queue->num_txbdfree = tx_queue->tx_ring_size; - tx_queue->dirty_tx = tx_queue->tx_bd_base; - tx_queue->cur_tx = tx_queue->tx_bd_base; - tx_queue->skb_curtx = 0; - tx_queue->skb_dirtytx = 0; - - /* Initialize Transmit Descriptor Ring */ - txbdp = tx_queue->tx_bd_base; - for (j = 0; j < tx_queue->tx_ring_size; j++) { - txbdp->lstatus = 0; - txbdp->bufPtr = 0; - txbdp++; - } - - /* Set the last descriptor in the ring to indicate wrap */ - txbdp--; - txbdp->status = cpu_to_be16(be16_to_cpu(txbdp->status) | - TXBD_WRAP); - } - - rfbptr = ®s->rfbptr0; - for (i = 0; i < priv->num_rx_queues; i++) { - rx_queue = priv->rx_queue[i]; - - rx_queue->next_to_clean = 0; - rx_queue->next_to_use = 0; - rx_queue->next_to_alloc = 0; - - /* make sure next_to_clean != next_to_use after this - * by leaving at least 1 unused descriptor - */ - gfar_alloc_rx_buffs(rx_queue, gfar_rxbd_unused(rx_queue)); - - rx_queue->rfbptr = rfbptr; - rfbptr += 2; - } -} - -static int gfar_alloc_skb_resources(struct net_device *ndev) -{ - void *vaddr; - dma_addr_t addr; - int i, j; - struct gfar_private *priv = netdev_priv(ndev); - struct device *dev = priv->dev; - struct gfar_priv_tx_q *tx_queue = NULL; - struct gfar_priv_rx_q *rx_queue = NULL; - - priv->total_tx_ring_size = 0; - for (i = 0; i < priv->num_tx_queues; i++) - priv->total_tx_ring_size += priv->tx_queue[i]->tx_ring_size; - - priv->total_rx_ring_size = 0; - for (i = 0; i < priv->num_rx_queues; i++) - priv->total_rx_ring_size += priv->rx_queue[i]->rx_ring_size; - - /* Allocate memory for the buffer descriptors */ - vaddr = dma_alloc_coherent(dev, - (priv->total_tx_ring_size * - sizeof(struct txbd8)) + - (priv->total_rx_ring_size * - sizeof(struct rxbd8)), - &addr, GFP_KERNEL); - if (!vaddr) - return -ENOMEM; - - 
for (i = 0; i < priv->num_tx_queues; i++) { - tx_queue = priv->tx_queue[i]; - tx_queue->tx_bd_base = vaddr; - tx_queue->tx_bd_dma_base = addr; - tx_queue->dev = ndev; - /* enet DMA only understands physical addresses */ - addr += sizeof(struct txbd8) * tx_queue->tx_ring_size; - vaddr += sizeof(struct txbd8) * tx_queue->tx_ring_size; - } - - /* Start the rx descriptor ring where the tx ring leaves off */ - for (i = 0; i < priv->num_rx_queues; i++) { - rx_queue = priv->rx_queue[i]; - rx_queue->rx_bd_base = vaddr; - rx_queue->rx_bd_dma_base = addr; - rx_queue->ndev = ndev; - rx_queue->dev = dev; - addr += sizeof(struct rxbd8) * rx_queue->rx_ring_size; - vaddr += sizeof(struct rxbd8) * rx_queue->rx_ring_size; - } - - /* Setup the skbuff rings */ - for (i = 0; i < priv->num_tx_queues; i++) { - tx_queue = priv->tx_queue[i]; - tx_queue->tx_skbuff = - kmalloc_array(tx_queue->tx_ring_size, - sizeof(*tx_queue->tx_skbuff), - GFP_KERNEL); - if (!tx_queue->tx_skbuff) - goto cleanup; - - for (j = 0; j < tx_queue->tx_ring_size; j++) - tx_queue->tx_skbuff[j] = NULL; - } - - for (i = 0; i < priv->num_rx_queues; i++) { - rx_queue = priv->rx_queue[i]; - rx_queue->rx_buff = kcalloc(rx_queue->rx_ring_size, - sizeof(*rx_queue->rx_buff), - GFP_KERNEL); - if (!rx_queue->rx_buff) - goto cleanup; - } - - gfar_init_bds(ndev); - - return 0; - -cleanup: - free_skb_resources(priv); - return -ENOMEM; -} - static void gfar_init_tx_rx_base(struct gfar_private *priv) { struct gfar __iomem *regs = priv->gfargrp[0].regs; @@ -444,7 +275,7 @@ static void gfar_configure_coalescing(struct gfar_private *priv, } } -void gfar_configure_coalescing_all(struct gfar_private *priv) +static void gfar_configure_coalescing_all(struct gfar_private *priv) { gfar_configure_coalescing(priv, 0xFF, 0xFF); } @@ -477,6 +308,62 @@ static struct net_device_stats *gfar_get_stats(struct net_device *dev) return &dev->stats; } +/* Set the appropriate hash bit for the given addr */ +/* The algorithm works like so: + * 1) Take the Destination Address (ie the multicast address), and + * do a CRC on it (little endian), and reverse the bits of the + * result. + * 2) Use the 8 most significant bits as a hash into a 256-entry + * table. The table is controlled through 8 32-bit registers: + * gaddr0-7. gaddr0's MSB is entry 0, and gaddr7's LSB is + * gaddr7. This means that the 3 most significant bits in the + * hash index which gaddr register to use, and the 5 other bits + * indicate which bit (assuming an IBM numbering scheme, which + * for PowerPC (tm) is usually the case) in the register holds + * the entry. 
+ */ +static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr) +{ + u32 tempval; + struct gfar_private *priv = netdev_priv(dev); + u32 result = ether_crc(ETH_ALEN, addr); + int width = priv->hash_width; + u8 whichbit = (result >> (32 - width)) & 0x1f; + u8 whichreg = result >> (32 - width + 5); + u32 value = (1 << (31-whichbit)); + + tempval = gfar_read(priv->hash_regs[whichreg]); + tempval |= value; + gfar_write(priv->hash_regs[whichreg], tempval); +} + +/* There are multiple MAC Address register pairs on some controllers + * This function sets the numth pair to a given address + */ +static void gfar_set_mac_for_addr(struct net_device *dev, int num, + const u8 *addr) +{ + struct gfar_private *priv = netdev_priv(dev); + struct gfar __iomem *regs = priv->gfargrp[0].regs; + u32 tempval; + u32 __iomem *macptr = ®s->macstnaddr1; + + macptr += num*2; + + /* For a station address of 0x12345678ABCD in transmission + * order (BE), MACnADDR1 is set to 0xCDAB7856 and + * MACnADDR2 is set to 0x34120000. + */ + tempval = (addr[5] << 24) | (addr[4] << 16) | + (addr[3] << 8) | addr[2]; + + gfar_write(macptr, tempval); + + tempval = (addr[1] << 24) | (addr[0] << 16); + + gfar_write(macptr+1, tempval); +} + static int gfar_set_mac_addr(struct net_device *dev, void *p) { eth_mac_addr(dev, p); @@ -486,24 +373,6 @@ static int gfar_set_mac_addr(struct net_device *dev, void *p) return 0; } -static const struct net_device_ops gfar_netdev_ops = { - .ndo_open = gfar_enet_open, - .ndo_start_xmit = gfar_start_xmit, - .ndo_stop = gfar_close, - .ndo_change_mtu = gfar_change_mtu, - .ndo_set_features = gfar_set_features, - .ndo_set_rx_mode = gfar_set_multi, - .ndo_tx_timeout = gfar_timeout, - .ndo_do_ioctl = gfar_ioctl, - .ndo_get_stats = gfar_get_stats, - .ndo_change_carrier = fixed_phy_change_carrier, - .ndo_set_mac_address = gfar_set_mac_addr, - .ndo_validate_addr = eth_validate_addr, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = gfar_netpoll, -#endif -}; - static void gfar_ints_disable(struct gfar_private *priv) { int i; @@ -723,10 +592,53 @@ static int gfar_of_group_count(struct device_node *np) return num; } +/* Reads the controller's registers to determine what interface + * connects it to the PHY. + */ +static phy_interface_t gfar_get_interface(struct net_device *dev) +{ + struct gfar_private *priv = netdev_priv(dev); + struct gfar __iomem *regs = priv->gfargrp[0].regs; + u32 ecntrl; + + ecntrl = gfar_read(®s->ecntrl); + + if (ecntrl & ECNTRL_SGMII_MODE) + return PHY_INTERFACE_MODE_SGMII; + + if (ecntrl & ECNTRL_TBI_MODE) { + if (ecntrl & ECNTRL_REDUCED_MODE) + return PHY_INTERFACE_MODE_RTBI; + else + return PHY_INTERFACE_MODE_TBI; + } + + if (ecntrl & ECNTRL_REDUCED_MODE) { + if (ecntrl & ECNTRL_REDUCED_MII_MODE) { + return PHY_INTERFACE_MODE_RMII; + } + else { + phy_interface_t interface = priv->interface; + + /* This isn't autodetected right now, so it must + * be set by the device tree or platform code. 
+ */ + if (interface == PHY_INTERFACE_MODE_RGMII_ID) + return PHY_INTERFACE_MODE_RGMII_ID; + + return PHY_INTERFACE_MODE_RGMII; + } + } + + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT) + return PHY_INTERFACE_MODE_GMII; + + return PHY_INTERFACE_MODE_MII; +} + static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) { const char *model; - const char *ctype; const void *mac_addr; int err = 0, i; struct net_device *dev = NULL; @@ -889,13 +801,15 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) FSL_GIANFAR_DEV_HAS_TIMER | FSL_GIANFAR_DEV_HAS_RX_FILER; - err = of_property_read_string(np, "phy-connection-type", &ctype); - - /* We only care about rgmii-id. The rest are autodetected */ - if (err == 0 && !strcmp(ctype, "rgmii-id")) - priv->interface = PHY_INTERFACE_MODE_RGMII_ID; + /* Use PHY connection type from the DT node if one is specified there. + * rgmii-id really needs to be specified. Other types can be + * detected by hardware + */ + err = of_get_phy_mode(np); + if (err >= 0) + priv->interface = err; else - priv->interface = PHY_INTERFACE_MODE_MII; + priv->interface = gfar_get_interface(dev); if (of_find_property(np, "fsl,magic-packet", NULL)) priv->device_flags |= FSL_GIANFAR_DEV_HAS_MAGIC_PACKET; @@ -931,85 +845,6 @@ tx_alloc_failed: return err; } -static int gfar_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr) -{ - struct hwtstamp_config config; - struct gfar_private *priv = netdev_priv(netdev); - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - priv->hwts_tx_en = 0; - break; - case HWTSTAMP_TX_ON: - if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)) - return -ERANGE; - priv->hwts_tx_en = 1; - break; - default: - return -ERANGE; - } - - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - if (priv->hwts_rx_en) { - priv->hwts_rx_en = 0; - reset_gfar(netdev); - } - break; - default: - if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)) - return -ERANGE; - if (!priv->hwts_rx_en) { - priv->hwts_rx_en = 1; - reset_gfar(netdev); - } - config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - } - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} - -static int gfar_hwtstamp_get(struct net_device *netdev, struct ifreq *ifr) -{ - struct hwtstamp_config config; - struct gfar_private *priv = netdev_priv(netdev); - - config.flags = 0; - config.tx_type = priv->hwts_tx_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - config.rx_filter = (priv->hwts_rx_en ? - HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE); - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
- -EFAULT : 0; -} - -static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - struct phy_device *phydev = dev->phydev; - - if (!netif_running(dev)) - return -EINVAL; - - if (cmd == SIOCSHWTSTAMP) - return gfar_hwtstamp_set(dev, rq); - if (cmd == SIOCGHWTSTAMP) - return gfar_hwtstamp_get(dev, rq); - - if (!phydev) - return -ENODEV; - - return phy_mii_ioctl(phydev, rq, cmd); -} - static u32 cluster_entry_per_class(struct gfar_private *priv, u32 rqfar, u32 class) { @@ -1133,135 +968,6 @@ static void gfar_detect_errata(struct gfar_private *priv) priv->errata); } -void gfar_mac_reset(struct gfar_private *priv) -{ - struct gfar __iomem *regs = priv->gfargrp[0].regs; - u32 tempval; - - /* Reset MAC layer */ - gfar_write(®s->maccfg1, MACCFG1_SOFT_RESET); - - /* We need to delay at least 3 TX clocks */ - udelay(3); - - /* the soft reset bit is not self-resetting, so we need to - * clear it before resuming normal operation - */ - gfar_write(®s->maccfg1, 0); - - udelay(3); - - gfar_rx_offload_en(priv); - - /* Initialize the max receive frame/buffer lengths */ - gfar_write(®s->maxfrm, GFAR_JUMBO_FRAME_SIZE); - gfar_write(®s->mrblr, GFAR_RXB_SIZE); - - /* Initialize the Minimum Frame Length Register */ - gfar_write(®s->minflr, MINFLR_INIT_SETTINGS); - - /* Initialize MACCFG2. */ - tempval = MACCFG2_INIT_SETTINGS; - - /* eTSEC74 erratum: Rx frames of length MAXFRM or MAXFRM-1 - * are marked as truncated. Avoid this by MACCFG2[Huge Frame]=1, - * and by checking RxBD[LG] and discarding larger than MAXFRM. - */ - if (gfar_has_errata(priv, GFAR_ERRATA_74)) - tempval |= MACCFG2_HUGEFRAME | MACCFG2_LENGTHCHECK; - - gfar_write(®s->maccfg2, tempval); - - /* Clear mac addr hash registers */ - gfar_write(®s->igaddr0, 0); - gfar_write(®s->igaddr1, 0); - gfar_write(®s->igaddr2, 0); - gfar_write(®s->igaddr3, 0); - gfar_write(®s->igaddr4, 0); - gfar_write(®s->igaddr5, 0); - gfar_write(®s->igaddr6, 0); - gfar_write(®s->igaddr7, 0); - - gfar_write(®s->gaddr0, 0); - gfar_write(®s->gaddr1, 0); - gfar_write(®s->gaddr2, 0); - gfar_write(®s->gaddr3, 0); - gfar_write(®s->gaddr4, 0); - gfar_write(®s->gaddr5, 0); - gfar_write(®s->gaddr6, 0); - gfar_write(®s->gaddr7, 0); - - if (priv->extended_hash) - gfar_clear_exact_match(priv->ndev); - - gfar_mac_rx_config(priv); - - gfar_mac_tx_config(priv); - - gfar_set_mac_address(priv->ndev); - - gfar_set_multi(priv->ndev); - - /* clear ievent and imask before configuring coalescing */ - gfar_ints_disable(priv); - - /* Configure the coalescing support */ - gfar_configure_coalescing_all(priv); -} - -static void gfar_hw_init(struct gfar_private *priv) -{ - struct gfar __iomem *regs = priv->gfargrp[0].regs; - u32 attrs; - - /* Stop the DMA engine now, in case it was running before - * (The firmware could have used it, and left it running). 
- */ - gfar_halt(priv); - - gfar_mac_reset(priv); - - /* Zero out the rmon mib registers if it has them */ - if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) { - memset_io(&(regs->rmon), 0, sizeof(struct rmon_mib)); - - /* Mask off the CAM interrupts */ - gfar_write(®s->rmon.cam1, 0xffffffff); - gfar_write(®s->rmon.cam2, 0xffffffff); - } - - /* Initialize ECNTRL */ - gfar_write(®s->ecntrl, ECNTRL_INIT_SETTINGS); - - /* Set the extraction length and index */ - attrs = ATTRELI_EL(priv->rx_stash_size) | - ATTRELI_EI(priv->rx_stash_index); - - gfar_write(®s->attreli, attrs); - - /* Start with defaults, and add stashing - * depending on driver parameters - */ - attrs = ATTR_INIT_SETTINGS; - - if (priv->bd_stash_en) - attrs |= ATTR_BDSTASH; - - if (priv->rx_stash_size != 0) - attrs |= ATTR_BUFSTASH; - - gfar_write(®s->attr, attrs); - - /* FIFO configs */ - gfar_write(®s->fifo_tx_thr, DEFAULT_FIFO_TX_THR); - gfar_write(®s->fifo_tx_starve, DEFAULT_FIFO_TX_STARVE); - gfar_write(®s->fifo_tx_starve_shutoff, DEFAULT_FIFO_TX_STARVE_OFF); - - /* Program the interrupt steering regs, only for MG devices */ - if (priv->num_grps > 1) - gfar_write_isrg(priv); -} - static void gfar_init_addr_hash_table(struct gfar_private *priv) { struct gfar __iomem *regs = priv->gfargrp[0].regs; @@ -1302,578 +1008,6 @@ static void gfar_init_addr_hash_table(struct gfar_private *priv) } } -/* Set up the ethernet device structure, private data, - * and anything else we need before we start - */ -static int gfar_probe(struct platform_device *ofdev) -{ - struct device_node *np = ofdev->dev.of_node; - struct net_device *dev = NULL; - struct gfar_private *priv = NULL; - int err = 0, i; - - err = gfar_of_init(ofdev, &dev); - - if (err) - return err; - - priv = netdev_priv(dev); - priv->ndev = dev; - priv->ofdev = ofdev; - priv->dev = &ofdev->dev; - SET_NETDEV_DEV(dev, &ofdev->dev); - - INIT_WORK(&priv->reset_task, gfar_reset_task); - - platform_set_drvdata(ofdev, priv); - - gfar_detect_errata(priv); - - /* Set the dev->base_addr to the gfar reg region */ - dev->base_addr = (unsigned long) priv->gfargrp[0].regs; - - /* Fill in the dev structure */ - dev->watchdog_timeo = TX_TIMEOUT; - /* MTU range: 50 - 9586 */ - dev->mtu = 1500; - dev->min_mtu = 50; - dev->max_mtu = GFAR_JUMBO_FRAME_SIZE - ETH_HLEN; - dev->netdev_ops = &gfar_netdev_ops; - dev->ethtool_ops = &gfar_ethtool_ops; - - /* Register for napi ...We are registering NAPI for each grp */ - for (i = 0; i < priv->num_grps; i++) { - if (priv->poll_mode == GFAR_SQ_POLLING) { - netif_napi_add(dev, &priv->gfargrp[i].napi_rx, - gfar_poll_rx_sq, GFAR_DEV_WEIGHT); - netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, - gfar_poll_tx_sq, 2); - } else { - netif_napi_add(dev, &priv->gfargrp[i].napi_rx, - gfar_poll_rx, GFAR_DEV_WEIGHT); - netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, - gfar_poll_tx, 2); - } - } - - if (priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) { - dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | - NETIF_F_RXCSUM; - dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG | - NETIF_F_RXCSUM | NETIF_F_HIGHDMA; - } - - if (priv->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) { - dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX; - dev->features |= NETIF_F_HW_VLAN_CTAG_RX; - } - - dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - - gfar_init_addr_hash_table(priv); - - /* Insert receive time stamps into padding alignment bytes, and - * plus 2 bytes padding to ensure the cpu alignment. 
- */ - if (priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER) - priv->padding = 8 + DEFAULT_PADDING; - - if (dev->features & NETIF_F_IP_CSUM || - priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER) - dev->needed_headroom = GMAC_FCB_LEN; - - /* Initializing some of the rx/tx queue level parameters */ - for (i = 0; i < priv->num_tx_queues; i++) { - priv->tx_queue[i]->tx_ring_size = DEFAULT_TX_RING_SIZE; - priv->tx_queue[i]->num_txbdfree = DEFAULT_TX_RING_SIZE; - priv->tx_queue[i]->txcoalescing = DEFAULT_TX_COALESCE; - priv->tx_queue[i]->txic = DEFAULT_TXIC; - } - - for (i = 0; i < priv->num_rx_queues; i++) { - priv->rx_queue[i]->rx_ring_size = DEFAULT_RX_RING_SIZE; - priv->rx_queue[i]->rxcoalescing = DEFAULT_RX_COALESCE; - priv->rx_queue[i]->rxic = DEFAULT_RXIC; - } - - /* Always enable rx filer if available */ - priv->rx_filer_enable = - (priv->device_flags & FSL_GIANFAR_DEV_HAS_RX_FILER) ? 1 : 0; - /* Enable most messages by default */ - priv->msg_enable = (NETIF_MSG_IFUP << 1 ) - 1; - /* use pritority h/w tx queue scheduling for single queue devices */ - if (priv->num_tx_queues == 1) - priv->prio_sched_en = 1; - - set_bit(GFAR_DOWN, &priv->state); - - gfar_hw_init(priv); - - /* Carrier starts down, phylib will bring it up */ - netif_carrier_off(dev); - - err = register_netdev(dev); - - if (err) { - pr_err("%s: Cannot register net device, aborting\n", dev->name); - goto register_fail; - } - - if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) - priv->wol_supported |= GFAR_WOL_MAGIC; - - if ((priv->device_flags & FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER) && - priv->rx_filer_enable) - priv->wol_supported |= GFAR_WOL_FILER_UCAST; - - device_set_wakeup_capable(&ofdev->dev, priv->wol_supported); - - /* fill out IRQ number and name fields */ - for (i = 0; i < priv->num_grps; i++) { - struct gfar_priv_grp *grp = &priv->gfargrp[i]; - if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) { - sprintf(gfar_irq(grp, TX)->name, "%s%s%c%s", - dev->name, "_g", '0' + i, "_tx"); - sprintf(gfar_irq(grp, RX)->name, "%s%s%c%s", - dev->name, "_g", '0' + i, "_rx"); - sprintf(gfar_irq(grp, ER)->name, "%s%s%c%s", - dev->name, "_g", '0' + i, "_er"); - } else - strcpy(gfar_irq(grp, TX)->name, dev->name); - } - - /* Initialize the filer table */ - gfar_init_filer_table(priv); - - /* Print out the device info */ - netdev_info(dev, "mac: %pM\n", dev->dev_addr); - - /* Even more device info helps when determining which kernel - * provided which set of benchmarks. 
- */ - netdev_info(dev, "Running with NAPI enabled\n"); - for (i = 0; i < priv->num_rx_queues; i++) - netdev_info(dev, "RX BD ring size for Q[%d]: %d\n", - i, priv->rx_queue[i]->rx_ring_size); - for (i = 0; i < priv->num_tx_queues; i++) - netdev_info(dev, "TX BD ring size for Q[%d]: %d\n", - i, priv->tx_queue[i]->tx_ring_size); - - return 0; - -register_fail: - if (of_phy_is_fixed_link(np)) - of_phy_deregister_fixed_link(np); - unmap_group_regs(priv); - gfar_free_rx_queues(priv); - gfar_free_tx_queues(priv); - of_node_put(priv->phy_node); - of_node_put(priv->tbi_node); - free_gfar_dev(priv); - return err; -} - -static int gfar_remove(struct platform_device *ofdev) -{ - struct gfar_private *priv = platform_get_drvdata(ofdev); - struct device_node *np = ofdev->dev.of_node; - - of_node_put(priv->phy_node); - of_node_put(priv->tbi_node); - - unregister_netdev(priv->ndev); - - if (of_phy_is_fixed_link(np)) - of_phy_deregister_fixed_link(np); - - unmap_group_regs(priv); - gfar_free_rx_queues(priv); - gfar_free_tx_queues(priv); - free_gfar_dev(priv); - - return 0; -} - -#ifdef CONFIG_PM - -static void __gfar_filer_disable(struct gfar_private *priv) -{ - struct gfar __iomem *regs = priv->gfargrp[0].regs; - u32 temp; - - temp = gfar_read(®s->rctrl); - temp &= ~(RCTRL_FILREN | RCTRL_PRSDEP_INIT); - gfar_write(®s->rctrl, temp); -} - -static void __gfar_filer_enable(struct gfar_private *priv) -{ - struct gfar __iomem *regs = priv->gfargrp[0].regs; - u32 temp; - - temp = gfar_read(®s->rctrl); - temp |= RCTRL_FILREN | RCTRL_PRSDEP_INIT; - gfar_write(®s->rctrl, temp); -} - -/* Filer rules implementing wol capabilities */ -static void gfar_filer_config_wol(struct gfar_private *priv) -{ - unsigned int i; - u32 rqfcr; - - __gfar_filer_disable(priv); - - /* clear the filer table, reject any packet by default */ - rqfcr = RQFCR_RJE | RQFCR_CMP_MATCH; - for (i = 0; i <= MAX_FILER_IDX; i++) - gfar_write_filer(priv, i, rqfcr, 0); - - i = 0; - if (priv->wol_opts & GFAR_WOL_FILER_UCAST) { - /* unicast packet, accept it */ - struct net_device *ndev = priv->ndev; - /* get the default rx queue index */ - u8 qindex = (u8)priv->gfargrp[0].rx_queue->qindex; - u32 dest_mac_addr = (ndev->dev_addr[0] << 16) | - (ndev->dev_addr[1] << 8) | - ndev->dev_addr[2]; - - rqfcr = (qindex << 10) | RQFCR_AND | - RQFCR_CMP_EXACT | RQFCR_PID_DAH; - - gfar_write_filer(priv, i++, rqfcr, dest_mac_addr); - - dest_mac_addr = (ndev->dev_addr[3] << 16) | - (ndev->dev_addr[4] << 8) | - ndev->dev_addr[5]; - rqfcr = (qindex << 10) | RQFCR_GPI | - RQFCR_CMP_EXACT | RQFCR_PID_DAL; - gfar_write_filer(priv, i++, rqfcr, dest_mac_addr); - } - - __gfar_filer_enable(priv); -} - -static void gfar_filer_restore_table(struct gfar_private *priv) -{ - u32 rqfcr, rqfpr; - unsigned int i; - - __gfar_filer_disable(priv); - - for (i = 0; i <= MAX_FILER_IDX; i++) { - rqfcr = priv->ftp_rqfcr[i]; - rqfpr = priv->ftp_rqfpr[i]; - gfar_write_filer(priv, i, rqfcr, rqfpr); - } - - __gfar_filer_enable(priv); -} - -/* gfar_start() for Rx only and with the FGPI filer interrupt enabled */ -static void gfar_start_wol_filer(struct gfar_private *priv) -{ - struct gfar __iomem *regs = priv->gfargrp[0].regs; - u32 tempval; - int i = 0; - - /* Enable Rx hw queues */ - gfar_write(®s->rqueue, priv->rqueue); - - /* Initialize DMACTRL to have WWR and WOP */ - tempval = gfar_read(®s->dmactrl); - tempval |= DMACTRL_INIT_SETTINGS; - gfar_write(®s->dmactrl, tempval); - - /* Make sure we aren't stopped */ - tempval = gfar_read(®s->dmactrl); - tempval &= ~DMACTRL_GRS; - 
gfar_write(®s->dmactrl, tempval); - - for (i = 0; i < priv->num_grps; i++) { - regs = priv->gfargrp[i].regs; - /* Clear RHLT, so that the DMA starts polling now */ - gfar_write(®s->rstat, priv->gfargrp[i].rstat); - /* enable the Filer General Purpose Interrupt */ - gfar_write(®s->imask, IMASK_FGPI); - } - - /* Enable Rx DMA */ - tempval = gfar_read(®s->maccfg1); - tempval |= MACCFG1_RX_EN; - gfar_write(®s->maccfg1, tempval); -} - -static int gfar_suspend(struct device *dev) -{ - struct gfar_private *priv = dev_get_drvdata(dev); - struct net_device *ndev = priv->ndev; - struct gfar __iomem *regs = priv->gfargrp[0].regs; - u32 tempval; - u16 wol = priv->wol_opts; - - if (!netif_running(ndev)) - return 0; - - disable_napi(priv); - netif_tx_lock(ndev); - netif_device_detach(ndev); - netif_tx_unlock(ndev); - - gfar_halt(priv); - - if (wol & GFAR_WOL_MAGIC) { - /* Enable interrupt on Magic Packet */ - gfar_write(®s->imask, IMASK_MAG); - - /* Enable Magic Packet mode */ - tempval = gfar_read(®s->maccfg2); - tempval |= MACCFG2_MPEN; - gfar_write(®s->maccfg2, tempval); - - /* re-enable the Rx block */ - tempval = gfar_read(®s->maccfg1); - tempval |= MACCFG1_RX_EN; - gfar_write(®s->maccfg1, tempval); - - } else if (wol & GFAR_WOL_FILER_UCAST) { - gfar_filer_config_wol(priv); - gfar_start_wol_filer(priv); - - } else { - phy_stop(ndev->phydev); - } - - return 0; -} - -static int gfar_resume(struct device *dev) -{ - struct gfar_private *priv = dev_get_drvdata(dev); - struct net_device *ndev = priv->ndev; - struct gfar __iomem *regs = priv->gfargrp[0].regs; - u32 tempval; - u16 wol = priv->wol_opts; - - if (!netif_running(ndev)) - return 0; - - if (wol & GFAR_WOL_MAGIC) { - /* Disable Magic Packet mode */ - tempval = gfar_read(®s->maccfg2); - tempval &= ~MACCFG2_MPEN; - gfar_write(®s->maccfg2, tempval); - - } else if (wol & GFAR_WOL_FILER_UCAST) { - /* need to stop rx only, tx is already down */ - gfar_halt(priv); - gfar_filer_restore_table(priv); - - } else { - phy_start(ndev->phydev); - } - - gfar_start(priv); - - netif_device_attach(ndev); - enable_napi(priv); - - return 0; -} - -static int gfar_restore(struct device *dev) -{ - struct gfar_private *priv = dev_get_drvdata(dev); - struct net_device *ndev = priv->ndev; - - if (!netif_running(ndev)) { - netif_device_attach(ndev); - - return 0; - } - - gfar_init_bds(ndev); - - gfar_mac_reset(priv); - - gfar_init_tx_rx_base(priv); - - gfar_start(priv); - - priv->oldlink = 0; - priv->oldspeed = 0; - priv->oldduplex = -1; - - if (ndev->phydev) - phy_start(ndev->phydev); - - netif_device_attach(ndev); - enable_napi(priv); - - return 0; -} - -static const struct dev_pm_ops gfar_pm_ops = { - .suspend = gfar_suspend, - .resume = gfar_resume, - .freeze = gfar_suspend, - .thaw = gfar_resume, - .restore = gfar_restore, -}; - -#define GFAR_PM_OPS (&gfar_pm_ops) - -#else - -#define GFAR_PM_OPS NULL - -#endif - -/* Reads the controller's registers to determine what interface - * connects it to the PHY. 
- */ -static phy_interface_t gfar_get_interface(struct net_device *dev) -{ - struct gfar_private *priv = netdev_priv(dev); - struct gfar __iomem *regs = priv->gfargrp[0].regs; - u32 ecntrl; - - ecntrl = gfar_read(®s->ecntrl); - - if (ecntrl & ECNTRL_SGMII_MODE) - return PHY_INTERFACE_MODE_SGMII; - - if (ecntrl & ECNTRL_TBI_MODE) { - if (ecntrl & ECNTRL_REDUCED_MODE) - return PHY_INTERFACE_MODE_RTBI; - else - return PHY_INTERFACE_MODE_TBI; - } - - if (ecntrl & ECNTRL_REDUCED_MODE) { - if (ecntrl & ECNTRL_REDUCED_MII_MODE) { - return PHY_INTERFACE_MODE_RMII; - } - else { - phy_interface_t interface = priv->interface; - - /* This isn't autodetected right now, so it must - * be set by the device tree or platform code. - */ - if (interface == PHY_INTERFACE_MODE_RGMII_ID) - return PHY_INTERFACE_MODE_RGMII_ID; - - return PHY_INTERFACE_MODE_RGMII; - } - } - - if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT) - return PHY_INTERFACE_MODE_GMII; - - return PHY_INTERFACE_MODE_MII; -} - - -/* Initializes driver's PHY state, and attaches to the PHY. - * Returns 0 on success. - */ -static int init_phy(struct net_device *dev) -{ - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; - struct gfar_private *priv = netdev_priv(dev); - phy_interface_t interface; - struct phy_device *phydev; - struct ethtool_eee edata; - - linkmode_set_bit_array(phy_10_100_features_array, - ARRAY_SIZE(phy_10_100_features_array), - mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask); - if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT) - linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mask); - - priv->oldlink = 0; - priv->oldspeed = 0; - priv->oldduplex = -1; - - interface = gfar_get_interface(dev); - - phydev = of_phy_connect(dev, priv->phy_node, &adjust_link, 0, - interface); - if (!phydev) { - dev_err(&dev->dev, "could not attach to PHY\n"); - return -ENODEV; - } - - if (interface == PHY_INTERFACE_MODE_SGMII) - gfar_configure_serdes(dev); - - /* Remove any features not supported by the controller */ - linkmode_and(phydev->supported, phydev->supported, mask); - linkmode_copy(phydev->advertising, phydev->supported); - - /* Add support for flow control */ - phy_support_asym_pause(phydev); - - /* disable EEE autoneg, EEE not supported by eTSEC */ - memset(&edata, 0, sizeof(struct ethtool_eee)); - phy_ethtool_set_eee(phydev, &edata); - - return 0; -} - -/* Initialize TBI PHY interface for communicating with the - * SERDES lynx PHY on the chip. We communicate with this PHY - * through the MDIO bus on each controller, treating it as a - * "normal" PHY at the address found in the TBIPA register. We assume - * that the TBIPA register is valid. Either the MDIO bus code will set - * it to a value that doesn't conflict with other PHYs on the bus, or the - * value doesn't matter, as there are no other PHYs on the bus. - */ -static void gfar_configure_serdes(struct net_device *dev) -{ - struct gfar_private *priv = netdev_priv(dev); - struct phy_device *tbiphy; - - if (!priv->tbi_node) { - dev_warn(&dev->dev, "error: SGMII mode requires that the " - "device tree specify a tbi-handle\n"); - return; - } - - tbiphy = of_phy_find_device(priv->tbi_node); - if (!tbiphy) { - dev_err(&dev->dev, "error: Could not get TBI device\n"); - return; - } - - /* If the link is already up, we must already be ok, and don't need to - * configure and reset the TBI<->SerDes link. Maybe U-Boot configured - * everything for us? 
Resetting it takes the link down and requires - * several seconds for it to come back. - */ - if (phy_read(tbiphy, MII_BMSR) & BMSR_LSTATUS) { - put_device(&tbiphy->mdio.dev); - return; - } - - /* Single clk mode, mii mode off(for serdes communication) */ - phy_write(tbiphy, MII_TBICON, TBICON_CLK_SELECT); - - phy_write(tbiphy, MII_ADVERTISE, - ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE | - ADVERTISE_1000XPSE_ASYM); - - phy_write(tbiphy, MII_BMCR, - BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX | - BMCR_SPEED1000); - - put_device(&tbiphy->mdio.dev); -} - static int __gfar_is_rx_idle(struct gfar_private *priv) { u32 res; @@ -1930,7 +1064,7 @@ retry: } /* Halt the receive and transmit queues */ -void gfar_halt(struct gfar_private *priv) +static void gfar_halt(struct gfar_private *priv) { struct gfar __iomem *regs = priv->gfargrp[0].regs; u32 tempval; @@ -1949,26 +1083,6 @@ void gfar_halt(struct gfar_private *priv) gfar_write(®s->maccfg1, tempval); } -void stop_gfar(struct net_device *dev) -{ - struct gfar_private *priv = netdev_priv(dev); - - netif_tx_stop_all_queues(dev); - - smp_mb__before_atomic(); - set_bit(GFAR_DOWN, &priv->state); - smp_mb__after_atomic(); - - disable_napi(priv); - - /* disable ints and gracefully shut down Rx/Tx DMA */ - gfar_halt(priv); - - phy_stop(dev->phydev); - - free_skb_resources(priv); -} - static void free_skb_tx_queue(struct gfar_priv_tx_q *tx_queue) { struct txbd8 *txbdp; @@ -2061,7 +1175,27 @@ static void free_skb_resources(struct gfar_private *priv) priv->tx_queue[0]->tx_bd_dma_base); } -void gfar_start(struct gfar_private *priv) +void stop_gfar(struct net_device *dev) +{ + struct gfar_private *priv = netdev_priv(dev); + + netif_tx_stop_all_queues(dev); + + smp_mb__before_atomic(); + set_bit(GFAR_DOWN, &priv->state); + smp_mb__after_atomic(); + + disable_napi(priv); + + /* disable ints and gracefully shut down Rx/Tx DMA */ + gfar_halt(priv); + + phy_stop(dev->phydev); + + free_skb_resources(priv); +} + +static void gfar_start(struct gfar_private *priv) { struct gfar __iomem *regs = priv->gfargrp[0].regs; u32 tempval; @@ -2098,103 +1232,207 @@ void gfar_start(struct gfar_private *priv) netif_trans_update(priv->ndev); /* prevent tx timeout */ } -static void free_grp_irqs(struct gfar_priv_grp *grp) +static bool gfar_new_page(struct gfar_priv_rx_q *rxq, struct gfar_rx_buff *rxb) { - free_irq(gfar_irq(grp, TX)->irq, grp); - free_irq(gfar_irq(grp, RX)->irq, grp); - free_irq(gfar_irq(grp, ER)->irq, grp); + struct page *page; + dma_addr_t addr; + + page = dev_alloc_page(); + if (unlikely(!page)) + return false; + + addr = dma_map_page(rxq->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(rxq->dev, addr))) { + __free_page(page); + + return false; + } + + rxb->dma = addr; + rxb->page = page; + rxb->page_offset = 0; + + return true; } -static int register_grp_irqs(struct gfar_priv_grp *grp) +static void gfar_rx_alloc_err(struct gfar_priv_rx_q *rx_queue) { - struct gfar_private *priv = grp->priv; - struct net_device *dev = priv->ndev; - int err; + struct gfar_private *priv = netdev_priv(rx_queue->ndev); + struct gfar_extra_stats *estats = &priv->extra_stats; - /* If the device has multiple interrupts, register for - * them. 
Otherwise, only register for the one - */ - if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) { - /* Install our interrupt handlers for Error, - * Transmit, and Receive - */ - err = request_irq(gfar_irq(grp, ER)->irq, gfar_error, 0, - gfar_irq(grp, ER)->name, grp); - if (err < 0) { - netif_err(priv, intr, dev, "Can't get IRQ %d\n", - gfar_irq(grp, ER)->irq); + netdev_err(rx_queue->ndev, "Can't alloc RX buffers\n"); + atomic64_inc(&estats->rx_alloc_err); +} - goto err_irq_fail; - } - enable_irq_wake(gfar_irq(grp, ER)->irq); +static void gfar_alloc_rx_buffs(struct gfar_priv_rx_q *rx_queue, + int alloc_cnt) +{ + struct rxbd8 *bdp; + struct gfar_rx_buff *rxb; + int i; - err = request_irq(gfar_irq(grp, TX)->irq, gfar_transmit, 0, - gfar_irq(grp, TX)->name, grp); - if (err < 0) { - netif_err(priv, intr, dev, "Can't get IRQ %d\n", - gfar_irq(grp, TX)->irq); - goto tx_irq_fail; + i = rx_queue->next_to_use; + bdp = &rx_queue->rx_bd_base[i]; + rxb = &rx_queue->rx_buff[i]; + + while (alloc_cnt--) { + /* try reuse page */ + if (unlikely(!rxb->page)) { + if (unlikely(!gfar_new_page(rx_queue, rxb))) { + gfar_rx_alloc_err(rx_queue); + break; + } } - err = request_irq(gfar_irq(grp, RX)->irq, gfar_receive, 0, - gfar_irq(grp, RX)->name, grp); - if (err < 0) { - netif_err(priv, intr, dev, "Can't get IRQ %d\n", - gfar_irq(grp, RX)->irq); - goto rx_irq_fail; + + /* Setup the new RxBD */ + gfar_init_rxbdp(rx_queue, bdp, + rxb->dma + rxb->page_offset + RXBUF_ALIGNMENT); + + /* Update to the next pointer */ + bdp++; + rxb++; + + if (unlikely(++i == rx_queue->rx_ring_size)) { + i = 0; + bdp = rx_queue->rx_bd_base; + rxb = rx_queue->rx_buff; } - enable_irq_wake(gfar_irq(grp, RX)->irq); + } - } else { - err = request_irq(gfar_irq(grp, TX)->irq, gfar_interrupt, 0, - gfar_irq(grp, TX)->name, grp); - if (err < 0) { - netif_err(priv, intr, dev, "Can't get IRQ %d\n", - gfar_irq(grp, TX)->irq); - goto err_irq_fail; + rx_queue->next_to_use = i; + rx_queue->next_to_alloc = i; +} + +static void gfar_init_bds(struct net_device *ndev) +{ + struct gfar_private *priv = netdev_priv(ndev); + struct gfar __iomem *regs = priv->gfargrp[0].regs; + struct gfar_priv_tx_q *tx_queue = NULL; + struct gfar_priv_rx_q *rx_queue = NULL; + struct txbd8 *txbdp; + u32 __iomem *rfbptr; + int i, j; + + for (i = 0; i < priv->num_tx_queues; i++) { + tx_queue = priv->tx_queue[i]; + /* Initialize some variables in our dev structure */ + tx_queue->num_txbdfree = tx_queue->tx_ring_size; + tx_queue->dirty_tx = tx_queue->tx_bd_base; + tx_queue->cur_tx = tx_queue->tx_bd_base; + tx_queue->skb_curtx = 0; + tx_queue->skb_dirtytx = 0; + + /* Initialize Transmit Descriptor Ring */ + txbdp = tx_queue->tx_bd_base; + for (j = 0; j < tx_queue->tx_ring_size; j++) { + txbdp->lstatus = 0; + txbdp->bufPtr = 0; + txbdp++; } - enable_irq_wake(gfar_irq(grp, TX)->irq); + + /* Set the last descriptor in the ring to indicate wrap */ + txbdp--; + txbdp->status = cpu_to_be16(be16_to_cpu(txbdp->status) | + TXBD_WRAP); } - return 0; + rfbptr = ®s->rfbptr0; + for (i = 0; i < priv->num_rx_queues; i++) { + rx_queue = priv->rx_queue[i]; -rx_irq_fail: - free_irq(gfar_irq(grp, TX)->irq, grp); -tx_irq_fail: - free_irq(gfar_irq(grp, ER)->irq, grp); -err_irq_fail: - return err; + rx_queue->next_to_clean = 0; + rx_queue->next_to_use = 0; + rx_queue->next_to_alloc = 0; + + /* make sure next_to_clean != next_to_use after this + * by leaving at least 1 unused descriptor + */ + gfar_alloc_rx_buffs(rx_queue, gfar_rxbd_unused(rx_queue)); + rx_queue->rfbptr = rfbptr; + rfbptr += 2; + } } 
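
The refill loop in gfar_alloc_rx_buffs above walks a single producer index through the RX descriptor ring and wraps it back to zero when it reaches rx_ring_size, while gfar_init_bds deliberately fills only gfar_rxbd_unused() slots so that next_to_use never catches up with next_to_clean. The exact body of gfar_rxbd_unused() is not shown in this hunk, so the following is only a minimal, self-contained sketch of that wrap-around bookkeeping; demo_ring, demo_unused and demo_refill are hypothetical stand-ins, not the driver's real structures or helpers.

#include <stdio.h>

/* Hypothetical stand-in for an RX descriptor ring. */
struct demo_ring {
	unsigned int size;          /* number of descriptors in the ring */
	unsigned int next_to_use;   /* producer index (refill side) */
	unsigned int next_to_clean; /* consumer index (completion side) */
};

/* Free slots while always keeping one descriptor unused, which is one
 * plausible formulation of what gfar_rxbd_unused() is described to do. */
static unsigned int demo_unused(const struct demo_ring *r)
{
	if (r->next_to_clean > r->next_to_use)
		return r->next_to_clean - r->next_to_use - 1;
	return r->size + r->next_to_clean - r->next_to_use - 1;
}

static void demo_refill(struct demo_ring *r, unsigned int cnt)
{
	unsigned int i = r->next_to_use;

	while (cnt--) {
		/* ... allocate a page and program descriptor i here ... */
		if (++i == r->size)	/* wrap exactly as the refill loop does */
			i = 0;
	}
	r->next_to_use = i;
}

int main(void)
{
	struct demo_ring r = { .size = 8, .next_to_use = 0, .next_to_clean = 0 };

	demo_refill(&r, demo_unused(&r));	/* fills size - 1 = 7 slots */
	printf("next_to_use=%u unused=%u\n", r.next_to_use, demo_unused(&r));
	return 0;
}

With an 8-entry ring and both indices at zero, demo_unused() reports 7 free slots, the refill advances next_to_use to 7, and the ring is then considered full; leaving that one gap is what keeps an empty ring (next_to_clean == next_to_use) distinguishable from a full one.
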
-static void gfar_free_irq(struct gfar_private *priv) +static int gfar_alloc_skb_resources(struct net_device *ndev) { - int i; + void *vaddr; + dma_addr_t addr; + int i, j; + struct gfar_private *priv = netdev_priv(ndev); + struct device *dev = priv->dev; + struct gfar_priv_tx_q *tx_queue = NULL; + struct gfar_priv_rx_q *rx_queue = NULL; - /* Free the IRQs */ - if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) { - for (i = 0; i < priv->num_grps; i++) - free_grp_irqs(&priv->gfargrp[i]); - } else { - for (i = 0; i < priv->num_grps; i++) - free_irq(gfar_irq(&priv->gfargrp[i], TX)->irq, - &priv->gfargrp[i]); + priv->total_tx_ring_size = 0; + for (i = 0; i < priv->num_tx_queues; i++) + priv->total_tx_ring_size += priv->tx_queue[i]->tx_ring_size; + + priv->total_rx_ring_size = 0; + for (i = 0; i < priv->num_rx_queues; i++) + priv->total_rx_ring_size += priv->rx_queue[i]->rx_ring_size; + + /* Allocate memory for the buffer descriptors */ + vaddr = dma_alloc_coherent(dev, + (priv->total_tx_ring_size * + sizeof(struct txbd8)) + + (priv->total_rx_ring_size * + sizeof(struct rxbd8)), + &addr, GFP_KERNEL); + if (!vaddr) + return -ENOMEM; + + for (i = 0; i < priv->num_tx_queues; i++) { + tx_queue = priv->tx_queue[i]; + tx_queue->tx_bd_base = vaddr; + tx_queue->tx_bd_dma_base = addr; + tx_queue->dev = ndev; + /* enet DMA only understands physical addresses */ + addr += sizeof(struct txbd8) * tx_queue->tx_ring_size; + vaddr += sizeof(struct txbd8) * tx_queue->tx_ring_size; } -} -static int gfar_request_irq(struct gfar_private *priv) -{ - int err, i, j; + /* Start the rx descriptor ring where the tx ring leaves off */ + for (i = 0; i < priv->num_rx_queues; i++) { + rx_queue = priv->rx_queue[i]; + rx_queue->rx_bd_base = vaddr; + rx_queue->rx_bd_dma_base = addr; + rx_queue->ndev = ndev; + rx_queue->dev = dev; + addr += sizeof(struct rxbd8) * rx_queue->rx_ring_size; + vaddr += sizeof(struct rxbd8) * rx_queue->rx_ring_size; + } - for (i = 0; i < priv->num_grps; i++) { - err = register_grp_irqs(&priv->gfargrp[i]); - if (err) { - for (j = 0; j < i; j++) - free_grp_irqs(&priv->gfargrp[j]); - return err; - } + /* Setup the skbuff rings */ + for (i = 0; i < priv->num_tx_queues; i++) { + tx_queue = priv->tx_queue[i]; + tx_queue->tx_skbuff = + kmalloc_array(tx_queue->tx_ring_size, + sizeof(*tx_queue->tx_skbuff), + GFP_KERNEL); + if (!tx_queue->tx_skbuff) + goto cleanup; + + for (j = 0; j < tx_queue->tx_ring_size; j++) + tx_queue->tx_skbuff[j] = NULL; } + for (i = 0; i < priv->num_rx_queues; i++) { + rx_queue = priv->rx_queue[i]; + rx_queue->rx_buff = kcalloc(rx_queue->rx_ring_size, + sizeof(*rx_queue->rx_buff), + GFP_KERNEL); + if (!rx_queue->rx_buff) + goto cleanup; + } + + gfar_init_bds(ndev); + return 0; + +cleanup: + free_skb_resources(priv); + return -ENOMEM; } /* Bring the controller up and running */ @@ -2232,27 +1470,245 @@ int startup_gfar(struct net_device *ndev) return 0; } -/* Called when something needs to use the ethernet device - * Returns 0 for success. 
+static u32 gfar_get_flowctrl_cfg(struct gfar_private *priv) +{ + struct net_device *ndev = priv->ndev; + struct phy_device *phydev = ndev->phydev; + u32 val = 0; + + if (!phydev->duplex) + return val; + + if (!priv->pause_aneg_en) { + if (priv->tx_pause_en) + val |= MACCFG1_TX_FLOW; + if (priv->rx_pause_en) + val |= MACCFG1_RX_FLOW; + } else { + u16 lcl_adv, rmt_adv; + u8 flowctrl; + /* get link partner capabilities */ + rmt_adv = 0; + if (phydev->pause) + rmt_adv = LPA_PAUSE_CAP; + if (phydev->asym_pause) + rmt_adv |= LPA_PAUSE_ASYM; + + lcl_adv = linkmode_adv_to_lcl_adv_t(phydev->advertising); + flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); + if (flowctrl & FLOW_CTRL_TX) + val |= MACCFG1_TX_FLOW; + if (flowctrl & FLOW_CTRL_RX) + val |= MACCFG1_RX_FLOW; + } + + return val; +} + +static noinline void gfar_update_link_state(struct gfar_private *priv) +{ + struct gfar __iomem *regs = priv->gfargrp[0].regs; + struct net_device *ndev = priv->ndev; + struct phy_device *phydev = ndev->phydev; + struct gfar_priv_rx_q *rx_queue = NULL; + int i; + + if (unlikely(test_bit(GFAR_RESETTING, &priv->state))) + return; + + if (phydev->link) { + u32 tempval1 = gfar_read(®s->maccfg1); + u32 tempval = gfar_read(®s->maccfg2); + u32 ecntrl = gfar_read(®s->ecntrl); + u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW); + + if (phydev->duplex != priv->oldduplex) { + if (!(phydev->duplex)) + tempval &= ~(MACCFG2_FULL_DUPLEX); + else + tempval |= MACCFG2_FULL_DUPLEX; + + priv->oldduplex = phydev->duplex; + } + + if (phydev->speed != priv->oldspeed) { + switch (phydev->speed) { + case 1000: + tempval = + ((tempval & ~(MACCFG2_IF)) | MACCFG2_GMII); + + ecntrl &= ~(ECNTRL_R100); + break; + case 100: + case 10: + tempval = + ((tempval & ~(MACCFG2_IF)) | MACCFG2_MII); + + /* Reduced mode distinguishes + * between 10 and 100 + */ + if (phydev->speed == SPEED_100) + ecntrl |= ECNTRL_R100; + else + ecntrl &= ~(ECNTRL_R100); + break; + default: + netif_warn(priv, link, priv->ndev, + "Ack! Speed (%d) is not 10/100/1000!\n", + phydev->speed); + break; + } + + priv->oldspeed = phydev->speed; + } + + tempval1 &= ~(MACCFG1_TX_FLOW | MACCFG1_RX_FLOW); + tempval1 |= gfar_get_flowctrl_cfg(priv); + + /* Turn last free buffer recording on */ + if ((tempval1 & MACCFG1_TX_FLOW) && !tx_flow_oldval) { + for (i = 0; i < priv->num_rx_queues; i++) { + u32 bdp_dma; + + rx_queue = priv->rx_queue[i]; + bdp_dma = gfar_rxbd_dma_lastfree(rx_queue); + gfar_write(rx_queue->rfbptr, bdp_dma); + } + + priv->tx_actual_en = 1; + } + + if (unlikely(!(tempval1 & MACCFG1_TX_FLOW) && tx_flow_oldval)) + priv->tx_actual_en = 0; + + gfar_write(®s->maccfg1, tempval1); + gfar_write(®s->maccfg2, tempval); + gfar_write(®s->ecntrl, ecntrl); + + if (!priv->oldlink) + priv->oldlink = 1; + + } else if (priv->oldlink) { + priv->oldlink = 0; + priv->oldspeed = 0; + priv->oldduplex = -1; + } + + if (netif_msg_link(priv)) + phy_print_status(phydev); +} + +/* Called every time the controller might need to be made + * aware of new link state. The PHY code conveys this + * information through variables in the phydev structure, and this + * function converts those variables into the appropriate + * register values, and can bring down the device if needed. 
*/ -static int gfar_enet_open(struct net_device *dev) +static void adjust_link(struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); - int err; + struct phy_device *phydev = dev->phydev; - err = init_phy(dev); - if (err) - return err; + if (unlikely(phydev->link != priv->oldlink || + (phydev->link && (phydev->duplex != priv->oldduplex || + phydev->speed != priv->oldspeed)))) + gfar_update_link_state(priv); +} - err = gfar_request_irq(priv); - if (err) - return err; +/* Initialize TBI PHY interface for communicating with the + * SERDES lynx PHY on the chip. We communicate with this PHY + * through the MDIO bus on each controller, treating it as a + * "normal" PHY at the address found in the TBIPA register. We assume + * that the TBIPA register is valid. Either the MDIO bus code will set + * it to a value that doesn't conflict with other PHYs on the bus, or the + * value doesn't matter, as there are no other PHYs on the bus. + */ +static void gfar_configure_serdes(struct net_device *dev) +{ + struct gfar_private *priv = netdev_priv(dev); + struct phy_device *tbiphy; - err = startup_gfar(dev); - if (err) - return err; + if (!priv->tbi_node) { + dev_warn(&dev->dev, "error: SGMII mode requires that the " + "device tree specify a tbi-handle\n"); + return; + } - return err; + tbiphy = of_phy_find_device(priv->tbi_node); + if (!tbiphy) { + dev_err(&dev->dev, "error: Could not get TBI device\n"); + return; + } + + /* If the link is already up, we must already be ok, and don't need to + * configure and reset the TBI<->SerDes link. Maybe U-Boot configured + * everything for us? Resetting it takes the link down and requires + * several seconds for it to come back. + */ + if (phy_read(tbiphy, MII_BMSR) & BMSR_LSTATUS) { + put_device(&tbiphy->mdio.dev); + return; + } + + /* Single clk mode, mii mode off(for serdes communication) */ + phy_write(tbiphy, MII_TBICON, TBICON_CLK_SELECT); + + phy_write(tbiphy, MII_ADVERTISE, + ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE | + ADVERTISE_1000XPSE_ASYM); + + phy_write(tbiphy, MII_BMCR, + BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX | + BMCR_SPEED1000); + + put_device(&tbiphy->mdio.dev); +} + +/* Initializes driver's PHY state, and attaches to the PHY. + * Returns 0 on success. 
+ */ +static int init_phy(struct net_device *dev) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; + struct gfar_private *priv = netdev_priv(dev); + phy_interface_t interface = priv->interface; + struct phy_device *phydev; + struct ethtool_eee edata; + + linkmode_set_bit_array(phy_10_100_features_array, + ARRAY_SIZE(phy_10_100_features_array), + mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask); + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT) + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mask); + + priv->oldlink = 0; + priv->oldspeed = 0; + priv->oldduplex = -1; + + phydev = of_phy_connect(dev, priv->phy_node, &adjust_link, 0, + interface); + if (!phydev) { + dev_err(&dev->dev, "could not attach to PHY\n"); + return -ENODEV; + } + + if (interface == PHY_INTERFACE_MODE_SGMII) + gfar_configure_serdes(dev); + + /* Remove any features not supported by the controller */ + linkmode_and(phydev->supported, phydev->supported, mask); + linkmode_copy(phydev->advertising, phydev->supported); + + /* Add support for flow control */ + phy_support_asym_pause(phydev); + + /* disable EEE autoneg, EEE not supported by eTSEC */ + memset(&edata, 0, sizeof(struct ethtool_eee)); + phy_ethtool_set_eee(phydev, &edata); + + return 0; } static inline struct txfcb *gfar_add_fcb(struct sk_buff *skb) @@ -2583,22 +2039,6 @@ dma_map_err: return NETDEV_TX_OK; } -/* Stops the kernel queue, and halts the controller */ -static int gfar_close(struct net_device *dev) -{ - struct gfar_private *priv = netdev_priv(dev); - - cancel_work_sync(&priv->reset_task); - stop_gfar(dev); - - /* Disconnect from the PHY */ - phy_disconnect(dev->phydev); - - gfar_free_irq(priv); - - return 0; -} - /* Changes the mac address if the controller is not running. */ static int gfar_set_mac_address(struct net_device *dev) { @@ -2660,6 +2100,85 @@ static void gfar_timeout(struct net_device *dev) schedule_work(&priv->reset_task); } +static int gfar_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr) +{ + struct hwtstamp_config config; + struct gfar_private *priv = netdev_priv(netdev); + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* reserved for future extensions */ + if (config.flags) + return -EINVAL; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + priv->hwts_tx_en = 0; + break; + case HWTSTAMP_TX_ON: + if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)) + return -ERANGE; + priv->hwts_tx_en = 1; + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + if (priv->hwts_rx_en) { + priv->hwts_rx_en = 0; + reset_gfar(netdev); + } + break; + default: + if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)) + return -ERANGE; + if (!priv->hwts_rx_en) { + priv->hwts_rx_en = 1; + reset_gfar(netdev); + } + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + } + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + +static int gfar_hwtstamp_get(struct net_device *netdev, struct ifreq *ifr) +{ + struct hwtstamp_config config; + struct gfar_private *priv = netdev_priv(netdev); + + config.flags = 0; + config.tx_type = priv->hwts_tx_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + config.rx_filter = (priv->hwts_rx_en ? + HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE); + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
+ -EFAULT : 0; +} + +static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct phy_device *phydev = dev->phydev; + + if (!netif_running(dev)) + return -EINVAL; + + if (cmd == SIOCSHWTSTAMP) + return gfar_hwtstamp_set(dev, rq); + if (cmd == SIOCGHWTSTAMP) + return gfar_hwtstamp_get(dev, rq); + + if (!phydev) + return -ENODEV; + + return phy_mii_ioctl(phydev, rq, cmd); +} + /* Interrupt Handler for Transmit complete */ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) { @@ -2767,77 +2286,6 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) netdev_tx_completed_queue(txq, howmany, bytes_sent); } -static bool gfar_new_page(struct gfar_priv_rx_q *rxq, struct gfar_rx_buff *rxb) -{ - struct page *page; - dma_addr_t addr; - - page = dev_alloc_page(); - if (unlikely(!page)) - return false; - - addr = dma_map_page(rxq->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(rxq->dev, addr))) { - __free_page(page); - - return false; - } - - rxb->dma = addr; - rxb->page = page; - rxb->page_offset = 0; - - return true; -} - -static void gfar_rx_alloc_err(struct gfar_priv_rx_q *rx_queue) -{ - struct gfar_private *priv = netdev_priv(rx_queue->ndev); - struct gfar_extra_stats *estats = &priv->extra_stats; - - netdev_err(rx_queue->ndev, "Can't alloc RX buffers\n"); - atomic64_inc(&estats->rx_alloc_err); -} - -static void gfar_alloc_rx_buffs(struct gfar_priv_rx_q *rx_queue, - int alloc_cnt) -{ - struct rxbd8 *bdp; - struct gfar_rx_buff *rxb; - int i; - - i = rx_queue->next_to_use; - bdp = &rx_queue->rx_bd_base[i]; - rxb = &rx_queue->rx_buff[i]; - - while (alloc_cnt--) { - /* try reuse page */ - if (unlikely(!rxb->page)) { - if (unlikely(!gfar_new_page(rx_queue, rxb))) { - gfar_rx_alloc_err(rx_queue); - break; - } - } - - /* Setup the new RxBD */ - gfar_init_rxbdp(rx_queue, bdp, - rxb->dma + rxb->page_offset + RXBUF_ALIGNMENT); - - /* Update to the next pointer */ - bdp++; - rxb++; - - if (unlikely(++i == rx_queue->rx_ring_size)) { - i = 0; - bdp = rx_queue->rx_bd_base; - rxb = rx_queue->rx_buff; - } - } - - rx_queue->next_to_use = i; - rx_queue->next_to_alloc = i; -} - static void count_errors(u32 lstatus, struct net_device *ndev) { struct gfar_private *priv = netdev_priv(ndev); @@ -2875,7 +2323,7 @@ static void count_errors(u32 lstatus, struct net_device *ndev) } } -irqreturn_t gfar_receive(int irq, void *grp_id) +static irqreturn_t gfar_receive(int irq, void *grp_id) { struct gfar_priv_grp *grp = (struct gfar_priv_grp *)grp_id; unsigned long flags; @@ -3077,7 +2525,8 @@ static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb) * until the budget/quota has been reached. Returns the number * of frames handled */ -int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit) +static int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, + int rx_work_limit) { struct net_device *ndev = rx_queue->ndev; struct gfar_private *priv = netdev_priv(ndev); @@ -3327,6 +2776,98 @@ static int gfar_poll_tx(struct napi_struct *napi, int budget) return 0; } +/* GFAR error interrupt handler */ +static irqreturn_t gfar_error(int irq, void *grp_id) +{ + struct gfar_priv_grp *gfargrp = grp_id; + struct gfar __iomem *regs = gfargrp->regs; + struct gfar_private *priv= gfargrp->priv; + struct net_device *dev = priv->ndev; + + /* Save ievent for future reference */ + u32 events = gfar_read(®s->ievent); + + /* Clear IEVENT */ + gfar_write(®s->ievent, events & IEVENT_ERR_MASK); + + /* Magic Packet is not an error. 
*/ + if ((priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) && + (events & IEVENT_MAG)) + events &= ~IEVENT_MAG; + + /* Hmm... */ + if (netif_msg_rx_err(priv) || netif_msg_tx_err(priv)) + netdev_dbg(dev, + "error interrupt (ievent=0x%08x imask=0x%08x)\n", + events, gfar_read(®s->imask)); + + /* Update the error counters */ + if (events & IEVENT_TXE) { + dev->stats.tx_errors++; + + if (events & IEVENT_LC) + dev->stats.tx_window_errors++; + if (events & IEVENT_CRL) + dev->stats.tx_aborted_errors++; + if (events & IEVENT_XFUN) { + netif_dbg(priv, tx_err, dev, + "TX FIFO underrun, packet dropped\n"); + dev->stats.tx_dropped++; + atomic64_inc(&priv->extra_stats.tx_underrun); + + schedule_work(&priv->reset_task); + } + netif_dbg(priv, tx_err, dev, "Transmit Error\n"); + } + if (events & IEVENT_BSY) { + dev->stats.rx_over_errors++; + atomic64_inc(&priv->extra_stats.rx_bsy); + + netif_dbg(priv, rx_err, dev, "busy error (rstat: %x)\n", + gfar_read(®s->rstat)); + } + if (events & IEVENT_BABR) { + dev->stats.rx_errors++; + atomic64_inc(&priv->extra_stats.rx_babr); + + netif_dbg(priv, rx_err, dev, "babbling RX error\n"); + } + if (events & IEVENT_EBERR) { + atomic64_inc(&priv->extra_stats.eberr); + netif_dbg(priv, rx_err, dev, "bus error\n"); + } + if (events & IEVENT_RXC) + netif_dbg(priv, rx_status, dev, "control frame\n"); + + if (events & IEVENT_BABT) { + atomic64_inc(&priv->extra_stats.tx_babt); + netif_dbg(priv, tx_err, dev, "babbling TX error\n"); + } + return IRQ_HANDLED; +} + +/* The interrupt handler for devices with one interrupt */ +static irqreturn_t gfar_interrupt(int irq, void *grp_id) +{ + struct gfar_priv_grp *gfargrp = grp_id; + + /* Save ievent for future reference */ + u32 events = gfar_read(&gfargrp->regs->ievent); + + /* Check for reception */ + if (events & IEVENT_RX_MASK) + gfar_receive(irq, grp_id); + + /* Check for transmit completion */ + if (events & IEVENT_TX_MASK) + gfar_transmit(irq, grp_id); + + /* Check for errors */ + if (events & IEVENT_ERR_MASK) + gfar_error(irq, grp_id); + + return IRQ_HANDLED; +} #ifdef CONFIG_NET_POLL_CONTROLLER /* Polling 'interrupt' - used by things like netconsole to send skbs @@ -3363,44 +2904,154 @@ static void gfar_netpoll(struct net_device *dev) } #endif -/* The interrupt handler for devices with one interrupt */ -static irqreturn_t gfar_interrupt(int irq, void *grp_id) +static void free_grp_irqs(struct gfar_priv_grp *grp) { - struct gfar_priv_grp *gfargrp = grp_id; + free_irq(gfar_irq(grp, TX)->irq, grp); + free_irq(gfar_irq(grp, RX)->irq, grp); + free_irq(gfar_irq(grp, ER)->irq, grp); +} - /* Save ievent for future reference */ - u32 events = gfar_read(&gfargrp->regs->ievent); +static int register_grp_irqs(struct gfar_priv_grp *grp) +{ + struct gfar_private *priv = grp->priv; + struct net_device *dev = priv->ndev; + int err; - /* Check for reception */ - if (events & IEVENT_RX_MASK) - gfar_receive(irq, grp_id); + /* If the device has multiple interrupts, register for + * them. 
Otherwise, only register for the one + */ + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) { + /* Install our interrupt handlers for Error, + * Transmit, and Receive + */ + err = request_irq(gfar_irq(grp, ER)->irq, gfar_error, 0, + gfar_irq(grp, ER)->name, grp); + if (err < 0) { + netif_err(priv, intr, dev, "Can't get IRQ %d\n", + gfar_irq(grp, ER)->irq); - /* Check for transmit completion */ - if (events & IEVENT_TX_MASK) - gfar_transmit(irq, grp_id); + goto err_irq_fail; + } + enable_irq_wake(gfar_irq(grp, ER)->irq); - /* Check for errors */ - if (events & IEVENT_ERR_MASK) - gfar_error(irq, grp_id); + err = request_irq(gfar_irq(grp, TX)->irq, gfar_transmit, 0, + gfar_irq(grp, TX)->name, grp); + if (err < 0) { + netif_err(priv, intr, dev, "Can't get IRQ %d\n", + gfar_irq(grp, TX)->irq); + goto tx_irq_fail; + } + err = request_irq(gfar_irq(grp, RX)->irq, gfar_receive, 0, + gfar_irq(grp, RX)->name, grp); + if (err < 0) { + netif_err(priv, intr, dev, "Can't get IRQ %d\n", + gfar_irq(grp, RX)->irq); + goto rx_irq_fail; + } + enable_irq_wake(gfar_irq(grp, RX)->irq); + + } else { + err = request_irq(gfar_irq(grp, TX)->irq, gfar_interrupt, 0, + gfar_irq(grp, TX)->name, grp); + if (err < 0) { + netif_err(priv, intr, dev, "Can't get IRQ %d\n", + gfar_irq(grp, TX)->irq); + goto err_irq_fail; + } + enable_irq_wake(gfar_irq(grp, TX)->irq); + } + + return 0; + +rx_irq_fail: + free_irq(gfar_irq(grp, TX)->irq, grp); +tx_irq_fail: + free_irq(gfar_irq(grp, ER)->irq, grp); +err_irq_fail: + return err; - return IRQ_HANDLED; } -/* Called every time the controller might need to be made - * aware of new link state. The PHY code conveys this - * information through variables in the phydev structure, and this - * function converts those variables into the appropriate - * register values, and can bring down the device if needed. +static void gfar_free_irq(struct gfar_private *priv) +{ + int i; + + /* Free the IRQs */ + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) { + for (i = 0; i < priv->num_grps; i++) + free_grp_irqs(&priv->gfargrp[i]); + } else { + for (i = 0; i < priv->num_grps; i++) + free_irq(gfar_irq(&priv->gfargrp[i], TX)->irq, + &priv->gfargrp[i]); + } +} + +static int gfar_request_irq(struct gfar_private *priv) +{ + int err, i, j; + + for (i = 0; i < priv->num_grps; i++) { + err = register_grp_irqs(&priv->gfargrp[i]); + if (err) { + for (j = 0; j < i; j++) + free_grp_irqs(&priv->gfargrp[j]); + return err; + } + } + + return 0; +} + +/* Called when something needs to use the ethernet device + * Returns 0 for success. 
*/ -static void adjust_link(struct net_device *dev) +static int gfar_enet_open(struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; + int err; - if (unlikely(phydev->link != priv->oldlink || - (phydev->link && (phydev->duplex != priv->oldduplex || - phydev->speed != priv->oldspeed)))) - gfar_update_link_state(priv); + err = init_phy(dev); + if (err) + return err; + + err = gfar_request_irq(priv); + if (err) + return err; + + err = startup_gfar(dev); + if (err) + return err; + + return err; +} + +/* Stops the kernel queue, and halts the controller */ +static int gfar_close(struct net_device *dev) +{ + struct gfar_private *priv = netdev_priv(dev); + + cancel_work_sync(&priv->reset_task); + stop_gfar(dev); + + /* Disconnect from the PHY */ + phy_disconnect(dev->phydev); + + gfar_free_irq(priv); + + return 0; +} + +/* Clears each of the exact match registers to zero, so they + * don't interfere with normal reception + */ +static void gfar_clear_exact_match(struct net_device *dev) +{ + int idx; + static const u8 zero_arr[ETH_ALEN] = {0, 0, 0, 0, 0, 0}; + + for (idx = 1; idx < GFAR_EM_NUM + 1; idx++) + gfar_set_mac_for_addr(dev, idx, zero_arr); } /* Update the hash table based on the current list of multicast @@ -3494,274 +3145,582 @@ static void gfar_set_multi(struct net_device *dev) } } - -/* Clears each of the exact match registers to zero, so they - * don't interfere with normal reception - */ -static void gfar_clear_exact_match(struct net_device *dev) +void gfar_mac_reset(struct gfar_private *priv) { - int idx; - static const u8 zero_arr[ETH_ALEN] = {0, 0, 0, 0, 0, 0}; + struct gfar __iomem *regs = priv->gfargrp[0].regs; + u32 tempval; - for (idx = 1; idx < GFAR_EM_NUM + 1; idx++) - gfar_set_mac_for_addr(dev, idx, zero_arr); -} + /* Reset MAC layer */ + gfar_write(®s->maccfg1, MACCFG1_SOFT_RESET); -/* Set the appropriate hash bit for the given addr */ -/* The algorithm works like so: - * 1) Take the Destination Address (ie the multicast address), and - * do a CRC on it (little endian), and reverse the bits of the - * result. - * 2) Use the 8 most significant bits as a hash into a 256-entry - * table. The table is controlled through 8 32-bit registers: - * gaddr0-7. gaddr0's MSB is entry 0, and gaddr7's LSB is - * gaddr7. This means that the 3 most significant bits in the - * hash index which gaddr register to use, and the 5 other bits - * indicate which bit (assuming an IBM numbering scheme, which - * for PowerPC (tm) is usually the case) in the register holds - * the entry. 
- */ -static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr) -{ - u32 tempval; - struct gfar_private *priv = netdev_priv(dev); - u32 result = ether_crc(ETH_ALEN, addr); - int width = priv->hash_width; - u8 whichbit = (result >> (32 - width)) & 0x1f; - u8 whichreg = result >> (32 - width + 5); - u32 value = (1 << (31-whichbit)); + /* We need to delay at least 3 TX clocks */ + udelay(3); - tempval = gfar_read(priv->hash_regs[whichreg]); - tempval |= value; - gfar_write(priv->hash_regs[whichreg], tempval); -} + /* the soft reset bit is not self-resetting, so we need to + * clear it before resuming normal operation + */ + gfar_write(®s->maccfg1, 0); + udelay(3); -/* There are multiple MAC Address register pairs on some controllers - * This function sets the numth pair to a given address - */ -static void gfar_set_mac_for_addr(struct net_device *dev, int num, - const u8 *addr) + gfar_rx_offload_en(priv); + + /* Initialize the max receive frame/buffer lengths */ + gfar_write(®s->maxfrm, GFAR_JUMBO_FRAME_SIZE); + gfar_write(®s->mrblr, GFAR_RXB_SIZE); + + /* Initialize the Minimum Frame Length Register */ + gfar_write(®s->minflr, MINFLR_INIT_SETTINGS); + + /* Initialize MACCFG2. */ + tempval = MACCFG2_INIT_SETTINGS; + + /* eTSEC74 erratum: Rx frames of length MAXFRM or MAXFRM-1 + * are marked as truncated. Avoid this by MACCFG2[Huge Frame]=1, + * and by checking RxBD[LG] and discarding larger than MAXFRM. + */ + if (gfar_has_errata(priv, GFAR_ERRATA_74)) + tempval |= MACCFG2_HUGEFRAME | MACCFG2_LENGTHCHECK; + + gfar_write(®s->maccfg2, tempval); + + /* Clear mac addr hash registers */ + gfar_write(®s->igaddr0, 0); + gfar_write(®s->igaddr1, 0); + gfar_write(®s->igaddr2, 0); + gfar_write(®s->igaddr3, 0); + gfar_write(®s->igaddr4, 0); + gfar_write(®s->igaddr5, 0); + gfar_write(®s->igaddr6, 0); + gfar_write(®s->igaddr7, 0); + + gfar_write(®s->gaddr0, 0); + gfar_write(®s->gaddr1, 0); + gfar_write(®s->gaddr2, 0); + gfar_write(®s->gaddr3, 0); + gfar_write(®s->gaddr4, 0); + gfar_write(®s->gaddr5, 0); + gfar_write(®s->gaddr6, 0); + gfar_write(®s->gaddr7, 0); + + if (priv->extended_hash) + gfar_clear_exact_match(priv->ndev); + + gfar_mac_rx_config(priv); + + gfar_mac_tx_config(priv); + + gfar_set_mac_address(priv->ndev); + + gfar_set_multi(priv->ndev); + + /* clear ievent and imask before configuring coalescing */ + gfar_ints_disable(priv); + + /* Configure the coalescing support */ + gfar_configure_coalescing_all(priv); +} + +static void gfar_hw_init(struct gfar_private *priv) { - struct gfar_private *priv = netdev_priv(dev); struct gfar __iomem *regs = priv->gfargrp[0].regs; - u32 tempval; - u32 __iomem *macptr = ®s->macstnaddr1; + u32 attrs; - macptr += num*2; + /* Stop the DMA engine now, in case it was running before + * (The firmware could have used it, and left it running). + */ + gfar_halt(priv); - /* For a station address of 0x12345678ABCD in transmission - * order (BE), MACnADDR1 is set to 0xCDAB7856 and - * MACnADDR2 is set to 0x34120000. 
+ gfar_mac_reset(priv); + + /* Zero out the rmon mib registers if it has them */ + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) { + memset_io(&(regs->rmon), 0, sizeof(struct rmon_mib)); + + /* Mask off the CAM interrupts */ + gfar_write(®s->rmon.cam1, 0xffffffff); + gfar_write(®s->rmon.cam2, 0xffffffff); + } + + /* Initialize ECNTRL */ + gfar_write(®s->ecntrl, ECNTRL_INIT_SETTINGS); + + /* Set the extraction length and index */ + attrs = ATTRELI_EL(priv->rx_stash_size) | + ATTRELI_EI(priv->rx_stash_index); + + gfar_write(®s->attreli, attrs); + + /* Start with defaults, and add stashing + * depending on driver parameters */ - tempval = (addr[5] << 24) | (addr[4] << 16) | - (addr[3] << 8) | addr[2]; + attrs = ATTR_INIT_SETTINGS; - gfar_write(macptr, tempval); + if (priv->bd_stash_en) + attrs |= ATTR_BDSTASH; - tempval = (addr[1] << 24) | (addr[0] << 16); + if (priv->rx_stash_size != 0) + attrs |= ATTR_BUFSTASH; - gfar_write(macptr+1, tempval); + gfar_write(®s->attr, attrs); + + /* FIFO configs */ + gfar_write(®s->fifo_tx_thr, DEFAULT_FIFO_TX_THR); + gfar_write(®s->fifo_tx_starve, DEFAULT_FIFO_TX_STARVE); + gfar_write(®s->fifo_tx_starve_shutoff, DEFAULT_FIFO_TX_STARVE_OFF); + + /* Program the interrupt steering regs, only for MG devices */ + if (priv->num_grps > 1) + gfar_write_isrg(priv); } -/* GFAR error interrupt handler */ -static irqreturn_t gfar_error(int irq, void *grp_id) +static const struct net_device_ops gfar_netdev_ops = { + .ndo_open = gfar_enet_open, + .ndo_start_xmit = gfar_start_xmit, + .ndo_stop = gfar_close, + .ndo_change_mtu = gfar_change_mtu, + .ndo_set_features = gfar_set_features, + .ndo_set_rx_mode = gfar_set_multi, + .ndo_tx_timeout = gfar_timeout, + .ndo_do_ioctl = gfar_ioctl, + .ndo_get_stats = gfar_get_stats, + .ndo_change_carrier = fixed_phy_change_carrier, + .ndo_set_mac_address = gfar_set_mac_addr, + .ndo_validate_addr = eth_validate_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = gfar_netpoll, +#endif +}; + +/* Set up the ethernet device structure, private data, + * and anything else we need before we start + */ +static int gfar_probe(struct platform_device *ofdev) { - struct gfar_priv_grp *gfargrp = grp_id; - struct gfar __iomem *regs = gfargrp->regs; - struct gfar_private *priv= gfargrp->priv; - struct net_device *dev = priv->ndev; + struct device_node *np = ofdev->dev.of_node; + struct net_device *dev = NULL; + struct gfar_private *priv = NULL; + int err = 0, i; - /* Save ievent for future reference */ - u32 events = gfar_read(®s->ievent); + err = gfar_of_init(ofdev, &dev); - /* Clear IEVENT */ - gfar_write(®s->ievent, events & IEVENT_ERR_MASK); + if (err) + return err; - /* Magic Packet is not an error. */ - if ((priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) && - (events & IEVENT_MAG)) - events &= ~IEVENT_MAG; + priv = netdev_priv(dev); + priv->ndev = dev; + priv->ofdev = ofdev; + priv->dev = &ofdev->dev; + SET_NETDEV_DEV(dev, &ofdev->dev); - /* Hmm... 
*/ - if (netif_msg_rx_err(priv) || netif_msg_tx_err(priv)) - netdev_dbg(dev, - "error interrupt (ievent=0x%08x imask=0x%08x)\n", - events, gfar_read(®s->imask)); + INIT_WORK(&priv->reset_task, gfar_reset_task); - /* Update the error counters */ - if (events & IEVENT_TXE) { - dev->stats.tx_errors++; + platform_set_drvdata(ofdev, priv); - if (events & IEVENT_LC) - dev->stats.tx_window_errors++; - if (events & IEVENT_CRL) - dev->stats.tx_aborted_errors++; - if (events & IEVENT_XFUN) { - netif_dbg(priv, tx_err, dev, - "TX FIFO underrun, packet dropped\n"); - dev->stats.tx_dropped++; - atomic64_inc(&priv->extra_stats.tx_underrun); + gfar_detect_errata(priv); - schedule_work(&priv->reset_task); + /* Set the dev->base_addr to the gfar reg region */ + dev->base_addr = (unsigned long) priv->gfargrp[0].regs; + + /* Fill in the dev structure */ + dev->watchdog_timeo = TX_TIMEOUT; + /* MTU range: 50 - 9586 */ + dev->mtu = 1500; + dev->min_mtu = 50; + dev->max_mtu = GFAR_JUMBO_FRAME_SIZE - ETH_HLEN; + dev->netdev_ops = &gfar_netdev_ops; + dev->ethtool_ops = &gfar_ethtool_ops; + + /* Register for napi ...We are registering NAPI for each grp */ + for (i = 0; i < priv->num_grps; i++) { + if (priv->poll_mode == GFAR_SQ_POLLING) { + netif_napi_add(dev, &priv->gfargrp[i].napi_rx, + gfar_poll_rx_sq, GFAR_DEV_WEIGHT); + netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, + gfar_poll_tx_sq, 2); + } else { + netif_napi_add(dev, &priv->gfargrp[i].napi_rx, + gfar_poll_rx, GFAR_DEV_WEIGHT); + netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, + gfar_poll_tx, 2); } - netif_dbg(priv, tx_err, dev, "Transmit Error\n"); } - if (events & IEVENT_BSY) { - dev->stats.rx_over_errors++; - atomic64_inc(&priv->extra_stats.rx_bsy); - netif_dbg(priv, rx_err, dev, "busy error (rstat: %x)\n", - gfar_read(®s->rstat)); + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) { + dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | + NETIF_F_RXCSUM; + dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG | + NETIF_F_RXCSUM | NETIF_F_HIGHDMA; } - if (events & IEVENT_BABR) { - dev->stats.rx_errors++; - atomic64_inc(&priv->extra_stats.rx_babr); - netif_dbg(priv, rx_err, dev, "babbling RX error\n"); + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) { + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; + dev->features |= NETIF_F_HW_VLAN_CTAG_RX; } - if (events & IEVENT_EBERR) { - atomic64_inc(&priv->extra_stats.eberr); - netif_dbg(priv, rx_err, dev, "bus error\n"); + + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + + gfar_init_addr_hash_table(priv); + + /* Insert receive time stamps into padding alignment bytes, and + * plus 2 bytes padding to ensure the cpu alignment. 
+ */ + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER) + priv->padding = 8 + DEFAULT_PADDING; + + if (dev->features & NETIF_F_IP_CSUM || + priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER) + dev->needed_headroom = GMAC_FCB_LEN; + + /* Initializing some of the rx/tx queue level parameters */ + for (i = 0; i < priv->num_tx_queues; i++) { + priv->tx_queue[i]->tx_ring_size = DEFAULT_TX_RING_SIZE; + priv->tx_queue[i]->num_txbdfree = DEFAULT_TX_RING_SIZE; + priv->tx_queue[i]->txcoalescing = DEFAULT_TX_COALESCE; + priv->tx_queue[i]->txic = DEFAULT_TXIC; } - if (events & IEVENT_RXC) - netif_dbg(priv, rx_status, dev, "control frame\n"); - if (events & IEVENT_BABT) { - atomic64_inc(&priv->extra_stats.tx_babt); - netif_dbg(priv, tx_err, dev, "babbling TX error\n"); + for (i = 0; i < priv->num_rx_queues; i++) { + priv->rx_queue[i]->rx_ring_size = DEFAULT_RX_RING_SIZE; + priv->rx_queue[i]->rxcoalescing = DEFAULT_RX_COALESCE; + priv->rx_queue[i]->rxic = DEFAULT_RXIC; } - return IRQ_HANDLED; + + /* Always enable rx filer if available */ + priv->rx_filer_enable = + (priv->device_flags & FSL_GIANFAR_DEV_HAS_RX_FILER) ? 1 : 0; + /* Enable most messages by default */ + priv->msg_enable = (NETIF_MSG_IFUP << 1 ) - 1; + /* use pritority h/w tx queue scheduling for single queue devices */ + if (priv->num_tx_queues == 1) + priv->prio_sched_en = 1; + + set_bit(GFAR_DOWN, &priv->state); + + gfar_hw_init(priv); + + /* Carrier starts down, phylib will bring it up */ + netif_carrier_off(dev); + + err = register_netdev(dev); + + if (err) { + pr_err("%s: Cannot register net device, aborting\n", dev->name); + goto register_fail; + } + + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) + priv->wol_supported |= GFAR_WOL_MAGIC; + + if ((priv->device_flags & FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER) && + priv->rx_filer_enable) + priv->wol_supported |= GFAR_WOL_FILER_UCAST; + + device_set_wakeup_capable(&ofdev->dev, priv->wol_supported); + + /* fill out IRQ number and name fields */ + for (i = 0; i < priv->num_grps; i++) { + struct gfar_priv_grp *grp = &priv->gfargrp[i]; + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) { + sprintf(gfar_irq(grp, TX)->name, "%s%s%c%s", + dev->name, "_g", '0' + i, "_tx"); + sprintf(gfar_irq(grp, RX)->name, "%s%s%c%s", + dev->name, "_g", '0' + i, "_rx"); + sprintf(gfar_irq(grp, ER)->name, "%s%s%c%s", + dev->name, "_g", '0' + i, "_er"); + } else + strcpy(gfar_irq(grp, TX)->name, dev->name); + } + + /* Initialize the filer table */ + gfar_init_filer_table(priv); + + /* Print out the device info */ + netdev_info(dev, "mac: %pM\n", dev->dev_addr); + + /* Even more device info helps when determining which kernel + * provided which set of benchmarks. 
+ */ + netdev_info(dev, "Running with NAPI enabled\n"); + for (i = 0; i < priv->num_rx_queues; i++) + netdev_info(dev, "RX BD ring size for Q[%d]: %d\n", + i, priv->rx_queue[i]->rx_ring_size); + for (i = 0; i < priv->num_tx_queues; i++) + netdev_info(dev, "TX BD ring size for Q[%d]: %d\n", + i, priv->tx_queue[i]->tx_ring_size); + + return 0; + +register_fail: + if (of_phy_is_fixed_link(np)) + of_phy_deregister_fixed_link(np); + unmap_group_regs(priv); + gfar_free_rx_queues(priv); + gfar_free_tx_queues(priv); + of_node_put(priv->phy_node); + of_node_put(priv->tbi_node); + free_gfar_dev(priv); + return err; } -static u32 gfar_get_flowctrl_cfg(struct gfar_private *priv) +static int gfar_remove(struct platform_device *ofdev) { - struct net_device *ndev = priv->ndev; - struct phy_device *phydev = ndev->phydev; - u32 val = 0; + struct gfar_private *priv = platform_get_drvdata(ofdev); + struct device_node *np = ofdev->dev.of_node; - if (!phydev->duplex) - return val; + of_node_put(priv->phy_node); + of_node_put(priv->tbi_node); - if (!priv->pause_aneg_en) { - if (priv->tx_pause_en) - val |= MACCFG1_TX_FLOW; - if (priv->rx_pause_en) - val |= MACCFG1_RX_FLOW; - } else { - u16 lcl_adv, rmt_adv; - u8 flowctrl; - /* get link partner capabilities */ - rmt_adv = 0; - if (phydev->pause) - rmt_adv = LPA_PAUSE_CAP; - if (phydev->asym_pause) - rmt_adv |= LPA_PAUSE_ASYM; + unregister_netdev(priv->ndev); - lcl_adv = linkmode_adv_to_lcl_adv_t(phydev->advertising); - flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); - if (flowctrl & FLOW_CTRL_TX) - val |= MACCFG1_TX_FLOW; - if (flowctrl & FLOW_CTRL_RX) - val |= MACCFG1_RX_FLOW; + if (of_phy_is_fixed_link(np)) + of_phy_deregister_fixed_link(np); + + unmap_group_regs(priv); + gfar_free_rx_queues(priv); + gfar_free_tx_queues(priv); + free_gfar_dev(priv); + + return 0; +} + +#ifdef CONFIG_PM + +static void __gfar_filer_disable(struct gfar_private *priv) +{ + struct gfar __iomem *regs = priv->gfargrp[0].regs; + u32 temp; + + temp = gfar_read(®s->rctrl); + temp &= ~(RCTRL_FILREN | RCTRL_PRSDEP_INIT); + gfar_write(®s->rctrl, temp); +} + +static void __gfar_filer_enable(struct gfar_private *priv) +{ + struct gfar __iomem *regs = priv->gfargrp[0].regs; + u32 temp; + + temp = gfar_read(®s->rctrl); + temp |= RCTRL_FILREN | RCTRL_PRSDEP_INIT; + gfar_write(®s->rctrl, temp); +} + +/* Filer rules implementing wol capabilities */ +static void gfar_filer_config_wol(struct gfar_private *priv) +{ + unsigned int i; + u32 rqfcr; + + __gfar_filer_disable(priv); + + /* clear the filer table, reject any packet by default */ + rqfcr = RQFCR_RJE | RQFCR_CMP_MATCH; + for (i = 0; i <= MAX_FILER_IDX; i++) + gfar_write_filer(priv, i, rqfcr, 0); + + i = 0; + if (priv->wol_opts & GFAR_WOL_FILER_UCAST) { + /* unicast packet, accept it */ + struct net_device *ndev = priv->ndev; + /* get the default rx queue index */ + u8 qindex = (u8)priv->gfargrp[0].rx_queue->qindex; + u32 dest_mac_addr = (ndev->dev_addr[0] << 16) | + (ndev->dev_addr[1] << 8) | + ndev->dev_addr[2]; + + rqfcr = (qindex << 10) | RQFCR_AND | + RQFCR_CMP_EXACT | RQFCR_PID_DAH; + + gfar_write_filer(priv, i++, rqfcr, dest_mac_addr); + + dest_mac_addr = (ndev->dev_addr[3] << 16) | + (ndev->dev_addr[4] << 8) | + ndev->dev_addr[5]; + rqfcr = (qindex << 10) | RQFCR_GPI | + RQFCR_CMP_EXACT | RQFCR_PID_DAL; + gfar_write_filer(priv, i++, rqfcr, dest_mac_addr); } - return val; + __gfar_filer_enable(priv); } -static noinline void gfar_update_link_state(struct gfar_private *priv) +static void gfar_filer_restore_table(struct gfar_private 
*priv) +{ + u32 rqfcr, rqfpr; + unsigned int i; + + __gfar_filer_disable(priv); + + for (i = 0; i <= MAX_FILER_IDX; i++) { + rqfcr = priv->ftp_rqfcr[i]; + rqfpr = priv->ftp_rqfpr[i]; + gfar_write_filer(priv, i, rqfcr, rqfpr); + } + + __gfar_filer_enable(priv); +} + +/* gfar_start() for Rx only and with the FGPI filer interrupt enabled */ +static void gfar_start_wol_filer(struct gfar_private *priv) { struct gfar __iomem *regs = priv->gfargrp[0].regs; - struct net_device *ndev = priv->ndev; - struct phy_device *phydev = ndev->phydev; - struct gfar_priv_rx_q *rx_queue = NULL; - int i; + u32 tempval; + int i = 0; - if (unlikely(test_bit(GFAR_RESETTING, &priv->state))) - return; + /* Enable Rx hw queues */ + gfar_write(®s->rqueue, priv->rqueue); - if (phydev->link) { - u32 tempval1 = gfar_read(®s->maccfg1); - u32 tempval = gfar_read(®s->maccfg2); - u32 ecntrl = gfar_read(®s->ecntrl); - u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW); + /* Initialize DMACTRL to have WWR and WOP */ + tempval = gfar_read(®s->dmactrl); + tempval |= DMACTRL_INIT_SETTINGS; + gfar_write(®s->dmactrl, tempval); - if (phydev->duplex != priv->oldduplex) { - if (!(phydev->duplex)) - tempval &= ~(MACCFG2_FULL_DUPLEX); - else - tempval |= MACCFG2_FULL_DUPLEX; + /* Make sure we aren't stopped */ + tempval = gfar_read(®s->dmactrl); + tempval &= ~DMACTRL_GRS; + gfar_write(®s->dmactrl, tempval); - priv->oldduplex = phydev->duplex; - } + for (i = 0; i < priv->num_grps; i++) { + regs = priv->gfargrp[i].regs; + /* Clear RHLT, so that the DMA starts polling now */ + gfar_write(®s->rstat, priv->gfargrp[i].rstat); + /* enable the Filer General Purpose Interrupt */ + gfar_write(®s->imask, IMASK_FGPI); + } - if (phydev->speed != priv->oldspeed) { - switch (phydev->speed) { - case 1000: - tempval = - ((tempval & ~(MACCFG2_IF)) | MACCFG2_GMII); + /* Enable Rx DMA */ + tempval = gfar_read(®s->maccfg1); + tempval |= MACCFG1_RX_EN; + gfar_write(®s->maccfg1, tempval); +} - ecntrl &= ~(ECNTRL_R100); - break; - case 100: - case 10: - tempval = - ((tempval & ~(MACCFG2_IF)) | MACCFG2_MII); +static int gfar_suspend(struct device *dev) +{ + struct gfar_private *priv = dev_get_drvdata(dev); + struct net_device *ndev = priv->ndev; + struct gfar __iomem *regs = priv->gfargrp[0].regs; + u32 tempval; + u16 wol = priv->wol_opts; - /* Reduced mode distinguishes - * between 10 and 100 - */ - if (phydev->speed == SPEED_100) - ecntrl |= ECNTRL_R100; - else - ecntrl &= ~(ECNTRL_R100); - break; - default: - netif_warn(priv, link, priv->ndev, - "Ack! 
Speed (%d) is not 10/100/1000!\n", - phydev->speed); - break; - } + if (!netif_running(ndev)) + return 0; - priv->oldspeed = phydev->speed; - } + disable_napi(priv); + netif_tx_lock(ndev); + netif_device_detach(ndev); + netif_tx_unlock(ndev); - tempval1 &= ~(MACCFG1_TX_FLOW | MACCFG1_RX_FLOW); - tempval1 |= gfar_get_flowctrl_cfg(priv); + gfar_halt(priv); - /* Turn last free buffer recording on */ - if ((tempval1 & MACCFG1_TX_FLOW) && !tx_flow_oldval) { - for (i = 0; i < priv->num_rx_queues; i++) { - u32 bdp_dma; + if (wol & GFAR_WOL_MAGIC) { + /* Enable interrupt on Magic Packet */ + gfar_write(®s->imask, IMASK_MAG); - rx_queue = priv->rx_queue[i]; - bdp_dma = gfar_rxbd_dma_lastfree(rx_queue); - gfar_write(rx_queue->rfbptr, bdp_dma); - } + /* Enable Magic Packet mode */ + tempval = gfar_read(®s->maccfg2); + tempval |= MACCFG2_MPEN; + gfar_write(®s->maccfg2, tempval); - priv->tx_actual_en = 1; - } + /* re-enable the Rx block */ + tempval = gfar_read(®s->maccfg1); + tempval |= MACCFG1_RX_EN; + gfar_write(®s->maccfg1, tempval); - if (unlikely(!(tempval1 & MACCFG1_TX_FLOW) && tx_flow_oldval)) - priv->tx_actual_en = 0; + } else if (wol & GFAR_WOL_FILER_UCAST) { + gfar_filer_config_wol(priv); + gfar_start_wol_filer(priv); - gfar_write(®s->maccfg1, tempval1); + } else { + phy_stop(ndev->phydev); + } + + return 0; +} + +static int gfar_resume(struct device *dev) +{ + struct gfar_private *priv = dev_get_drvdata(dev); + struct net_device *ndev = priv->ndev; + struct gfar __iomem *regs = priv->gfargrp[0].regs; + u32 tempval; + u16 wol = priv->wol_opts; + + if (!netif_running(ndev)) + return 0; + + if (wol & GFAR_WOL_MAGIC) { + /* Disable Magic Packet mode */ + tempval = gfar_read(®s->maccfg2); + tempval &= ~MACCFG2_MPEN; gfar_write(®s->maccfg2, tempval); - gfar_write(®s->ecntrl, ecntrl); - if (!priv->oldlink) - priv->oldlink = 1; + } else if (wol & GFAR_WOL_FILER_UCAST) { + /* need to stop rx only, tx is already down */ + gfar_halt(priv); + gfar_filer_restore_table(priv); - } else if (priv->oldlink) { - priv->oldlink = 0; - priv->oldspeed = 0; - priv->oldduplex = -1; + } else { + phy_start(ndev->phydev); } - if (netif_msg_link(priv)) - phy_print_status(phydev); + gfar_start(priv); + + netif_device_attach(ndev); + enable_napi(priv); + + return 0; +} + +static int gfar_restore(struct device *dev) +{ + struct gfar_private *priv = dev_get_drvdata(dev); + struct net_device *ndev = priv->ndev; + + if (!netif_running(ndev)) { + netif_device_attach(ndev); + + return 0; + } + + gfar_init_bds(ndev); + + gfar_mac_reset(priv); + + gfar_init_tx_rx_base(priv); + + gfar_start(priv); + + priv->oldlink = 0; + priv->oldspeed = 0; + priv->oldduplex = -1; + + if (ndev->phydev) + phy_start(ndev->phydev); + + netif_device_attach(ndev); + enable_napi(priv); + + return 0; } +static const struct dev_pm_ops gfar_pm_ops = { + .suspend = gfar_suspend, + .resume = gfar_resume, + .freeze = gfar_suspend, + .thaw = gfar_resume, + .restore = gfar_restore, +}; + +#define GFAR_PM_OPS (&gfar_pm_ops) + +#else + +#define GFAR_PM_OPS NULL + +#endif + static const struct of_device_id gfar_match[] = { { diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index f2af96349c7b..f472a6dbbe6f 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -67,8 +67,6 @@ struct ethtool_rx_list { /* Number of bytes to align the rx bufs to */ #define RXBUF_ALIGNMENT 64 -#define PHY_INIT_TIMEOUT 100000 - #define DRV_NAME "gfar-enet" extern const char 
gfar_driver_version[]; @@ -88,10 +86,6 @@ extern const char gfar_driver_version[]; #define GFAR_RX_MAX_RING_SIZE 256 #define GFAR_TX_MAX_RING_SIZE 256 -#define GFAR_MAX_FIFO_THRESHOLD 511 -#define GFAR_MAX_FIFO_STARVE 511 -#define GFAR_MAX_FIFO_STARVE_OFF 511 - #define FBTHR_SHIFT 24 #define DEFAULT_RX_LFC_THR 16 #define DEFAULT_LFC_PTVVAL 4 @@ -109,9 +103,6 @@ extern const char gfar_driver_version[]; #define DEFAULT_FIFO_TX_THR 0x100 #define DEFAULT_FIFO_TX_STARVE 0x40 #define DEFAULT_FIFO_TX_STARVE_OFF 0x80 -#define DEFAULT_BD_STASH 1 -#define DEFAULT_STASH_LENGTH 96 -#define DEFAULT_STASH_INDEX 0 /* The number of Exact Match registers */ #define GFAR_EM_NUM 15 @@ -139,15 +130,6 @@ extern const char gfar_driver_version[]; #define DEFAULT_RX_COALESCE 0 #define DEFAULT_RXCOUNT 0 -#define GFAR_SUPPORTED (SUPPORTED_10baseT_Half \ - | SUPPORTED_10baseT_Full \ - | SUPPORTED_100baseT_Half \ - | SUPPORTED_100baseT_Full \ - | SUPPORTED_Autoneg \ - | SUPPORTED_MII) - -#define GFAR_SUPPORTED_GBIT SUPPORTED_1000baseT_Full - /* TBI register addresses */ #define MII_TBICON 0x11 @@ -185,8 +167,6 @@ extern const char gfar_driver_version[]; #define ECNTRL_REDUCED_MII_MODE 0x00000004 #define ECNTRL_SGMII_MODE 0x00000002 -#define MRBLR_INIT_SETTINGS DEFAULT_RX_BUFFER_SIZE - #define MINFLR_INIT_SETTINGS 0x00000040 /* Tqueue control */ @@ -266,12 +246,6 @@ extern const char gfar_driver_version[]; #define DEFAULT_TXIC mk_ic_value(DEFAULT_TXCOUNT, DEFAULT_TXTIME) #define DEFAULT_RXIC mk_ic_value(DEFAULT_RXCOUNT, DEFAULT_RXTIME) -#define skip_bd(bdp, stride, base, ring_size) ({ \ - typeof(bdp) new_bd = (bdp) + (stride); \ - (new_bd >= (base) + (ring_size)) ? (new_bd - (ring_size)) : new_bd; }) - -#define next_bd(bdp, base, ring_size) skip_bd(bdp, 1, base, ring_size) - #define RCTRL_TS_ENABLE 0x01000000 #define RCTRL_PAL_MASK 0x001f0000 #define RCTRL_LFC 0x00004000 @@ -385,11 +359,6 @@ extern const char gfar_driver_version[]; #define IMASK_RX_DISABLED ((~(IMASK_RX_DEFAULT)) & IMASK_DEFAULT) #define IMASK_TX_DISABLED ((~(IMASK_TX_DEFAULT)) & IMASK_DEFAULT) -/* Fifo management */ -#define FIFO_TX_THR_MASK 0x01ff -#define FIFO_TX_STARVE_MASK 0x01ff -#define FIFO_TX_STARVE_OFF_MASK 0x01ff - /* Attribute fields */ /* This enables rx snooping for buffers and descriptors */ @@ -1326,16 +1295,9 @@ static inline u32 gfar_rxbd_dma_lastfree(struct gfar_priv_rx_q *rxq) return bdp_dma; } -irqreturn_t gfar_receive(int irq, void *dev_id); int startup_gfar(struct net_device *dev); void stop_gfar(struct net_device *dev); -void reset_gfar(struct net_device *dev); void gfar_mac_reset(struct gfar_private *priv); -void gfar_halt(struct gfar_private *priv); -void gfar_start(struct gfar_private *priv); -void gfar_phy_test(struct mii_bus *bus, struct phy_device *phydev, int enable, - u32 regnum, u32 read); -void gfar_configure_coalescing_all(struct gfar_private *priv); int gfar_set_features(struct net_device *dev, netdev_features_t features); extern const struct ethtool_ops gfar_ethtool_ops; @@ -1348,13 +1310,6 @@ extern const struct ethtool_ops gfar_ethtool_ops; #define RQFCR_PID_PORT_MASK 0xFFFF0000 #define RQFCR_PID_MAC_MASK 0xFF000000 -struct gfar_mask_entry { - unsigned int mask; /* The mask value which is valid form start to end */ - unsigned int start; - unsigned int end; - unsigned int block; /* Same block values indicate depended entries */ -}; - /* Represents a receive filer table entry */ struct gfar_filer_entry { u32 ctrl; diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c 
b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 3433b46b90c1..3c8e4e2efc07 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -45,19 +45,6 @@ #define GFAR_MAX_COAL_USECS 0xffff #define GFAR_MAX_COAL_FRAMES 0xff -static void gfar_fill_stats(struct net_device *dev, struct ethtool_stats *dummy, - u64 *buf); -static void gfar_gstrings(struct net_device *dev, u32 stringset, u8 * buf); -static int gfar_gcoalesce(struct net_device *dev, - struct ethtool_coalesce *cvals); -static int gfar_scoalesce(struct net_device *dev, - struct ethtool_coalesce *cvals); -static void gfar_gringparam(struct net_device *dev, - struct ethtool_ringparam *rvals); -static int gfar_sringparam(struct net_device *dev, - struct ethtool_ringparam *rvals); -static void gfar_gdrvinfo(struct net_device *dev, - struct ethtool_drvinfo *drvinfo); static const char stat_gstrings[][ETH_GSTRING_LEN] = { /* extra stats */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index c1eba421ba82..3a14bbc26ea2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -24,7 +24,7 @@ #include "hns_dsaf_rcb.h" #include "hns_dsaf_misc.h" -const static char *g_dsaf_mode_match[DSAF_MODE_MAX] = { +static const char *g_dsaf_mode_match[DSAF_MODE_MAX] = { [DSAF_MODE_DISABLE_2PORT_64VM] = "2port-64vf", [DSAF_MODE_DISABLE_6PORT_0VM] = "6port-16rss", [DSAF_MODE_DISABLE_6PORT_16VM] = "6port-16vf", diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index 528f6243cdc6..03ca7d925e8e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -138,12 +138,28 @@ EXPORT_SYMBOL(hnae3_register_client); void hnae3_unregister_client(struct hnae3_client *client) { + struct hnae3_client *client_tmp; struct hnae3_ae_dev *ae_dev; + bool existed = false; if (!client) return; mutex_lock(&hnae3_common_lock); + + list_for_each_entry(client_tmp, &hnae3_client_list, node) { + if (client_tmp->type == client->type) { + existed = true; + break; + } + } + + if (!existed) { + mutex_unlock(&hnae3_common_lock); + pr_err("client %s does not exist!\n", client->name); + return; + } + /* un-initialize the client on every matched port */ list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) { hnae3_uninit_client_instance(client, ae_dev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index c52eccc1621d..aa692b13b6f3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -908,9 +908,11 @@ static struct hns3_enet_ring *hns3_backup_ringparam(struct hns3_nic_priv *priv) if (!tmp_rings) return NULL; - for (i = 0; i < handle->kinfo.num_tqps * 2; i++) + for (i = 0; i < handle->kinfo.num_tqps * 2; i++) { memcpy(&tmp_rings[i], priv->ring_data[i].ring, sizeof(struct hns3_enet_ring)); + tmp_rings[i].skb = NULL; + } return tmp_rings; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 1c6b501fb7ca..6dcce489fdc5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -949,7 +949,7 @@ static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev) hdev->rst_stats.reset_cnt); } -void 
hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev) +static void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev) { struct hclge_desc *desc_src, *desc_tmp; struct hclge_get_m7_bd_cmd *req; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 2b65f2799846..8d4dc1b019c1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -66,6 +66,7 @@ static void hclge_rfs_filter_expire(struct hclge_dev *hdev); static void hclge_clear_arfs_rules(struct hnae3_handle *handle); static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, unsigned long *addr); +static int hclge_set_default_loopback(struct hclge_dev *hdev); static struct hnae3_ae_algo ae_algo; @@ -2599,6 +2600,10 @@ static int hclge_mac_init(struct hclge_dev *hdev) return ret; } + ret = hclge_set_default_loopback(hdev); + if (ret) + return ret; + ret = hclge_buffer_alloc(hdev); if (ret) dev_err(&hdev->pdev->dev, @@ -3619,6 +3624,7 @@ static int hclge_reset_stack(struct hclge_dev *hdev) static void hclge_reset(struct hclge_dev *hdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + enum hnae3_reset_type reset_level; int ret; /* Initialize ae_dev reset status as well, in case enet layer wants to @@ -3697,10 +3703,10 @@ static void hclge_reset(struct hclge_dev *hdev) * it should be handled as soon as possible. since some errors * need this kind of reset to fix. */ - hdev->reset_level = hclge_get_reset_level(ae_dev, - &hdev->default_reset_request); - if (hdev->reset_level != HNAE3_NONE_RESET) - set_bit(hdev->reset_level, &hdev->reset_request); + reset_level = hclge_get_reset_level(ae_dev, + &hdev->default_reset_request); + if (reset_level != HNAE3_NONE_RESET) + set_bit(reset_level, &hdev->reset_request); return; @@ -6173,7 +6179,7 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable) bool clear; hdev->fd_en = enable; - clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE ? 
true : false; + clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE; if (!enable) hclge_del_all_fd_entries(handle, clear); else @@ -6330,7 +6336,7 @@ static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en) return ret; } -static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en, +static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en, enum hnae3_loop loop_mode) { #define HCLGE_SERDES_RETRY_MS 10 @@ -6391,6 +6397,17 @@ static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en, dev_err(&hdev->pdev->dev, "serdes loopback set failed in fw\n"); return -EIO; } + return ret; +} + +static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en, + enum hnae3_loop loop_mode) +{ + int ret; + + ret = hclge_cfg_serdes_loopback(hdev, en, loop_mode); + if (ret) + return ret; hclge_cfg_mac_mode(hdev, en); @@ -6534,6 +6551,22 @@ static int hclge_set_loopback(struct hnae3_handle *handle, return 0; } +static int hclge_set_default_loopback(struct hclge_dev *hdev) +{ + int ret; + + ret = hclge_set_app_loopback(hdev, false); + if (ret) + return ret; + + ret = hclge_cfg_serdes_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES); + if (ret) + return ret; + + return hclge_cfg_serdes_loopback(hdev, false, + HNAE3_LOOP_PARALLEL_SERDES); +} + static void hclge_reset_tqp_stats(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -7691,6 +7724,7 @@ static int hclge_set_vlan_tx_offload_cfg(struct hclge_vport *vport) struct hclge_vport_vtag_tx_cfg_cmd *req; struct hclge_dev *hdev = vport->back; struct hclge_desc desc; + u16 bmap_index; int status; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_PORT_TX_CFG, false); @@ -7713,8 +7747,10 @@ static int hclge_set_vlan_tx_offload_cfg(struct hclge_vport *vport) hnae3_set_bit(req->vport_vlan_cfg, HCLGE_CFG_NIC_ROCE_SEL_B, 0); req->vf_offset = vport->vport_id / HCLGE_VF_NUM_PER_CMD; - req->vf_bitmap[req->vf_offset] = - 1 << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE); + bmap_index = vport->vport_id % HCLGE_VF_NUM_PER_CMD / + HCLGE_VF_NUM_PER_BYTE; + req->vf_bitmap[bmap_index] = + 1U << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE); status = hclge_cmd_send(&hdev->hw, &desc, 1); if (status) @@ -7731,6 +7767,7 @@ static int hclge_set_vlan_rx_offload_cfg(struct hclge_vport *vport) struct hclge_vport_vtag_rx_cfg_cmd *req; struct hclge_dev *hdev = vport->back; struct hclge_desc desc; + u16 bmap_index; int status; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_PORT_RX_CFG, false); @@ -7746,8 +7783,10 @@ static int hclge_set_vlan_rx_offload_cfg(struct hclge_vport *vport) vcfg->vlan2_vlan_prionly ? 
1 : 0); req->vf_offset = vport->vport_id / HCLGE_VF_NUM_PER_CMD; - req->vf_bitmap[req->vf_offset] = - 1 << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE); + bmap_index = vport->vport_id % HCLGE_VF_NUM_PER_CMD / + HCLGE_VF_NUM_PER_BYTE; + req->vf_bitmap[bmap_index] = + 1U << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE); status = hclge_cmd_send(&hdev->hw, &desc, 1); if (status) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index fdf43d87e983..3c8a2f55c43a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12530,7 +12530,8 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, if (need_reset && prog) for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->xdp_rings[i]->xsk_umem) - (void)i40e_xsk_async_xmit(vsi->netdev, i); + (void)i40e_xsk_wakeup(vsi->netdev, i, + XDP_WAKEUP_RX); return 0; } @@ -12852,7 +12853,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_bridge_setlink = i40e_ndo_bridge_setlink, .ndo_bpf = i40e_xdp, .ndo_xdp_xmit = i40e_xdp_xmit, - .ndo_xsk_async_xmit = i40e_xsk_async_xmit, + .ndo_xsk_wakeup = i40e_xsk_wakeup, .ndo_dfwd_add_station = i40e_fwd_add, .ndo_dfwd_del_station = i40e_fwd_del, }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 32bad014d76c..0373bc6c7e61 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -116,7 +116,7 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, return err; /* Kick start the NAPI context so that receiving will start */ - err = i40e_xsk_async_xmit(vsi->netdev, qid); + err = i40e_xsk_wakeup(vsi->netdev, qid, XDP_WAKEUP_RX); if (err) return err; } @@ -190,7 +190,9 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem, **/ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) { + struct xdp_umem *umem = rx_ring->xsk_umem; int err, result = I40E_XDP_PASS; + u64 offset = umem->headroom; struct i40e_ring *xdp_ring; struct bpf_prog *xdp_prog; u32 act; @@ -201,7 +203,10 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) */ xdp_prog = READ_ONCE(rx_ring->xdp_prog); act = bpf_prog_run_xdp(xdp_prog, xdp); - xdp->handle += xdp->data - xdp->data_hard_start; + offset += xdp->data - xdp->data_hard_start; + + xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset); + switch (act) { case XDP_PASS: break; @@ -262,7 +267,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring, bi->addr = xdp_umem_get_data(umem, handle); bi->addr += hr; - bi->handle = handle + umem->headroom; + bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom); xsk_umem_discard_addr(umem); return true; @@ -299,7 +304,7 @@ static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring, bi->addr = xdp_umem_get_data(umem, handle); bi->addr += hr; - bi->handle = handle + umem->headroom; + bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom); xsk_umem_discard_addr_rq(umem); return true; @@ -420,8 +425,6 @@ static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring, struct i40e_rx_buffer *old_bi) { struct i40e_rx_buffer *new_bi = &rx_ring->rx_bi[rx_ring->next_to_alloc]; - unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask; - u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM; u16 nta = rx_ring->next_to_alloc; /* update, and store next to alloc */ @@ -429,14 +432,9 @@ static void i40e_reuse_rx_buffer_zc(struct 
i40e_ring *rx_ring, rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; /* transfer page from old buffer to new buffer */ - new_bi->dma = old_bi->dma & mask; - new_bi->dma += hr; - - new_bi->addr = (void *)((unsigned long)old_bi->addr & mask); - new_bi->addr += hr; - - new_bi->handle = old_bi->handle & mask; - new_bi->handle += rx_ring->xsk_umem->headroom; + new_bi->dma = old_bi->dma; + new_bi->addr = old_bi->addr; + new_bi->handle = old_bi->handle; old_bi->addr = NULL; } @@ -471,7 +469,8 @@ void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle) bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle); bi->addr += hr; - bi->handle = (u64)handle + rx_ring->xsk_umem->headroom; + bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle, + rx_ring->xsk_umem->headroom); } /** @@ -626,6 +625,15 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) i40e_finalize_xdp_rx(rx_ring, xdp_xmit); i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); + + if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) { + if (failure || rx_ring->next_to_clean == rx_ring->next_to_use) + xsk_set_rx_need_wakeup(rx_ring->xsk_umem); + else + xsk_clear_rx_need_wakeup(rx_ring->xsk_umem); + + return (int)total_rx_packets; + } return failure ? budget : (int)total_rx_packets; } @@ -681,6 +689,8 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) i40e_xdp_ring_update_tail(xdp_ring); xsk_umem_consume_tx_done(xdp_ring->xsk_umem); + if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem)) + xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem); } return !!budget && work_done; @@ -759,19 +769,27 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, i40e_update_tx_stats(tx_ring, completed_frames, total_bytes); out_xmit: + if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) { + if (tx_ring->next_to_clean == tx_ring->next_to_use) + xsk_set_tx_need_wakeup(tx_ring->xsk_umem); + else + xsk_clear_tx_need_wakeup(tx_ring->xsk_umem); + } + xmit_done = i40e_xmit_zc(tx_ring, budget); return work_done && xmit_done; } /** - * i40e_xsk_async_xmit - Implements the ndo_xsk_async_xmit + * i40e_xsk_wakeup - Implements the ndo_xsk_wakeup * @dev: the netdevice * @queue_id: queue id to wake up + * @flags: ignored in our case since we have Rx and Tx in the same NAPI. * * Returns <0 for errors, 0 otherwise. 
**/ -int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id) +int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h index 8cc0a2e7d9a2..9ed59c14eb55 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h @@ -18,6 +18,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget); bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring, int napi_budget); -int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id); +int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); #endif /* _I40E_XSK_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index bf9aa533a7c6..4da0cde9695b 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -35,9 +35,9 @@ struct ice_aqc_get_ver { /* Queue Shutdown (direct 0x0003) */ struct ice_aqc_q_shutdown { - __le32 driver_unloading; + u8 driver_unloading; #define ICE_AQC_DRIVER_UNLOADING BIT(0) - u8 reserved[12]; + u8 reserved[15]; }; /* Request resource ownership (direct 0x0008) @@ -91,6 +91,7 @@ struct ice_aqc_list_caps_elem { #define ICE_AQC_CAPS_SRIOV 0x0012 #define ICE_AQC_CAPS_VF 0x0013 #define ICE_AQC_CAPS_VSI 0x0017 +#define ICE_AQC_CAPS_DCB 0x0018 #define ICE_AQC_CAPS_RSS 0x0040 #define ICE_AQC_CAPS_RXQS 0x0041 #define ICE_AQC_CAPS_TXQS 0x0042 diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 302ad981129c..9492cd34b09d 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1275,7 +1275,7 @@ enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading) ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_q_shutdown); if (unloading) - cmd->driver_unloading = cpu_to_le32(ICE_AQC_DRIVER_UNLOADING); + cmd->driver_unloading = ICE_AQC_DRIVER_UNLOADING; return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); } @@ -1594,6 +1594,18 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, prefix, func_p->guar_num_vsi); } break; + case ICE_AQC_CAPS_DCB: + caps->dcb = (number == 1); + caps->active_tc_bitmap = logical_id; + caps->maxtc = phys_id; + ice_debug(hw, ICE_DBG_INIT, + "%s: DCB = %d\n", prefix, caps->dcb); + ice_debug(hw, ICE_DBG_INIT, + "%s: active TC bitmap = %d\n", prefix, + caps->active_tc_bitmap); + ice_debug(hw, ICE_DBG_INIT, + "%s: TC max = %d\n", prefix, caps->maxtc); + break; case ICE_AQC_CAPS_RSS: caps->rss_table_size = number; caps->rss_table_entry_width = logical_id; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index d60c942249e8..c5ee8d930611 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -444,9 +444,15 @@ ice_parse_cee_pgcfg_tlv(struct ice_cee_feat_tlv *tlv, * |pg0|pg1|pg2|pg3|pg4|pg5|pg6|pg7| * --------------------------------- */ - ice_for_each_traffic_class(i) + ice_for_each_traffic_class(i) { etscfg->tcbwtable[i] = buf[offset++]; + if (etscfg->prio_table[i] == ICE_CEE_PGID_STRICT) + dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_STRICT; + else + dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS; + } + /* Number of TCs supported (1 octet) */ etscfg->maxtcs = buf[offset]; } diff --git 
a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index d9578919aad8..e922adf1fa15 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -413,7 +413,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked) memset(&pi->local_dcbx_cfg, 0, sizeof(*dcbcfg)); dcbcfg->etscfg.willing = 1; - dcbcfg->etscfg.maxtcs = 8; + dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc; dcbcfg->etscfg.tcbwtable[0] = 100; dcbcfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS; @@ -422,7 +422,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked) dcbcfg->etsrec.willing = 0; dcbcfg->pfc.willing = 1; - dcbcfg->pfc.pfccap = IEEE_8021QAZ_MAX_TCS; + dcbcfg->pfc.pfccap = hw->func_caps.common_cap.maxtc; dcbcfg->numapps = 1; dcbcfg->app[0].selector = ICE_APP_SEL_ETHTYPE; @@ -452,11 +452,18 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked) port_info = hw->port_info; err = ice_init_dcb(hw); + if (err && !port_info->is_sw_lldp) { + dev_err(&pf->pdev->dev, "Error initializing DCB %d\n", err); + goto dcb_init_err; + } + + dev_info(&pf->pdev->dev, + "DCB is enabled in the hardware, max number of TCs supported on this port is %d\n", + pf->hw.func_caps.common_cap.maxtc); if (err) { /* FW LLDP is disabled, activate SW DCBX/LLDP mode */ dev_info(&pf->pdev->dev, "FW LLDP is disabled, DCBx/LLDP in SW mode.\n"); - port_info->is_sw_lldp = true; clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags); err = ice_dcb_sw_dflt_cfg(pf, locked); if (err) { @@ -468,11 +475,9 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked) pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); - set_bit(ICE_FLAG_DCB_ENA, pf->flags); return 0; } - port_info->is_sw_lldp = false; set_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags); /* DCBX in FW and LLDP enabled in FW */ @@ -484,7 +489,6 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked) if (err) goto dcb_init_err; - dev_info(&pf->pdev->dev, "DCBX offload supported\n"); return err; dcb_init_err: diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index f7dd0bd03d39..edba5bd79097 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3253,25 +3253,25 @@ static int ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, struct ice_ring_container *rc, struct ice_vsi *vsi) { + const char *c_type_str = (c_type == ICE_RX_CONTAINER) ?
"rx" : "tx"; + u32 use_adaptive_coalesce, coalesce_usecs; struct ice_pf *pf = vsi->back; u16 itr_setting; if (!rc->ring) return -EINVAL; - itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC; - switch (c_type) { case ICE_RX_CONTAINER: if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL || (ec->rx_coalesce_usecs_high && ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) { netdev_info(vsi->netdev, - "Invalid value, rx-usecs-high valid values are 0 (disabled), %d-%d\n", - pf->hw.intrl_gran, ICE_MAX_INTRL); + "Invalid value, %s-usecs-high valid values are 0 (disabled), %d-%d\n", + c_type_str, pf->hw.intrl_gran, + ICE_MAX_INTRL); return -EINVAL; } - if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) { rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high; wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx), @@ -3279,60 +3279,60 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, pf->hw.intrl_gran)); } - if (ec->rx_coalesce_usecs != itr_setting && - ec->use_adaptive_rx_coalesce) { - netdev_info(vsi->netdev, - "Rx interrupt throttling cannot be changed if adaptive-rx is enabled\n"); - return -EINVAL; - } + use_adaptive_coalesce = ec->use_adaptive_rx_coalesce; + coalesce_usecs = ec->rx_coalesce_usecs; - if (ec->rx_coalesce_usecs > ICE_ITR_MAX) { - netdev_info(vsi->netdev, - "Invalid value, rx-usecs range is 0-%d\n", - ICE_ITR_MAX); - return -EINVAL; - } - - if (ec->use_adaptive_rx_coalesce) { - rc->itr_setting |= ICE_ITR_DYNAMIC; - } else { - rc->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); - rc->target_itr = ITR_TO_REG(rc->itr_setting); - } break; case ICE_TX_CONTAINER: if (ec->tx_coalesce_usecs_high) { netdev_info(vsi->netdev, - "setting tx-usecs-high is not supported\n"); - return -EINVAL; - } - - if (ec->tx_coalesce_usecs != itr_setting && - ec->use_adaptive_tx_coalesce) { - netdev_info(vsi->netdev, - "Tx interrupt throttling cannot be changed if adaptive-tx is enabled\n"); + "setting %s-usecs-high is not supported\n", + c_type_str); return -EINVAL; } - if (ec->tx_coalesce_usecs > ICE_ITR_MAX) { - netdev_info(vsi->netdev, - "Invalid value, tx-usecs range is 0-%d\n", - ICE_ITR_MAX); - return -EINVAL; - } + use_adaptive_coalesce = ec->use_adaptive_tx_coalesce; + coalesce_usecs = ec->tx_coalesce_usecs; - if (ec->use_adaptive_tx_coalesce) { - rc->itr_setting |= ICE_ITR_DYNAMIC; - } else { - rc->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); - rc->target_itr = ITR_TO_REG(rc->itr_setting); - } break; default: dev_dbg(&pf->pdev->dev, "Invalid container type %d\n", c_type); return -EINVAL; } + itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC; + if (coalesce_usecs != itr_setting && use_adaptive_coalesce) { + netdev_info(vsi->netdev, + "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n", + c_type_str, c_type_str); + return -EINVAL; + } + + if (coalesce_usecs > ICE_ITR_MAX) { + netdev_info(vsi->netdev, + "Invalid value, %s-usecs range is 0-%d\n", + c_type_str, ICE_ITR_MAX); + return -EINVAL; + } + + /* hardware only supports an ITR granularity of 2us */ + if (coalesce_usecs % 2 != 0) { + netdev_info(vsi->netdev, + "Invalid value, %s-usecs must be even\n", + c_type_str); + return -EINVAL; + } + + if (use_adaptive_coalesce) { + rc->itr_setting |= ICE_ITR_DYNAMIC; + } else { + /* store user facing value how it was set */ + rc->itr_setting = coalesce_usecs; + /* set to static and convert to value HW understands */ + rc->target_itr = + ITR_TO_REG(ITR_REG_ALIGN(rc->itr_setting)); + } + return 0; } diff --git 
a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f029aee32913..50a17a0337be 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -567,6 +567,8 @@ static void ice_reset_subtask(struct ice_pf *pf) reset_type = ICE_RESET_CORER; if (test_and_clear_bit(__ICE_GLOBR_RECV, pf->state)) reset_type = ICE_RESET_GLOBR; + if (test_and_clear_bit(__ICE_EMPR_RECV, pf->state)) + reset_type = ICE_RESET_EMPR; /* return if no valid reset type requested */ if (reset_type == ICE_RESET_INVAL) return; @@ -612,6 +614,22 @@ static void ice_reset_subtask(struct ice_pf *pf) } /** + * ice_print_topo_conflict - print topology conflict message + * @vsi: the VSI whose topology status is being checked + */ +static void ice_print_topo_conflict(struct ice_vsi *vsi) +{ + switch (vsi->port_info->phy.link_info.topo_media_conflict) { + case ICE_AQ_LINK_TOPO_CONFLICT: + case ICE_AQ_LINK_MEDIA_CONFLICT: + netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n"); + break; + default: + break; + } +} + +/** * ice_print_link_msg - print link up or down message * @vsi: the VSI whose link status is being queried * @isup: boolean for if the link is now up or down @@ -624,6 +642,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) const char *speed; const char *fec; const char *fc; + const char *an; if (!vsi) return; @@ -707,6 +726,12 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) break; } + /* check if autoneg completed, might be false due to not supported */ + if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) + an = "True"; + else + an = "False"; + /* Get FEC mode requested based on PHY caps last SW configuration */ caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); if (!caps) { @@ -731,8 +756,9 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) devm_kfree(&vsi->back->pdev->dev, caps); done: - netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Flow Control: %s\n", - speed, fec_req, fec, fc); + netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Autoneg: %s, Flow Control: %s\n", + speed, fec_req, fec, an, fc); + ice_print_topo_conflict(vsi); } /** @@ -2636,6 +2662,11 @@ static void ice_remove(struct pci_dev *pdev) ice_deinit_pf(pf); ice_deinit_hw(&pf->hw); ice_clear_interrupt_scheme(pf); + /* Issue a PFR as part of the prescribed driver unload flow. Do not + * do it via ice_schedule_reset() since there is no need to rebuild + * and the service task is already stopped. + */ + ice_reset(&pf->hw, ICE_RESET_PFR); pci_disable_pcie_error_reporting(pdev); } @@ -3446,12 +3477,16 @@ void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) vsi_stats = &vsi->net_stats; - if (test_bit(__ICE_DOWN, vsi->state) || !vsi->num_txq || !vsi->num_rxq) + if (!vsi->num_txq || !vsi->num_rxq) return; + /* netdev packet/byte stats come from ring counter. These are obtained * by summing up ring counters (done by ice_update_vsi_ring_stats). + * But, only call the update routine and read the registers if VSI is + * not down. 
*/ - ice_update_vsi_ring_stats(vsi); + if (!test_bit(__ICE_DOWN, vsi->state)) + ice_update_vsi_ring_stats(vsi); stats->tx_packets = vsi_stats->tx_packets; stats->tx_bytes = vsi_stats->tx_bytes; stats->rx_packets = vsi_stats->rx_packets; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 99cf527d2b1a..1acdd43a2edd 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1623,12 +1623,13 @@ ice_remove_rule_internal(struct ice_hw *hw, u8 recp_id, status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_NO_HDR_SIZE, 1, ice_aqc_opc_remove_sw_rules, NULL); - if (status) - goto exit; /* Remove a book keeping from the list */ devm_kfree(ice_hw_to_dev(hw), s_rule); + if (status) + goto exit; + list_del(&list_elem->list_entry); devm_kfree(ice_hw_to_dev(hw), list_elem); } diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index b538d0b9eb80..4501d50a7dcc 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -4,15 +4,15 @@ #ifndef _ICE_TYPE_H_ #define _ICE_TYPE_H_ +#define ICE_BYTES_PER_WORD 2 +#define ICE_BYTES_PER_DWORD 4 + #include "ice_status.h" #include "ice_hw_autogen.h" #include "ice_osdep.h" #include "ice_controlq.h" #include "ice_lan_tx_rx.h" -#define ICE_BYTES_PER_WORD 2 -#define ICE_BYTES_PER_DWORD 4 - static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc) { return test_bit(tc, &bitmap); @@ -139,6 +139,9 @@ struct ice_phy_info { /* Common HW capabilities for SW use */ struct ice_hw_common_caps { u32 valid_functions; + /* DCB capabilities */ + u32 active_tc_bitmap; + u32 maxtc; /* Tx/Rx queues */ u16 num_rxq; /* Number/Total Rx queues */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index b93324e9f4bc..c38939b1d496 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -1074,9 +1074,16 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) for (v = 0; v < pf->num_alloc_vfs; v++) ice_trigger_vf_reset(&pf->vf[v], is_vflr); - for (v = 0; v < pf->num_alloc_vfs; v++) - if (test_bit(ICE_VF_STATE_QS_ENA, pf->vf[v].vf_states)) - ice_dis_vf_qs(&pf->vf[v]); + for (v = 0; v < pf->num_alloc_vfs; v++) { + struct ice_vsi *vsi; + + vf = &pf->vf[v]; + vsi = pf->vsi[vf->lan_vsi_idx]; + if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) + ice_dis_vf_qs(vf); + ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL, + NULL, ICE_VF_RESET, vf->vf_id, NULL); + } /* HW requires some time to make sure it can flush the FIFO for a VF * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in @@ -1171,12 +1178,12 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) ice_dis_vf_qs(vf); - else - /* Call Disable LAN Tx queue AQ call even when queues are not - * enabled. This is needed for successful completion of VFR - */ - ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL, - NULL, ICE_VF_RESET, vf->vf_id, NULL); + + /* Call Disable LAN Tx queue AQ whether or not queues are + * enabled. This is needed for successful completion of VFR. 
+ */ + ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL, + NULL, ICE_VF_RESET, vf->vf_id, NULL); hw = &pf->hw; /* poll VPGEN_VFRSTAT reg to make sure @@ -2760,8 +2767,9 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) } vf->num_vlan--; - /* Disable VLAN pruning when removing VLAN */ - ice_cfg_vlan_pruning(vsi, false, false); + /* Disable VLAN pruning when the last VLAN is removed */ + if (!vf->num_vlan) + ice_cfg_vlan_pruning(vsi, false, false); /* Disable Unicast/Multicast VLAN promiscuous mode */ if (vlan_promisc) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 31629fc7e820..113f6087c7c9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -960,11 +960,9 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) return 0; err_aead: - memset(xs->aead, 0, sizeof(*xs->aead)); - kfree(xs->aead); + kzfree(xs->aead); err_xs: - memset(xs, 0, sizeof(*xs)); - kfree(xs); + kzfree(xs); err_out: msgbuf[1] = err; return err; @@ -1049,8 +1047,7 @@ int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) ixgbe_ipsec_del_sa(xs); /* remove the xs that was made-up in the add request */ - memset(xs, 0, sizeof(*xs)); - kfree(xs); + kzfree(xs); return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 17b7ae9f46ec..9bcae44e9883 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10260,7 +10260,8 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (need_reset && prog) for (i = 0; i < adapter->num_rx_queues; i++) if (adapter->xdp_ring[i]->xsk_umem) - (void)ixgbe_xsk_async_xmit(adapter->netdev, i); + (void)ixgbe_xsk_wakeup(adapter->netdev, i, + XDP_WAKEUP_RX); return 0; } @@ -10379,7 +10380,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_features_check = ixgbe_features_check, .ndo_bpf = ixgbe_xdp, .ndo_xdp_xmit = ixgbe_xdp_xmit, - .ndo_xsk_async_xmit = ixgbe_xsk_async_xmit, + .ndo_xsk_wakeup = ixgbe_xsk_wakeup, }; static void ixgbe_disable_txr_hw(struct ixgbe_adapter *adapter, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h index d93a690aff74..6d01700b46bc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h @@ -42,7 +42,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring); bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, struct ixgbe_ring *tx_ring, int napi_budget); -int ixgbe_xsk_async_xmit(struct net_device *dev, u32 queue_id); +int ixgbe_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring); #endif /* #define _IXGBE_TXRX_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 6b609553329f..ad802a8909e0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -100,7 +100,7 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, ixgbe_txrx_ring_enable(adapter, qid); /* Kick start the NAPI context so that receiving will start */ - err = ixgbe_xsk_async_xmit(adapter->netdev, qid); + err = 
ixgbe_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX); if (err) return err; } @@ -143,7 +143,9 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, struct ixgbe_ring *rx_ring, struct xdp_buff *xdp) { + struct xdp_umem *umem = rx_ring->xsk_umem; int err, result = IXGBE_XDP_PASS; + u64 offset = umem->headroom; struct bpf_prog *xdp_prog; struct xdp_frame *xdpf; u32 act; @@ -151,7 +153,10 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, rcu_read_lock(); xdp_prog = READ_ONCE(rx_ring->xdp_prog); act = bpf_prog_run_xdp(xdp_prog, xdp); - xdp->handle += xdp->data - xdp->data_hard_start; + offset += xdp->data - xdp->data_hard_start; + + xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset); + switch (act) { case XDP_PASS: break; @@ -201,8 +206,6 @@ ixgbe_rx_buffer *ixgbe_get_rx_buffer_zc(struct ixgbe_ring *rx_ring, static void ixgbe_reuse_rx_buffer_zc(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *obi) { - unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask; - u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM; u16 nta = rx_ring->next_to_alloc; struct ixgbe_rx_buffer *nbi; @@ -212,14 +215,9 @@ static void ixgbe_reuse_rx_buffer_zc(struct ixgbe_ring *rx_ring, rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; /* transfer page from old buffer to new buffer */ - nbi->dma = obi->dma & mask; - nbi->dma += hr; - - nbi->addr = (void *)((unsigned long)obi->addr & mask); - nbi->addr += hr; - - nbi->handle = obi->handle & mask; - nbi->handle += rx_ring->xsk_umem->headroom; + nbi->dma = obi->dma; + nbi->addr = obi->addr; + nbi->handle = obi->handle; obi->addr = NULL; obi->skb = NULL; @@ -250,7 +248,8 @@ void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle) bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle); bi->addr += hr; - bi->handle = (u64)handle + rx_ring->xsk_umem->headroom; + bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle, + rx_ring->xsk_umem->headroom); } static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring, @@ -276,7 +275,7 @@ static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring, bi->addr = xdp_umem_get_data(umem, handle); bi->addr += hr; - bi->handle = handle + umem->headroom; + bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom); xsk_umem_discard_addr(umem); return true; @@ -303,7 +302,7 @@ static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring, bi->addr = xdp_umem_get_data(umem, handle); bi->addr += hr; - bi->handle = handle + umem->headroom; + bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom); xsk_umem_discard_addr_rq(umem); return true; @@ -547,6 +546,14 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, q_vector->rx.total_packets += total_rx_packets; q_vector->rx.total_bytes += total_rx_bytes; + if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) { + if (failure || rx_ring->next_to_clean == rx_ring->next_to_use) + xsk_set_rx_need_wakeup(rx_ring->xsk_umem); + else + xsk_clear_rx_need_wakeup(rx_ring->xsk_umem); + + return (int)total_rx_packets; + } return failure ? 
budget : (int)total_rx_packets; } @@ -615,6 +622,8 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) if (tx_desc) { ixgbe_xdp_ring_update_tail(xdp_ring); xsk_umem_consume_tx_done(xdp_ring->xsk_umem); + if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem)) + xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem); } return !!budget && work_done; @@ -688,11 +697,19 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, if (xsk_frames) xsk_umem_complete_tx(umem, xsk_frames); + if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) { + if (tx_ring->next_to_clean == tx_ring->next_to_use) + xsk_set_tx_need_wakeup(tx_ring->xsk_umem); + else + xsk_clear_tx_need_wakeup(tx_ring->xsk_umem); + } + xmit_done = ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit); + return budget > 0 && xmit_done; } -int ixgbe_xsk_async_xmit(struct net_device *dev, u32 qid) +int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_ring *ring; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 37fef8cd25e3..0d8dd885b7d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -154,3 +154,10 @@ config MLX5_EN_TLS Build support for TLS cryptography-offload accelaration in the NIC. Note: Support for hardware with this capability needs to be selected for this option to become available. + +config MLX5_SW_STEERING + bool "Mellanox Technologies software-managed steering" + depends on MLX5_CORE_EN && MLX5_ESWITCH + default y + help + Build support for software-managed steering in the NIC. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index f4de9ccb5df1..5708fcc079ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -15,7 +15,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ - lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \ + lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o # @@ -67,3 +67,10 @@ mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \ en_accel/ktls.o en_accel/ktls_tx.o + +mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \ + steering/dr_matcher.o steering/dr_rule.o \ + steering/dr_icm_pool.o steering/dr_crc32.o \ + steering/dr_ste.o steering/dr_send.o \ + steering/dr_cmd.o steering/dr_fw.o \ + steering/dr_action.o steering/fs_dr.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index a400f4430c28..7bf7b6fbc776 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -4,6 +4,7 @@ #include <devlink.h> #include "mlx5_core.h" +#include "fs_core.h" #include "eswitch.h" static int mlx5_devlink_flash_update(struct devlink *devlink, @@ -107,12 +108,121 @@ void mlx5_devlink_free(struct devlink *devlink) devlink_free(devlink); } +static int mlx5_devlink_fs_mode_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) 
+{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + char *value = val.vstr; + int err = 0; + + if (!strcmp(value, "dmfs")) { + return 0; + } else if (!strcmp(value, "smfs")) { + u8 eswitch_mode; + bool smfs_cap; + + eswitch_mode = mlx5_eswitch_mode(dev->priv.eswitch); + smfs_cap = mlx5_fs_dr_is_supported(dev); + + if (!smfs_cap) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, + "Software managed steering is not supported by current device"); + } + + else if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "Software managed steering is not supported when eswitch offloads are enabled."); + err = -EOPNOTSUPP; + } + } else { + NL_SET_ERR_MSG_MOD(extack, + "Bad parameter: supported values are [\"dmfs\", \"smfs\"]"); + err = -EINVAL; + } + + return err; +} + +static int mlx5_devlink_fs_mode_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + enum mlx5_flow_steering_mode mode; + + if (!strcmp(ctx->val.vstr, "smfs")) + mode = MLX5_FLOW_STEERING_MODE_SMFS; + else + mode = MLX5_FLOW_STEERING_MODE_DMFS; + dev->priv.steering->mode = mode; + + return 0; +} + +static int mlx5_devlink_fs_mode_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) + strcpy(ctx->val.vstr, "smfs"); + else + strcpy(ctx->val.vstr, "dmfs"); + return 0; +} + +enum mlx5_devlink_param_id { + MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, +}; + +static const struct devlink_param mlx5_devlink_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, + "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set, + mlx5_devlink_fs_mode_validate), +}; + +static void mlx5_devlink_set_params_init_values(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + + if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS) + strcpy(value.vstr, "dmfs"); + else + strcpy(value.vstr, "smfs"); + devlink_param_driverinit_value_set(devlink, + MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, + value); +} + int mlx5_devlink_register(struct devlink *devlink, struct device *dev) { - return devlink_register(devlink, dev); + int err; + + err = devlink_register(devlink, dev); + if (err) + return err; + + err = devlink_params_register(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); + if (err) + goto params_reg_err; + mlx5_devlink_set_params_init_values(devlink); + devlink_params_publish(devlink); + return 0; + +params_reg_err: + devlink_unregister(devlink); + return err; } void mlx5_devlink_unregister(struct devlink *devlink) { + devlink_params_unregister(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); devlink_unregister(devlink); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 79301d116667..eb2e1f2138e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -25,18 +25,33 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, return headroom; } -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, + struct
mlx5e_xsk_param *xsk) { u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk); - u32 frag_sz = linear_rq_headroom + hw_mtu; + + return linear_rq_headroom + hw_mtu; +} + +u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk); /* AF_XDP doesn't build SKBs in place. */ if (!xsk) frag_sz = MLX5_SKB_FRAG_SZ(frag_sz); - /* XDP in mlx5e doesn't support multiple packets per page. */ + /* XDP in mlx5e doesn't support multiple packets per page. AF_XDP is a + * special case. It can run with frames smaller than a page, as it + * doesn't allocate pages dynamically. However, here we pretend that + * fragments are page-sized: it allows treating XSK frames like pages + * by redirecting alloc and free operations to XSK rings and by using + * the fact there are no multiple packets per "page" (which is a frame). + * The latter is important, because frames may come in a random order, + * and we will have trouble assembling a real page of multiple frames. + */ if (mlx5e_rx_is_xdp(params, xsk)) frag_sz = max_t(u32, frag_sz, PAGE_SIZE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 3a615d663d84..989d8f429438 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -76,6 +76,8 @@ static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile, u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); +u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 4c4620db3d31..f8ee18b4da6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -291,14 +291,14 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, */ goto out; } - - err = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - ipv4_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv4_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); goto destroy_neigh_entry; + } e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); @@ -407,13 +407,14 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, goto out; } - err = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - ipv6_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv6_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); goto destroy_neigh_entry; + } e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 1ed5c33e022f..f049e0ac308a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++
b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -122,6 +122,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, void *va, u16 *rx_headroom, u32 *len, bool xsk) { struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); + struct xdp_umem *umem = rq->umem; struct xdp_buff xdp; u32 act; int err; @@ -138,8 +139,11 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, xdp.rxq = &rq->xdp_rxq; act = bpf_prog_run_xdp(prog, &xdp); - if (xsk) - xdp.handle += xdp.data - xdp.data_hard_start; + if (xsk) { + u64 off = xdp.data - xdp.data_hard_start; + + xdp.handle = xsk_umem_adjust_offset(umem, xdp.handle, off); + } switch (act) { case XDP_PASS: *rx_headroom = xdp.data - xdp.data_hard_start; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 6a55573ec8f2..475b6bd5d29b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -24,7 +24,8 @@ int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, if (!xsk_umem_peek_addr_rq(umem, &handle)) return -ENOMEM; - dma_info->xsk.handle = handle + rq->buff.umem_headroom; + dma_info->xsk.handle = xsk_umem_adjust_offset(umem, handle, + rq->buff.umem_headroom); dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle); /* No need to add headroom to the DMA address. In striding RQ case, we @@ -104,7 +105,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, /* head_offset is not used in this function, because di->xsk.data and * di->addr point directly to the necessary place. Furthermore, in the - * current implementation, one page = one packet = one frame, so + * current implementation, UMR pages are mapped to XSK frames, so * head_offset should always be 0. */ WARN_ON_ONCE(head_offset); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h index 307b923a1361..cab0e93497ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -5,6 +5,7 @@ #define __MLX5_EN_XSK_RX_H__ #include "en.h" +#include <net/xdp_sock.h> /* RX data path */ @@ -24,4 +25,17 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); +static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err) +{ + if (!xsk_umem_uses_need_wakeup(rq->umem)) + return alloc_err; + + if (unlikely(alloc_err)) + xsk_set_rx_need_wakeup(rq->umem); + else + xsk_clear_rx_need_wakeup(rq->umem); + + return false; +} + #endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index d360750b25b7..631af8dee517 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -4,18 +4,23 @@ #include "setup.h" #include "en/params.h" +/* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may + * change unexpectedly, and mlx5e has a minimum valid stride size for striding + * RQ, keep this check in the driver. + */ +#define MLX5E_MIN_XSK_CHUNK_SIZE 2048 + bool mlx5e_validate_xsk_param(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5_core_dev *mdev) { - /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current - * mlx5e XDP implementation doesn't support multiple packets per page. 
- */ - if (xsk->chunk_size != PAGE_SIZE) + /* AF_XDP doesn't support frames larger than PAGE_SIZE. */ + if (xsk->chunk_size > PAGE_SIZE || + xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) return false; /* Current MTU and XSK headroom don't allow packets to fit the frames. */ - if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size) + if (mlx5e_rx_get_min_frag_sz(params, xsk) > xsk->chunk_size) return false; /* frag_sz is different for regular and XSK RQs, so ensure that linear diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c index fd2c75b4b519..87827477d38c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -7,7 +7,7 @@ #include "en/params.h" #include <net/xdp_sock.h> -int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid) +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_params *params = &priv->channels.params; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h index 7add18bf78d8..79b487d89757 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -5,11 +5,23 @@ #define __MLX5_EN_XSK_TX_H__ #include "en.h" +#include <net/xdp_sock.h> /* TX data path */ -int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid); +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags); bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); +static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq) +{ + if (!xsk_umem_uses_need_wakeup(sq->umem)) + return; + + if (sq->pc != sq->cc) + xsk_clear_tx_need_wakeup(sq->umem); + else + xsk_set_tx_need_wakeup(sq->umem); +} + #endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index dadadf221087..1cacda1bc1b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4580,7 +4580,7 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_tx_timeout = mlx5e_tx_timeout, .ndo_bpf = mlx5e_xdp, .ndo_xdp_xmit = mlx5e_xdp_xmit, - .ndo_xsk_async_xmit = mlx5e_xsk_async_xmit, + .ndo_xsk_wakeup = mlx5e_xsk_wakeup, #ifdef CONFIG_MLX5_EN_ARFS .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index a0ae5069d8c3..8e512216deb8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -161,7 +161,7 @@ struct mlx5e_encap_entry { */ struct hlist_node encap_hlist; struct list_head flows; - u32 encap_id; + struct mlx5_pkt_reformat *pkt_reformat; const struct ip_tunnel_info *tun_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 2fd2760d0bb7..d6a547238de0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -695,8 +695,11 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk; rq->mpwqe.actual_wq_head = head; - /* If XSK Fill Ring doesn't have enough frames, busy poll by - * rescheduling the NAPI poll. 
+ /* If XSK Fill Ring doesn't have enough frames, report the error, so + * that one of the actions can be performed: + * 1. If need_wakeup is used, signal that the application has to kick + * the driver when it refills the Fill Ring. + * 2. Otherwise, busy poll by rescheduling the NAPI poll. */ if (unlikely(alloc_err == -ENOMEM && rq->umem)) return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 5581a8045ede..30d26eba75a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -61,7 +61,7 @@ struct mlx5_nic_flow_attr { u32 action; u32 flow_tag; - u32 mod_hdr_id; + struct mlx5_modify_hdr *modify_hdr; u32 hairpin_tirn; u8 match_level; struct mlx5_flow_table *hairpin_ft; @@ -201,7 +201,7 @@ struct mlx5e_mod_hdr_entry { struct mod_hdr_key key; - u32 mod_hdr_id; + struct mlx5_modify_hdr *modify_hdr; refcount_t refcnt; struct completion res_ready; @@ -334,7 +334,7 @@ static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv, WARN_ON(!list_empty(&mh->flows)); if (mh->compl_result > 0) - mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id); + mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr); kfree(mh); } @@ -395,11 +395,11 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key); mutex_unlock(&tbl->lock); - err = mlx5_modify_header_alloc(priv->mdev, namespace, - mh->key.num_actions, - mh->key.actions, - &mh->mod_hdr_id); - if (err) { + mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace, + mh->key.num_actions, + mh->key.actions); + if (IS_ERR(mh->modify_hdr)) { + err = PTR_ERR(mh->modify_hdr); mh->compl_result = err; goto alloc_header_err; } @@ -412,9 +412,9 @@ attach_flow: list_add(&flow->mod_hdr, &mh->flows); spin_unlock(&mh->flows_lock); if (mlx5e_is_eswitch_flow(flow)) - flow->esw_attr->mod_hdr_id = mh->mod_hdr_id; + flow->esw_attr->modify_hdr = mh->modify_hdr; else - flow->nic_attr->mod_hdr_id = mh->mod_hdr_id; + flow->nic_attr->modify_hdr = mh->modify_hdr; return 0; @@ -906,7 +906,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, - .reformat_id = 0, .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; @@ -947,7 +946,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - flow_act.modify_id = attr->mod_hdr_id; + flow_act.modify_hdr = attr->modify_hdr; kfree(parse_attr->mod_hdr_actions); if (err) return err; @@ -1304,14 +1303,13 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow; int err; - err = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - e->encap_size, e->encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) { - mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n", - err); + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + e->encap_size, e->encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n", + PTR_ERR(e->pkt_reformat)); return; } e->flags |= MLX5_ENCAP_ENTRY_VALID; @@ -1326,7 +1324,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, esw_attr = flow->esw_attr; spec = &esw_attr->parse_attr->spec; - 
esw_attr->dests[flow->tmp_efi_index].encap_id = e->encap_id; + esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat; esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; /* Flow can be associated with multiple encap entries. * Before offloading the flow verify that all of them have @@ -1395,7 +1393,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, /* we know that the encap is valid */ e->flags &= ~MLX5_ENCAP_ENTRY_VALID; - mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); } static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) @@ -1561,7 +1559,7 @@ static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entr mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); if (e->flags & MLX5_ENCAP_ENTRY_VALID) - mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); } kfree(e->encap_header); @@ -1896,7 +1894,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *match_level = MLX5_MATCH_L2; } } else if (*match_level != MLX5_MATCH_NONE) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); + /* cvlan_tag enabled in match criteria and + * disabled in match value means both S & C tags + * don't exist (untagged of both) + */ MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); *match_level = MLX5_MATCH_L2; } @@ -3045,7 +3046,7 @@ attach_flow: flow->encaps[out_index].index = out_index; *encap_dev = e->out_dev; if (e->flags & MLX5_ENCAP_ENTRY_VALID) { - attr->dests[out_index].encap_id = e->encap_id; + attr->dests[out_index].pkt_reformat = e->pkt_reformat; attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; *encap_valid = true; } else { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 49b06b256c92..257a7c9f7a14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -33,6 +33,7 @@ #include <linux/irq.h> #include "en.h" #include "en/xdp.h" +#include "en/xsk/rx.h" #include "en/xsk/tx.h" static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) @@ -81,6 +82,29 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq) mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl); } +static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq) +{ + bool busy_xsk = false, xsk_rx_alloc_err; + + /* Handle the race between the application querying need_wakeup and the + * driver setting it: + * 1. Update need_wakeup both before and after the TX. If it goes to + * "yes", it can only happen with the first update. + * 2. If the application queried need_wakeup before we set it, the + * packets will be transmitted anyway, even w/o a wakeup. + * 3. Give a chance to clear need_wakeup after new packets were queued + * for TX. 
+ */ + mlx5e_xsk_update_tx_wakeup(xsksq); + busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); + mlx5e_xsk_update_tx_wakeup(xsksq); + + xsk_rx_alloc_err = xskrq->post_wqes(xskrq); + busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err); + + return busy_xsk; +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -122,8 +146,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) if (xsk_open) { mlx5e_poll_ico_cq(&c->xskicosq.cq); busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); - busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); - busy_xsk |= xskrq->post_wqes(xskrq); + busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq); } busy |= busy_xsk; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index aba9e7a6ad3c..6bd6f5895244 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -69,7 +69,7 @@ struct vport_ingress { struct mlx5_flow_group *allow_spoofchk_only_grp; struct mlx5_flow_group *allow_untagged_only_grp; struct mlx5_flow_group *drop_grp; - int modify_metadata_id; + struct mlx5_modify_hdr *modify_metadata; struct mlx5_flow_handle *modify_metadata_rule; struct mlx5_flow_handle *allow_rule; struct mlx5_flow_handle *drop_rule; @@ -153,6 +153,7 @@ struct mlx5_eswitch_fdb { } legacy; struct offloads_fdb { + struct mlx5_flow_namespace *ns; struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *peer_miss_grp; @@ -385,11 +386,11 @@ struct mlx5_esw_flow_attr { struct { u32 flags; struct mlx5_eswitch_rep *rep; + struct mlx5_pkt_reformat *pkt_reformat; struct mlx5_core_dev *mdev; - u32 encap_id; struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; - u32 mod_hdr_id; + struct mlx5_modify_hdr *modify_hdr; u8 inner_match_level; u8 outer_match_level; struct mlx5_fc *counter; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7d3582ee66b7..afa623b15a38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -190,10 +190,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, MLX5_FLOW_DEST_VPORT_VHCA_ID; if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - flow_act.reformat_id = attr->dests[j].encap_id; + flow_act.pkt_reformat = attr->dests[j].pkt_reformat; dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; - dest[i].vport.reformat_id = - attr->dests[j].encap_id; + dest[i].vport.pkt_reformat = + attr->dests[j].pkt_reformat; } i++; } @@ -213,7 +213,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - flow_act.modify_id = attr->mod_hdr_id; + flow_act.modify_hdr = attr->modify_hdr; fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split); if (IS_ERR(fdb)) { @@ -276,7 +276,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) { dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; - dest[i].vport.reformat_id = attr->dests[i].encap_id; + dest[i].vport.pkt_reformat = attr->dests[i].pkt_reformat; } } dest[i].type = 
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; @@ -1068,6 +1068,13 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) err = -EOPNOTSUPP; goto ns_err; } + esw->fdb_table.offloads.ns = root_ns; + err = mlx5_flow_namespace_set_mode(root_ns, + esw->dev->priv.steering->mode); + if (err) { + esw_warn(dev, "Failed to set FDB namespace steering mode\n"); + goto ns_err; + } max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | MLX5_CAP_GEN(dev, max_flow_counter_15_0); @@ -1207,6 +1214,8 @@ send_vport_err: esw_destroy_offloads_fast_fdb_tables(esw); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); slow_fdb_err: + /* Holds true only as long as DMFS is the default */ + mlx5_flow_namespace_set_mode(root_ns, MLX5_FLOW_STEERING_MODE_DMFS); ns_err: kvfree(flow_group_in); return err; @@ -1226,6 +1235,9 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); esw_destroy_offloads_fast_fdb_tables(esw); + /* Holds true only as long as DMFS is the default */ + mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, + MLX5_FLOW_STEERING_MODE_DMFS); } static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) @@ -1623,13 +1635,42 @@ static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) esw_del_fdb_peer_miss_rules(esw); } +static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw, + bool pair) +{ + struct mlx5_flow_root_namespace *peer_ns; + struct mlx5_flow_root_namespace *ns; + int err; + + peer_ns = peer_esw->dev->priv.steering->fdb_root_ns; + ns = esw->dev->priv.steering->fdb_root_ns; + + if (pair) { + err = mlx5_flow_namespace_set_peer(ns, peer_ns); + if (err) + return err; + + mlx5_flow_namespace_set_peer(peer_ns, ns); + if (err) { + mlx5_flow_namespace_set_peer(ns, NULL); + return err; + } + } else { + mlx5_flow_namespace_set_peer(ns, NULL); + mlx5_flow_namespace_set_peer(peer_ns, NULL); + } + + return 0; +} + static int mlx5_esw_offloads_devcom_event(int event, void *my_data, void *event_data) { struct mlx5_eswitch *esw = my_data; - struct mlx5_eswitch *peer_esw = event_data; struct mlx5_devcom *devcom = esw->dev->priv.devcom; + struct mlx5_eswitch *peer_esw = event_data; int err; switch (event) { @@ -1638,9 +1679,12 @@ static int mlx5_esw_offloads_devcom_event(int event, mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) break; - err = mlx5_esw_offloads_pair(esw, peer_esw); + err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true); if (err) goto err_out; + err = mlx5_esw_offloads_pair(esw, peer_esw); + if (err) + goto err_peer; err = mlx5_esw_offloads_pair(peer_esw, esw); if (err) @@ -1656,6 +1700,7 @@ static int mlx5_esw_offloads_devcom_event(int event, mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); mlx5_esw_offloads_unpair(peer_esw); mlx5_esw_offloads_unpair(esw); + mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); break; } @@ -1663,7 +1708,8 @@ static int mlx5_esw_offloads_devcom_event(int event, err_pair: mlx5_esw_offloads_unpair(esw); - +err_peer: + mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); err_out: mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d", event, err); @@ -1734,7 +1780,7 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, if (vport->ingress.modify_metadata_rule) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - flow_act.modify_id = vport->ingress.modify_metadata_id; + flow_act.modify_hdr = 
vport->ingress.modify_metadata; } vport->ingress.allow_rule = @@ -1770,9 +1816,11 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, MLX5_SET(set_action_in, action, data, mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); - err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, - 1, action, &vport->ingress.modify_metadata_id); - if (err) { + vport->ingress.modify_metadata = + mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + 1, action); + if (IS_ERR(vport->ingress.modify_metadata)) { + err = PTR_ERR(vport->ingress.modify_metadata); esw_warn(esw->dev, "failed to alloc modify header for vport %d ingress acl (%d)\n", vport->vport, err); @@ -1780,7 +1828,7 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; - flow_act.modify_id = vport->ingress.modify_metadata_id; + flow_act.modify_hdr = vport->ingress.modify_metadata; vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, &spec, &flow_act, NULL, 0); if (IS_ERR(vport->ingress.modify_metadata_rule)) { @@ -1794,7 +1842,7 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, out: if (err) - mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata); return err; } @@ -1803,7 +1851,7 @@ void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, { if (vport->ingress.modify_metadata_rule) { mlx5_del_flow_rules(vport->ingress.modify_metadata_rule); - mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata); vport->ingress.modify_metadata_rule = NULL; } @@ -2113,9 +2161,10 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) else esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; + mlx5_rdma_enable_roce(esw->dev); err = esw_offloads_steering_init(esw); if (err) - return err; + goto err_steering_init; err = esw_set_passing_vport_metadata(esw, true); if (err) @@ -2130,8 +2179,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) esw_offloads_devcom_init(esw); mutex_init(&esw->offloads.termtbl_mutex); - mlx5_rdma_enable_roce(esw->dev); - return 0; err_reps: @@ -2139,6 +2186,8 @@ err_reps: esw_set_passing_vport_metadata(esw, false); err_vport_metadata: esw_offloads_steering_cleanup(esw); +err_steering_init: + mlx5_rdma_disable_roce(esw->dev); return err; } @@ -2163,12 +2212,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_disable(struct mlx5_eswitch *esw) { - mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); esw_offloads_unload_all_reps(esw); mlx5_eswitch_disable_pf_vf_vports(esw); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); + mlx5_rdma_disable_roce(esw->dev); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 1e3381604b3d..579c306caa7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -107,6 +107,50 @@ static int mlx5_cmd_stub_delete_fte(struct mlx5_flow_root_namespace *ns, return 0; } +static int mlx5_cmd_stub_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + int reformat_type, + size_t size, + void *reformat_data, + enum 
mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) +{ + return 0; +} + +static void mlx5_cmd_stub_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) +{ +} + +static int mlx5_cmd_stub_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) +{ + return 0; +} + +static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) +{ +} + +static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns) +{ + return 0; +} + +static int mlx5_cmd_stub_create_ns(struct mlx5_flow_root_namespace *ns) +{ + return 0; +} + +static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns) +{ + return 0; +} + static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 underlay_qpn, bool disconnect) @@ -412,11 +456,13 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, } else { MLX5_SET(flow_context, in_flow_context, action, fte->action.action); - MLX5_SET(flow_context, in_flow_context, packet_reformat_id, - fte->action.reformat_id); + if (fte->action.pkt_reformat) + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.pkt_reformat->id); } - MLX5_SET(flow_context, in_flow_context, modify_header_id, - fte->action.modify_id); + if (fte->action.modify_hdr) + MLX5_SET(flow_context, in_flow_context, modify_header_id, + fte->action.modify_hdr->id); vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan); @@ -468,7 +514,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); MLX5_SET(extended_dest_format, in_dests, packet_reformat_id, - dst->dest_attr.vport.reformat_id); + dst->dest_attr.vport.pkt_reformat->id); } break; default: @@ -643,14 +689,15 @@ int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len, return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } -int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, - int reformat_type, - size_t size, - void *reformat_data, - enum mlx5_flow_namespace_type namespace, - u32 *packet_reformat_id) +static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) { u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)]; + struct mlx5_core_dev *dev = ns->dev; void *packet_reformat_context_in; int max_encap_size; void *reformat; @@ -693,35 +740,36 @@ int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, memset(out, 0, sizeof(out)); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - *packet_reformat_id = MLX5_GET(alloc_packet_reformat_context_out, - out, packet_reformat_id); + pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out, + out, packet_reformat_id); kfree(in); return err; } -EXPORT_SYMBOL(mlx5_packet_reformat_alloc); -void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, - u32 packet_reformat_id) +static void mlx5_cmd_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) { u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)]; u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)]; + struct mlx5_core_dev *dev = ns->dev; memset(in, 0, sizeof(in)); MLX5_SET(dealloc_packet_reformat_context_in, in, 
opcode, MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, - packet_reformat_id); + pkt_reformat->id); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -EXPORT_SYMBOL(mlx5_packet_reformat_dealloc); -int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, - u8 namespace, u8 num_actions, - void *modify_actions, u32 *modify_header_id) +static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) { u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)]; int max_actions, actions_size, inlen, err; + struct mlx5_core_dev *dev = ns->dev; void *actions_in; u8 table_type; u32 *in; @@ -772,26 +820,26 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, memset(out, 0, sizeof(out)); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); + modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); kfree(in); return err; } -EXPORT_SYMBOL(mlx5_modify_header_alloc); -void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) +static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) { u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)]; u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)]; + struct mlx5_core_dev *dev = ns->dev; memset(in, 0, sizeof(in)); MLX5_SET(dealloc_modify_header_context_in, in, opcode, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, - modify_header_id); + modify_hdr->id); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -EXPORT_SYMBOL(mlx5_modify_header_dealloc); static const struct mlx5_flow_cmds mlx5_flow_cmds = { .create_flow_table = mlx5_cmd_create_flow_table, @@ -803,6 +851,13 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = { .update_fte = mlx5_cmd_update_fte, .delete_fte = mlx5_cmd_delete_fte, .update_root_ft = mlx5_cmd_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_modify_header_dealloc, + .set_peer = mlx5_cmd_stub_set_peer, + .create_ns = mlx5_cmd_stub_create_ns, + .destroy_ns = mlx5_cmd_stub_destroy_ns, }; static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { @@ -815,9 +870,16 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { .update_fte = mlx5_cmd_stub_update_fte, .delete_fte = mlx5_cmd_stub_delete_fte, .update_root_ft = mlx5_cmd_stub_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_stub_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_stub_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc, + .set_peer = mlx5_cmd_stub_set_peer, + .create_ns = mlx5_cmd_stub_create_ns, + .destroy_ns = mlx5_cmd_stub_destroy_ns, }; -static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void) +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void) { return &mlx5_flow_cmds; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index bc4606306009..d62de642eca9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -75,6 +75,30 @@ struct mlx5_flow_cmds { struct mlx5_flow_table *ft, u32 underlay_qpn, bool disconnect); + + int (*packet_reformat_alloc)(struct mlx5_flow_root_namespace *ns, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat); + + void (*packet_reformat_dealloc)(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat); + + int (*modify_header_alloc)(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr); + + void (*modify_header_dealloc)(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr); + + int (*set_peer)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns); + + int (*create_ns)(struct mlx5_flow_root_namespace *ns); + int (*destroy_ns)(struct mlx5_flow_root_namespace *ns); }; int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); @@ -90,5 +114,6 @@ int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len, u32 *out); const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type); +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 7bdec442f0ac..3bbb49354829 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1415,7 +1415,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ? (d1->vport.vhca_id == d2->vport.vhca_id) : true) && ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ? 
- (d1->vport.reformat_id == d2->vport.reformat_id) : true)) || + (d1->vport.pkt_reformat->id == + d2->vport.pkt_reformat->id) : true)) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && d1->ft == d2->ft) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && @@ -2888,3 +2889,160 @@ out: return err; } EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn); + +static struct mlx5_flow_root_namespace +*get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5_flow_namespace *ns; + + if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS || + ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS) + ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0); + else + ns = mlx5_get_flow_namespace(dev, ns_type); + if (!ns) + return NULL; + + return find_root(&ns->node); +} + +struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 ns_type, u8 num_actions, + void *modify_actions) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_modify_hdr *modify_hdr; + int err; + + root = get_root_namespace(dev, ns_type); + if (!root) + return ERR_PTR(-EOPNOTSUPP); + + modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL); + if (!modify_hdr) + return ERR_PTR(-ENOMEM); + + modify_hdr->ns_type = ns_type; + err = root->cmds->modify_header_alloc(root, ns_type, num_actions, + modify_actions, modify_hdr); + if (err) { + kfree(modify_hdr); + return ERR_PTR(err); + } + + return modify_hdr; +} +EXPORT_SYMBOL(mlx5_modify_header_alloc); + +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, + struct mlx5_modify_hdr *modify_hdr) +{ + struct mlx5_flow_root_namespace *root; + + root = get_root_namespace(dev, modify_hdr->ns_type); + if (WARN_ON(!root)) + return; + root->cmds->modify_header_dealloc(root, modify_hdr); + kfree(modify_hdr); +} +EXPORT_SYMBOL(mlx5_modify_header_dealloc); + +struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5_pkt_reformat *pkt_reformat; + struct mlx5_flow_root_namespace *root; + int err; + + root = get_root_namespace(dev, ns_type); + if (!root) + return ERR_PTR(-EOPNOTSUPP); + + pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL); + if (!pkt_reformat) + return ERR_PTR(-ENOMEM); + + pkt_reformat->ns_type = ns_type; + pkt_reformat->reformat_type = reformat_type; + err = root->cmds->packet_reformat_alloc(root, reformat_type, size, + reformat_data, ns_type, + pkt_reformat); + if (err) { + kfree(pkt_reformat); + return ERR_PTR(err); + } + + return pkt_reformat; +} +EXPORT_SYMBOL(mlx5_packet_reformat_alloc); + +void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5_flow_root_namespace *root; + + root = get_root_namespace(dev, pkt_reformat->ns_type); + if (WARN_ON(!root)) + return; + root->cmds->packet_reformat_dealloc(root, pkt_reformat); + kfree(pkt_reformat); +} +EXPORT_SYMBOL(mlx5_packet_reformat_dealloc); + +int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns) +{ + if (peer_ns && ns->mode != peer_ns->mode) { + mlx5_core_err(ns->dev, + "Can't peer namespace of different steering mode\n"); + return -EINVAL; + } + + return ns->cmds->set_peer(ns, peer_ns); +} + +/* This function should be called only at init stage of the namespace. + * It is not safe to call this function while steering operations + * are executed in the namespace. 
+ */ +int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, + enum mlx5_flow_steering_mode mode) +{ + struct mlx5_flow_root_namespace *root; + const struct mlx5_flow_cmds *cmds; + int err; + + root = find_root(&ns->node); + if (&root->ns != ns) + /* Can't set cmds to non root namespace */ + return -EINVAL; + + if (root->table_type != FS_FT_FDB) + return -EOPNOTSUPP; + + if (root->mode == mode) + return 0; + + if (mode == MLX5_FLOW_STEERING_MODE_SMFS) + cmds = mlx5_fs_cmd_get_dr_cmds(); + else + cmds = mlx5_fs_cmd_get_fw_cmds(); + if (!cmds) + return -EOPNOTSUPP; + + err = cmds->create_ns(root); + if (err) { + mlx5_core_err(root->dev, "Failed to create flow namespace (%d)\n", + err); + return err; + } + + root->cmds->destroy_ns(root); + root->cmds = cmds; + root->mode = mode; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 0d16b4b5ab83..00717eba2256 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -37,6 +37,24 @@ #include <linux/mlx5/fs.h> #include <linux/rhashtable.h> #include <linux/llist.h> +#include <steering/fs_dr.h> + +struct mlx5_modify_hdr { + enum mlx5_flow_namespace_type ns_type; + union { + struct mlx5_fs_dr_action action; + u32 id; + }; +}; + +struct mlx5_pkt_reformat { + enum mlx5_flow_namespace_type ns_type; + int reformat_type; /* from mlx5_ifc */ + union { + struct mlx5_fs_dr_action action; + u32 id; + }; +}; /* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only, * and those are in parallel to one another when going over them to connect @@ -80,9 +98,15 @@ enum fs_fte_status { FS_FTE_STATUS_EXISTING = 1UL << 0, }; +enum mlx5_flow_steering_mode { + MLX5_FLOW_STEERING_MODE_DMFS, + MLX5_FLOW_STEERING_MODE_SMFS +}; + struct mlx5_flow_steering { struct mlx5_core_dev *dev; - struct kmem_cache *fgs_cache; + enum mlx5_flow_steering_mode mode; + struct kmem_cache *fgs_cache; struct kmem_cache *ftes_cache; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; @@ -128,6 +152,7 @@ struct mlx5_flow_handle { /* Type of children is mlx5_flow_group */ struct mlx5_flow_table { struct fs_node node; + struct mlx5_fs_dr_table fs_dr_table; u32 id; u16 vport; unsigned int max_fte; @@ -168,6 +193,7 @@ struct mlx5_ft_underlay_qp { /* Type of children is mlx5_flow_rule */ struct fs_fte { struct fs_node node; + struct mlx5_fs_dr_rule fs_dr_rule; u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 index; @@ -203,6 +229,7 @@ struct mlx5_flow_group_mask { /* Type of children is fs_fte */ struct mlx5_flow_group { struct fs_node node; + struct mlx5_fs_dr_matcher fs_dr_matcher; struct mlx5_flow_group_mask mask; u32 start_index; u32 max_ftes; @@ -214,6 +241,8 @@ struct mlx5_flow_group { struct mlx5_flow_root_namespace { struct mlx5_flow_namespace ns; + enum mlx5_flow_steering_mode mode; + struct mlx5_fs_dr_domain fs_dr_domain; enum fs_flow_table_type table_type; struct mlx5_core_dev *dev; struct mlx5_flow_table *root_ft; @@ -231,6 +260,14 @@ void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, unsigned long interval); +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); + +int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns); + +int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, + enum mlx5_flow_steering_mode mode); + int mlx5_init_fs(struct 
mlx5_core_dev *dev); void mlx5_cleanup_fs(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c new file mode 100644 index 000000000000..e065c2f68f5a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> + +#include "mlx5_core.h" +#include "lib/mlx5.h" + +struct mlx5_dm { + /* protect access to icm bitmask */ + spinlock_t lock; + unsigned long *steering_sw_icm_alloc_blocks; + unsigned long *header_modify_sw_icm_alloc_blocks; +}; + +struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) +{ + u64 header_modify_icm_blocks = 0; + u64 steering_icm_blocks = 0; + struct mlx5_dm *dm; + + if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)) + return 0; + + dm = kzalloc(sizeof(*dm), GFP_KERNEL); + if (!dm) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&dm->lock); + + if (MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address)) { + steering_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->steering_sw_icm_alloc_blocks = + kcalloc(BITS_TO_LONGS(steering_icm_blocks), + sizeof(unsigned long), GFP_KERNEL); + if (!dm->steering_sw_icm_alloc_blocks) + goto err_steering; + } + + if (MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address)) { + header_modify_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->header_modify_sw_icm_alloc_blocks = + kcalloc(BITS_TO_LONGS(header_modify_icm_blocks), + sizeof(unsigned long), GFP_KERNEL); + if (!dm->header_modify_sw_icm_alloc_blocks) + goto err_modify_hdr; + } + + return dm; + +err_modify_hdr: + kfree(dm->steering_sw_icm_alloc_blocks); + +err_steering: + kfree(dm); + + return ERR_PTR(-ENOMEM); +} + +void mlx5_dm_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_dm *dm = dev->dm; + + if (!dev->dm) + return; + + if (dm->steering_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->steering_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + kfree(dm->steering_sw_icm_alloc_blocks); + } + + if (dm->header_modify_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->header_modify_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, + log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + kfree(dm->header_modify_sw_icm_alloc_blocks); + } + + kfree(dm); +} + +int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, + u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id) +{ + u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {}; + struct mlx5_dm *dm = dev->dm; + unsigned long *block_map; + u64 icm_start_addr; + u32 log_icm_size; + u32 max_blocks; + u64 block_idx; + void *sw_icm; + int ret; + + if (!dev->dm) + return -EOPNOTSUPP; + + if (!length || (length & (length - 1)) || + length & (MLX5_SW_ICM_BLOCK_SIZE(dev) - 1)) + return -EINVAL; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + switch (type) { + case MLX5_SW_ICM_TYPE_STEERING: + icm_start_addr = 
MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_header_modify_sw_icm_size); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + if (!block_map) + return -EOPNOTSUPP; + + max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + spin_lock(&dm->lock); + block_idx = bitmap_find_next_zero_area(block_map, + max_blocks, + 0, + num_blocks, 0); + + if (block_idx < max_blocks) + bitmap_set(block_map, + block_idx, num_blocks); + + spin_unlock(&dm->lock); + + if (block_idx >= max_blocks) + return -ENOMEM; + + sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm); + icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr, + icm_start_addr); + MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) { + spin_lock(&dm->lock); + bitmap_clear(block_map, + block_idx, num_blocks); + spin_unlock(&dm->lock); + + return ret; + } + + *addr = icm_start_addr; + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_alloc); + +int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, + u64 length, u16 uid, phys_addr_t addr, u32 obj_id) +{ + u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + struct mlx5_dm *dm = dev->dm; + unsigned long *block_map; + u64 icm_start_addr; + u64 start_idx; + int err; + + if (!dev->dm) + return -EOPNOTSUPP; + + switch (type) { + case MLX5_SW_ICM_TYPE_STEERING: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + start_idx = (addr - icm_start_addr) >> MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + spin_lock(&dm->lock); + bitmap_clear(block_map, + start_idx, num_blocks); + spin_unlock(&dm->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_dealloc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index dee1a8658c87..9648c2297803 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -876,6 +876,10 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_eswitch_cleanup; } + dev->dm = mlx5_dm_create(dev); + if (IS_ERR(dev->dm)) + mlx5_core_warn(dev, "Failed to init device memory%d\n", err); + dev->tracer = mlx5_fw_tracer_create(dev); dev->hv_vhca = mlx5_hv_vhca_create(dev); @@ -910,6 +914,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev 
*dev) { mlx5_hv_vhca_destroy(dev->hv_vhca); mlx5_fw_tracer_destroy(dev->tracer); + mlx5_dm_cleanup(dev); mlx5_fpga_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); mlx5_sriov_cleanup(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 87b75b2207c4..b100489dc85c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -198,6 +198,9 @@ int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); +struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev); +void mlx5_dm_cleanup(struct mlx5_core_dev *dev); + #define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \ MLX5_CAP_GEN((mdev), pps_modify) && \ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 18af6981e0be..0fc7de4aa572 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -14,9 +14,6 @@ static void mlx5_rdma_disable_roce_steering(struct mlx5_core_dev *dev) { struct mlx5_core_roce *roce = &dev->priv.roce; - if (!roce->ft) - return; - mlx5_del_flow_rules(roce->allow_rule); mlx5_destroy_flow_group(roce->fg); mlx5_destroy_flow_table(roce->ft); @@ -145,6 +142,11 @@ static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) { + struct mlx5_core_roce *roce = &dev->priv.roce; + + if (!roce->ft) + return; + mlx5_rdma_disable_roce_steering(dev); mlx5_rdma_del_roce_addr(dev); mlx5_nic_vport_disable_roce(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile new file mode 100644 index 000000000000..c78512eed8d7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c new file mode 100644 index 000000000000..a02f87f85c17 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -0,0 +1,1588 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "dr_types.h" + +enum dr_action_domain { + DR_ACTION_DOMAIN_NIC_INGRESS, + DR_ACTION_DOMAIN_NIC_EGRESS, + DR_ACTION_DOMAIN_FDB_INGRESS, + DR_ACTION_DOMAIN_FDB_EGRESS, + DR_ACTION_DOMAIN_MAX, +}; + +enum dr_action_valid_state { + DR_ACTION_STATE_ERR, + DR_ACTION_STATE_NO_ACTION, + DR_ACTION_STATE_REFORMAT, + DR_ACTION_STATE_MODIFY_HDR, + DR_ACTION_STATE_MODIFY_VLAN, + DR_ACTION_STATE_NON_TERM, + DR_ACTION_STATE_TERM, + DR_ACTION_STATE_MAX, +}; + +static const enum dr_action_valid_state +next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] = { + [DR_ACTION_DOMAIN_NIC_INGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_REFORMAT] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + }, + [DR_ACTION_STATE_MODIFY_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_NIC_EGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_REFORMAT] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = 
DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_MODIFY_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_FDB_INGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_REFORMAT] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_MODIFY_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_FDB_EGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + 
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_REFORMAT] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_MODIFY_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, +}; + +struct dr_action_modify_field_conv { + u16 hw_field; + u8 start; + u8 end; + u8 l3_type; + u8 l4_type; +}; + +static const struct dr_action_modify_field_conv dr_action_conv_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_1, .start = 16, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_1, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_2, .start = 32, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_0, .start = 16, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_0, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 0, .end = 5, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 48, .end = 56, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 0, .end = 15, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 16, .end = 31, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 8, .end = 15, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 8, .end = 15, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 0, .end = 15, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP, + }, + 
[MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 16, .end = 31, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_3, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_3, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_4, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_4, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_2, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_2, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_METADATA, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_METADATA, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_0, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_1, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_2, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_2, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_1, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_2, .start = 0, .end = 15, + }, +}; + +#define MAX_VLANS 2 +struct dr_action_vlan_info { + int count; + u32 headers[MAX_VLANS]; +}; + +struct dr_action_apply_attr { + u32 modify_index; + u16 modify_actions; + u32 decap_index; + u16 decap_actions; + u8 decap_with_vlan:1; + u64 final_icm_addr; + u32 flow_tag; + u32 ctr_id; + u16 gvmi; + u16 hit_gvmi; + u32 reformat_id; + u32 reformat_size; + struct dr_action_vlan_info vlans; +}; 
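Editorial note: the state-transition tables above (next_action_state, indexed by domain, current build state and action type) encode which action may legally follow the actions already applied. A minimal sketch of how such a table is walked is shown below; it is illustrative only, not part of the patch, and assumes DR_ACTION_STATE_ERR is the zero enumerator, so transitions left out of the designated initializers above default to the error state.

/* Illustrative sketch, not part of the patch: validate an ordered list
 * of action types against the per-domain state machine.
 */
static int example_validate_action_sequence(enum dr_action_domain domain,
					    const u32 *action_types,
					    u32 num_actions)
{
	u32 state = DR_ACTION_STATE_NO_ACTION;
	u32 i;

	for (i = 0; i < num_actions; i++) {
		/* Unlisted transitions stay zero-initialized, i.e. ERR */
		state = next_action_state[domain][state][action_types[i]];
		if (state == DR_ACTION_STATE_ERR)
			return -EOPNOTSUPP;
	}

	return 0;
}

For DR_ACTION_DOMAIN_FDB_INGRESS, for example, the sequence CTR, MODIFY_HDR, FT walks NO_ACTION -> NON_TERM -> MODIFY_HDR -> TERM and is accepted, whereas FT followed by MODIFY_HDR is rejected because only a counter may follow the terminating state.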
+ +static int +dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type, + enum mlx5dr_action_type *action_type) +{ + switch (reformat_type) { + case DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2: + *action_type = DR_ACTION_TYP_TNL_L2_TO_L2; + break; + case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2: + *action_type = DR_ACTION_TYP_L2_TO_TNL_L2; + break; + case DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2: + *action_type = DR_ACTION_TYP_TNL_L3_TO_L2; + break; + case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3: + *action_type = DR_ACTION_TYP_L2_TO_TNL_L3; + break; + default: + return -EINVAL; + } + + return 0; +} + +static void dr_actions_init_next_ste(u8 **last_ste, + u32 *added_stes, + enum mlx5dr_ste_entry_type entry_type, + u16 gvmi) +{ + (*added_stes)++; + *last_ste += DR_STE_SIZE; + mlx5dr_ste_init(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE, entry_type, gvmi); +} + +static void dr_actions_apply_tx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u8 *last_ste, + struct dr_action_apply_attr *attr, + u32 *added_stes) +{ + bool encap = action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2] || + action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]; + + /* We want to make sure the modify header comes before L2 + * encapsulation. The reason for that is that we support + * modify headers for outer headers only + */ + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT); + mlx5dr_ste_set_rewrite_actions(last_ste, + attr->modify_actions, + attr->modify_index); + } + + if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (i || action_type_set[DR_ACTION_TYP_MODIFY_HDR]) + dr_actions_init_next_ste(&last_ste, + added_stes, + MLX5DR_STE_TYPE_TX, + attr->gvmi); + + mlx5dr_ste_set_tx_push_vlan(last_ste, + attr->vlans.headers[i], + encap); + } + } + + if (encap) { + /* Modify header and encapsulation require a different STEs. + * Since modify header STE format doesn't support encapsulation + * tunneling_action. + */ + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] || + action_type_set[DR_ACTION_TYP_PUSH_VLAN]) + dr_actions_init_next_ste(&last_ste, + added_stes, + MLX5DR_STE_TYPE_TX, + attr->gvmi); + + mlx5dr_ste_set_tx_encap(last_ste, + attr->reformat_id, + attr->reformat_size, + action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]); + /* Whenever prio_tag_required enabled, we can be sure that the + * previous table (ACL) already push vlan to our packet, + * And due to HW limitation we need to set this bit, otherwise + * push vlan + reformat will not work. 
+ */ + if (MLX5_CAP_GEN(dmn->mdev, prio_tag_required)) + mlx5dr_ste_set_go_back_bit(last_ste); + } + + if (action_type_set[DR_ACTION_TYP_CTR]) + mlx5dr_ste_set_counter_id(last_ste, attr->ctr_id); +} + +static void dr_actions_apply_rx(u8 *action_type_set, + u8 *last_ste, + struct dr_action_apply_attr *attr, + u32 *added_stes) +{ + if (action_type_set[DR_ACTION_TYP_CTR]) + mlx5dr_ste_set_counter_id(last_ste, attr->ctr_id); + + if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) { + mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT); + mlx5dr_ste_set_rx_decap_l3(last_ste, attr->decap_with_vlan); + mlx5dr_ste_set_rewrite_actions(last_ste, + attr->decap_actions, + attr->decap_index); + } + + if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2]) + mlx5dr_ste_set_rx_decap(last_ste); + + if (action_type_set[DR_ACTION_TYP_POP_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (i || + action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2] || + action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) + dr_actions_init_next_ste(&last_ste, + added_stes, + MLX5DR_STE_TYPE_RX, + attr->gvmi); + + mlx5dr_ste_set_rx_pop_vlan(last_ste); + } + } + + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + if (mlx5dr_ste_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT) + dr_actions_init_next_ste(&last_ste, + added_stes, + MLX5DR_STE_TYPE_MODIFY_PKT, + attr->gvmi); + else + mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT); + + mlx5dr_ste_set_rewrite_actions(last_ste, + attr->modify_actions, + attr->modify_index); + } + + if (action_type_set[DR_ACTION_TYP_TAG]) { + if (mlx5dr_ste_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT) + dr_actions_init_next_ste(&last_ste, + added_stes, + MLX5DR_STE_TYPE_RX, + attr->gvmi); + + mlx5dr_ste_rx_set_flow_tag(last_ste, attr->flow_tag); + } +} + +/* Apply the actions on the rule STE array starting from the last_ste. + * Actions might require more than one STE, new_num_stes will return + * the new size of the STEs array, rule with actions. 
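 * (Editorial note, not part of the original comment: on TX, for instance,
 * combining modify-header, push-vlan and L2/L3 encapsulation grows the
 * rule by chaining extra STEs through dr_actions_init_next_ste(), since
 * those actions cannot all share a single STE.)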
+ */ +static void dr_actions_apply(struct mlx5dr_domain *dmn, + enum mlx5dr_ste_entry_type ste_type, + u8 *action_type_set, + u8 *last_ste, + struct dr_action_apply_attr *attr, + u32 *new_num_stes) +{ + u32 added_stes = 0; + + if (ste_type == MLX5DR_STE_TYPE_RX) + dr_actions_apply_rx(action_type_set, last_ste, attr, &added_stes); + else + dr_actions_apply_tx(dmn, action_type_set, last_ste, attr, &added_stes); + + last_ste += added_stes * DR_STE_SIZE; + *new_num_stes += added_stes; + + mlx5dr_ste_set_hit_gvmi(last_ste, attr->hit_gvmi); + mlx5dr_ste_set_hit_addr(last_ste, attr->final_icm_addr, 1); +} + +static enum dr_action_domain +dr_action_get_action_domain(enum mlx5dr_domain_type domain, + enum mlx5dr_ste_entry_type ste_type) +{ + switch (domain) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + return DR_ACTION_DOMAIN_NIC_INGRESS; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + return DR_ACTION_DOMAIN_NIC_EGRESS; + case MLX5DR_DOMAIN_TYPE_FDB: + if (ste_type == MLX5DR_STE_TYPE_RX) + return DR_ACTION_DOMAIN_FDB_INGRESS; + return DR_ACTION_DOMAIN_FDB_EGRESS; + default: + WARN_ON(true); + return DR_ACTION_DOMAIN_MAX; + } +} + +static +int dr_action_validate_and_get_next_state(enum dr_action_domain action_domain, + u32 action_type, + u32 *state) +{ + u32 cur_state = *state; + + /* Check action state machine is valid */ + *state = next_action_state[action_domain][cur_state][action_type]; + + if (*state == DR_ACTION_STATE_ERR) + return -EOPNOTSUPP; + + return 0; +} + +static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn, + struct mlx5dr_action *dest_action, + u64 *final_icm_addr) +{ + int ret; + + switch (dest_action->action_type) { + case DR_ACTION_TYP_FT: + /* Allow destination flow table only if table is a terminating + * table, since there is an *assumption* that in such case FW + * will recalculate the CS. + */ + if (dest_action->dest_tbl.is_fw_tbl) { + *final_icm_addr = dest_action->dest_tbl.fw_tbl.rx_icm_addr; + } else { + mlx5dr_dbg(dmn, + "Destination FT should be terminating when modify TTL is used\n"); + return -EINVAL; + } + break; + + case DR_ACTION_TYP_VPORT: + /* If destination is vport we will get the FW flow table + * that recalculates the CS and forwards to the vport. 
+ */ + ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport.dmn, + dest_action->vport.num, + final_icm_addr); + if (ret) { + mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n"); + return ret; + } + break; + + default: + break; + } + + return 0; +} + +#define WITH_VLAN_NUM_HW_ACTIONS 6 + +int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_action *actions[], + u32 num_actions, + u8 *ste_arr, + u32 *new_hw_ste_arr_sz) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + bool rx_rule = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + u8 action_type_set[DR_ACTION_TYP_MAX] = {}; + struct mlx5dr_action *dest_action = NULL; + u32 state = DR_ACTION_STATE_NO_ACTION; + struct dr_action_apply_attr attr = {}; + enum dr_action_domain action_domain; + bool recalc_cs_required = false; + u8 *last_ste; + int i, ret; + + attr.gvmi = dmn->info.caps.gvmi; + attr.hit_gvmi = dmn->info.caps.gvmi; + attr.final_icm_addr = nic_dmn->default_icm_addr; + action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->ste_type); + + for (i = 0; i < num_actions; i++) { + struct mlx5dr_action *action; + int max_actions_type = 1; + u32 action_type; + + action = actions[i]; + action_type = action->action_type; + + switch (action_type) { + case DR_ACTION_TYP_DROP: + attr.final_icm_addr = nic_dmn->drop_icm_addr; + break; + case DR_ACTION_TYP_FT: + dest_action = action; + if (!action->dest_tbl.is_fw_tbl) { + if (action->dest_tbl.tbl->dmn != dmn) { + mlx5dr_dbg(dmn, + "Destination table belongs to a different domain\n"); + goto out_invalid_arg; + } + if (action->dest_tbl.tbl->level <= matcher->tbl->level) { + mlx5dr_dbg(dmn, + "Destination table level should be higher than source table\n"); + goto out_invalid_arg; + } + attr.final_icm_addr = rx_rule ? + action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr : + action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr; + } else { + struct mlx5dr_cmd_query_flow_table_details output; + int ret; + + /* get the relevant addresses */ + if (!action->dest_tbl.fw_tbl.rx_icm_addr) { + ret = mlx5dr_cmd_query_flow_table(action->dest_tbl.fw_tbl.mdev, + action->dest_tbl.fw_tbl.ft->type, + action->dest_tbl.fw_tbl.ft->id, + &output); + if (!ret) { + action->dest_tbl.fw_tbl.tx_icm_addr = + output.sw_owner_icm_root_1; + action->dest_tbl.fw_tbl.rx_icm_addr = + output.sw_owner_icm_root_0; + } else { + mlx5dr_dbg(dmn, + "Failed mlx5_cmd_query_flow_table ret: %d\n", + ret); + return ret; + } + } + attr.final_icm_addr = rx_rule ? 
+ action->dest_tbl.fw_tbl.rx_icm_addr : + action->dest_tbl.fw_tbl.tx_icm_addr; + } + break; + case DR_ACTION_TYP_QP: + mlx5dr_info(dmn, "Domain doesn't support QP\n"); + goto out_invalid_arg; + case DR_ACTION_TYP_CTR: + attr.ctr_id = action->ctr.ctr_id + + action->ctr.offeset; + break; + case DR_ACTION_TYP_TAG: + attr.flow_tag = action->flow_tag; + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + attr.decap_index = action->rewrite.index; + attr.decap_actions = action->rewrite.num_of_actions; + attr.decap_with_vlan = + attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS; + break; + case DR_ACTION_TYP_MODIFY_HDR: + attr.modify_index = action->rewrite.index; + attr.modify_actions = action->rewrite.num_of_actions; + recalc_cs_required = action->rewrite.modify_ttl; + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + attr.reformat_size = action->reformat.reformat_size; + attr.reformat_id = action->reformat.reformat_id; + break; + case DR_ACTION_TYP_VPORT: + attr.hit_gvmi = action->vport.caps->vhca_gvmi; + dest_action = action; + if (rx_rule) { + /* Loopback on WIRE vport is not supported */ + if (action->vport.num == WIRE_PORT) + goto out_invalid_arg; + + attr.final_icm_addr = action->vport.caps->icm_address_rx; + } else { + attr.final_icm_addr = action->vport.caps->icm_address_tx; + } + break; + case DR_ACTION_TYP_POP_VLAN: + max_actions_type = MAX_VLANS; + attr.vlans.count++; + break; + case DR_ACTION_TYP_PUSH_VLAN: + max_actions_type = MAX_VLANS; + if (attr.vlans.count == MAX_VLANS) + return -EINVAL; + + attr.vlans.headers[attr.vlans.count++] = action->push_vlan.vlan_hdr; + break; + default: + goto out_invalid_arg; + } + + /* Check action duplication */ + if (++action_type_set[action_type] > max_actions_type) { + mlx5dr_dbg(dmn, "Action type %d supports only max %d time(s)\n", + action_type, max_actions_type); + goto out_invalid_arg; + } + + /* Check action state machine is valid */ + if (dr_action_validate_and_get_next_state(action_domain, + action_type, + &state)) { + mlx5dr_dbg(dmn, "Invalid action sequence provided\n"); + return -EOPNOTSUPP; + } + } + + *new_hw_ste_arr_sz = nic_matcher->num_of_builders; + last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1); + + /* Due to a HW bug, modifying TTL on RX flows will cause an incorrect + * checksum calculation. In this case we will use a FW table to + * recalculate. 
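 * (Editorial note, not part of the original comment: the recalculation
 * table is a per-vport FW flow table resolved by
 * dr_action_handle_cs_recalc(); mlx5dr_domain_cache_get_recalc_cs_ft_addr()
 * creates it on first use and caches it in the domain.)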
+ */ + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB && + rx_rule && recalc_cs_required && dest_action) { + ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr); + if (ret) { + mlx5dr_dbg(dmn, + "Failed to handle checksum recalculation err %d\n", + ret); + return ret; + } + } + + dr_actions_apply(dmn, + nic_dmn->ste_type, + action_type_set, + last_ste, + &attr, + new_hw_ste_arr_sz); + + return 0; + +out_invalid_arg: + return -EINVAL; +} + +#define CVLAN_ETHERTYPE 0x8100 +#define SVLAN_ETHERTYPE 0x88a8 +#define HDR_LEN_L2_ONLY 14 +#define HDR_LEN_L2_VLAN 18 +#define REWRITE_HW_ACTION_NUM 6 + +static int dr_actions_l2_rewrite(struct mlx5dr_domain *dmn, + struct mlx5dr_action *action, + void *data, size_t data_sz) +{ + struct mlx5_ifc_l2_hdr_bits *l2_hdr = data; + u64 ops[REWRITE_HW_ACTION_NUM] = {}; + u32 hdr_fld_4b; + u16 hdr_fld_2b; + u16 vlan_type; + bool vlan; + int i = 0; + int ret; + + vlan = (data_sz != HDR_LEN_L2_ONLY); + + /* dmac_47_16 */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 0); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_0); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 16); + hdr_fld_4b = MLX5_GET(l2_hdr, l2_hdr, dmac_47_16); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_4b); + i++; + + /* smac_47_16 */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 0); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_1); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 16); + hdr_fld_4b = (MLX5_GET(l2_hdr, l2_hdr, smac_31_0) >> 16 | + MLX5_GET(l2_hdr, l2_hdr, smac_47_32) << 16); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_4b); + i++; + + /* dmac_15_0 */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 16); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_0); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 0); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, dmac_15_0); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_2b); + i++; + + /* ethertype + (optional) vlan */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_2); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 32); + if (!vlan) { + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype); + MLX5_SET(dr_action_hw_set, ops + i, inline_data, hdr_fld_2b); + MLX5_SET(dr_action_hw_set, ops + i, destination_length, 16); + } else { + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype); + vlan_type = hdr_fld_2b == SVLAN_ETHERTYPE ? 
DR_STE_SVLAN : DR_STE_CVLAN; + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan); + hdr_fld_4b = (vlan_type << 16) | hdr_fld_2b; + MLX5_SET(dr_action_hw_set, ops + i, inline_data, hdr_fld_4b); + MLX5_SET(dr_action_hw_set, ops + i, destination_length, 18); + } + i++; + + /* smac_15_0 */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 16); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_1); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 0); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, smac_31_0); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_2b); + i++; + + if (vlan) { + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan_type); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_2b); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 16); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_2); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 0); + i++; + } + + action->rewrite.data = (void *)ops; + action->rewrite.num_of_actions = i; + action->rewrite.chunk->byte_size = i * sizeof(*ops); + + ret = mlx5dr_send_postsend_action(dmn, action); + if (ret) { + mlx5dr_dbg(dmn, "Writing encapsulation action to ICM failed\n"); + return ret; + } + + return 0; +} + +static struct mlx5dr_action * +dr_action_create_generic(enum mlx5dr_action_type action_type) +{ + struct mlx5dr_action *action; + + action = kzalloc(sizeof(*action), GFP_KERNEL); + if (!action) + return NULL; + + action->action_type = action_type; + refcount_set(&action->refcount, 1); + + return action; +} + +struct mlx5dr_action *mlx5dr_action_create_drop(void) +{ + return dr_action_create_generic(DR_ACTION_TYP_DROP); +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl) +{ + struct mlx5dr_action *action; + + refcount_inc(&tbl->refcount); + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + goto dec_ref; + + action->dest_tbl.tbl = tbl; + + return action; + +dec_ref: + refcount_dec(&tbl->refcount); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, + struct mlx5_core_dev *mdev) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + return NULL; + + action->dest_tbl.is_fw_tbl = 1; + action->dest_tbl.fw_tbl.ft = ft; + action->dest_tbl.fw_tbl.mdev = mdev; + + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_flow_counter(u32 counter_id) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_CTR); + if (!action) + return NULL; + + action->ctr.ctr_id = counter_id; + + return action; +} + +struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_TAG); + if (!action) + return NULL; + + action->flow_tag = tag_value & 0xffffff; + + return action; +} + +static int +dr_action_verify_reformat_params(enum mlx5dr_action_type reformat_type, + struct mlx5dr_domain *dmn, + size_t data_sz, + void *data) +{ + if ((!data && data_sz) || (data && !data_sz) || reformat_type > + DR_ACTION_TYP_L2_TO_TNL_L3) { + mlx5dr_dbg(dmn, "Invalid reformat parameter!\n"); + goto out_err; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB) + return 0; + + if (dmn->type == 
MLX5DR_DOMAIN_TYPE_NIC_RX) { + if (reformat_type != DR_ACTION_TYP_TNL_L2_TO_L2 && + reformat_type != DR_ACTION_TYP_TNL_L3_TO_L2) { + mlx5dr_dbg(dmn, "Action reformat type not support on RX domain\n"); + goto out_err; + } + } else if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { + if (reformat_type != DR_ACTION_TYP_L2_TO_TNL_L2 && + reformat_type != DR_ACTION_TYP_L2_TO_TNL_L3) { + mlx5dr_dbg(dmn, "Action reformat type not support on TX domain\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -EINVAL; +} + +#define ACTION_CACHE_LINE_SIZE 64 + +static int +dr_action_create_reformat_action(struct mlx5dr_domain *dmn, + size_t data_sz, void *data, + struct mlx5dr_action *action) +{ + u32 reformat_id; + int ret; + + switch (action->action_type) { + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + { + enum mlx5dr_action_type rt; + + if (action->action_type == DR_ACTION_TYP_L2_TO_TNL_L2) + rt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; + else + rt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; + + ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, rt, data_sz, data, + &reformat_id); + if (ret) + return ret; + + action->reformat.reformat_id = reformat_id; + action->reformat.reformat_size = data_sz; + return 0; + } + case DR_ACTION_TYP_TNL_L2_TO_L2: + { + return 0; + } + case DR_ACTION_TYP_TNL_L3_TO_L2: + { + /* Only Ethernet frame is supported, with VLAN (18) or without (14) */ + if (data_sz != HDR_LEN_L2_ONLY && data_sz != HDR_LEN_L2_VLAN) + return -EINVAL; + + action->rewrite.chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, + DR_CHUNK_SIZE_8); + if (!action->rewrite.chunk) + return -ENOMEM; + + action->rewrite.index = (action->rewrite.chunk->icm_addr - + dmn->info.caps.hdr_modify_icm_addr) / + ACTION_CACHE_LINE_SIZE; + + ret = dr_actions_l2_rewrite(dmn, action, data, data_sz); + if (ret) { + mlx5dr_icm_free_chunk(action->rewrite.chunk); + return ret; + } + return 0; + } + default: + mlx5dr_info(dmn, "Reformat type is not supported %d\n", action->action_type); + return -EINVAL; + } +} + +struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void) +{ + return dr_action_create_generic(DR_ACTION_TYP_POP_VLAN); +} + +struct mlx5dr_action *mlx5dr_action_create_push_vlan(struct mlx5dr_domain *dmn, + __be32 vlan_hdr) +{ + u32 vlan_hdr_h = ntohl(vlan_hdr); + u16 ethertype = vlan_hdr_h >> 16; + struct mlx5dr_action *action; + + if (ethertype != SVLAN_ETHERTYPE && ethertype != CVLAN_ETHERTYPE) { + mlx5dr_dbg(dmn, "Invalid vlan ethertype\n"); + return NULL; + } + + action = dr_action_create_generic(DR_ACTION_TYP_PUSH_VLAN); + if (!action) + return NULL; + + action->push_vlan.vlan_hdr = vlan_hdr_h; + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, + enum mlx5dr_action_reformat_type reformat_type, + size_t data_sz, + void *data) +{ + enum mlx5dr_action_type action_type; + struct mlx5dr_action *action; + int ret; + + refcount_inc(&dmn->refcount); + + /* General checks */ + ret = dr_action_reformat_to_action_type(reformat_type, &action_type); + if (ret) { + mlx5dr_dbg(dmn, "Invalid reformat_type provided\n"); + goto dec_ref; + } + + ret = dr_action_verify_reformat_params(action_type, dmn, data_sz, data); + if (ret) + goto dec_ref; + + action = dr_action_create_generic(action_type); + if (!action) + goto dec_ref; + + action->reformat.dmn = dmn; + + ret = dr_action_create_reformat_action(dmn, + data_sz, + data, + action); + if (ret) { + mlx5dr_dbg(dmn, "Failed creating reformat action %d\n", ret); + goto free_action; + } + + return action; 
+ +free_action: + kfree(action); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +static const struct dr_action_modify_field_conv * +dr_action_modify_get_hw_info(u16 sw_field) +{ + const struct dr_action_modify_field_conv *hw_action_info; + + if (sw_field >= ARRAY_SIZE(dr_action_conv_arr)) + goto not_found; + + hw_action_info = &dr_action_conv_arr[sw_field]; + if (!hw_action_info->end && !hw_action_info->start) + goto not_found; + + return hw_action_info; + +not_found: + return NULL; +} + +static int +dr_action_modify_sw_to_hw(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct dr_action_modify_field_conv **ret_hw_info) +{ + const struct dr_action_modify_field_conv *hw_action_info; + u8 offset, length, max_length, action; + u16 sw_field; + u8 hw_opcode; + u32 data; + + /* Get SW modify action data */ + action = MLX5_GET(set_action_in, sw_action, action_type); + length = MLX5_GET(set_action_in, sw_action, length); + offset = MLX5_GET(set_action_in, sw_action, offset); + sw_field = MLX5_GET(set_action_in, sw_action, field); + data = MLX5_GET(set_action_in, sw_action, data); + + /* Convert SW data to HW modify action format */ + hw_action_info = dr_action_modify_get_hw_info(sw_field); + if (!hw_action_info) { + mlx5dr_dbg(dmn, "Modify action invalid field given\n"); + return -EINVAL; + } + + max_length = hw_action_info->end - hw_action_info->start + 1; + + switch (action) { + case MLX5_ACTION_TYPE_SET: + hw_opcode = MLX5DR_ACTION_MDFY_HW_OP_SET; + /* PRM defines that length zero specific length of 32bits */ + if (!length) + length = 32; + + if (length + offset > max_length) { + mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n"); + return -EINVAL; + } + break; + + case MLX5_ACTION_TYPE_ADD: + hw_opcode = MLX5DR_ACTION_MDFY_HW_OP_ADD; + offset = 0; + length = max_length; + break; + + default: + mlx5dr_info(dmn, "Unsupported action_type for modify action\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(dr_action_hw_set, hw_action, opcode, hw_opcode); + + MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, + hw_action_info->hw_field); + + MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, + hw_action_info->start + offset); + + MLX5_SET(dr_action_hw_set, hw_action, destination_length, + length == 32 ? 
0 : length); + + MLX5_SET(dr_action_hw_set, hw_action, inline_data, data); + + *ret_hw_info = hw_action_info; + + return 0; +} + +static int +dr_action_modify_check_field_limitation(struct mlx5dr_domain *dmn, + const __be64 *sw_action) +{ + u16 sw_field; + u8 action; + + sw_field = MLX5_GET(set_action_in, sw_action, field); + action = MLX5_GET(set_action_in, sw_action, action_type); + + /* Check if SW field is supported in current domain (RX/TX) */ + if (action == MLX5_ACTION_TYPE_SET) { + if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_A) { + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) { + mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n", + sw_field); + return -EINVAL; + } + } + + if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) { + mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n", + sw_field); + return -EINVAL; + } + } + } else if (action == MLX5_ACTION_TYPE_ADD) { + if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL && + sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT && + sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM && + sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM) { + mlx5dr_dbg(dmn, "Unsupported field %d for add action\n", sw_field); + return -EINVAL; + } + } else { + mlx5dr_info(dmn, "Unsupported action %d modify action\n", action); + return -EOPNOTSUPP; + } + + return 0; +} + +static bool +dr_action_modify_check_is_ttl_modify(const u64 *sw_action) +{ + u16 sw_field = MLX5_GET(set_action_in, sw_action, field); + + return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL; +} + +static int dr_actions_convert_modify_header(struct mlx5dr_domain *dmn, + u32 max_hw_actions, + u32 num_sw_actions, + __be64 sw_actions[], + __be64 hw_actions[], + u32 *num_hw_actions, + bool *modify_ttl) +{ + const struct dr_action_modify_field_conv *hw_action_info; + u16 hw_field = MLX5DR_ACTION_MDFY_HW_FLD_RESERVED; + u32 l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_NONE; + u32 l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_NONE; + int ret, i, hw_idx = 0; + __be64 *sw_action; + __be64 hw_action; + + *modify_ttl = false; + + for (i = 0; i < num_sw_actions; i++) { + sw_action = &sw_actions[i]; + + ret = dr_action_modify_check_field_limitation(dmn, sw_action); + if (ret) + return ret; + + if (!(*modify_ttl)) + *modify_ttl = dr_action_modify_check_is_ttl_modify(sw_action); + + /* Convert SW action to HW action */ + ret = dr_action_modify_sw_to_hw(dmn, + sw_action, + &hw_action, + &hw_action_info); + if (ret) + return ret; + + /* Due to a HW limitation we cannot modify 2 different L3 types */ + if (l3_type && hw_action_info->l3_type && + hw_action_info->l3_type != l3_type) { + mlx5dr_dbg(dmn, "Action list can't support two different L3 types\n"); + return -EINVAL; + } + if (hw_action_info->l3_type) + l3_type = hw_action_info->l3_type; + + /* Due to a HW limitation we cannot modify two different L4 types */ + if (l4_type && hw_action_info->l4_type && + hw_action_info->l4_type != l4_type) { + mlx5dr_dbg(dmn, "Action list can't support two different L4 types\n"); + return -EINVAL; + } + if (hw_action_info->l4_type) + l4_type = hw_action_info->l4_type; + + /* HW reads and executes two actions at once this means we + * need to create a gap if two actions access the same field + */ + if ((hw_idx % 2) && hw_field == hw_action_info->hw_field) { + /* Check if after gap insertion the total number of HW + * modify actions doesn't exceeds the limit + */ + hw_idx++; + if ((num_sw_actions + hw_idx - i) >= max_hw_actions) { + mlx5dr_dbg(dmn, "Modify header 
action number exceeds HW limit\n"); + return -EINVAL; + } + } + hw_field = hw_action_info->hw_field; + + hw_actions[hw_idx] = hw_action; + hw_idx++; + } + + *num_hw_actions = hw_idx; + + return 0; +} + +static int dr_action_create_modify_action(struct mlx5dr_domain *dmn, + size_t actions_sz, + __be64 actions[], + struct mlx5dr_action *action) +{ + struct mlx5dr_icm_chunk *chunk; + u32 max_hw_actions; + u32 num_hw_actions; + u32 num_sw_actions; + __be64 *hw_actions; + bool modify_ttl; + int ret; + + num_sw_actions = actions_sz / DR_MODIFY_ACTION_SIZE; + max_hw_actions = mlx5dr_icm_pool_chunk_size_to_entries(DR_CHUNK_SIZE_16); + + if (num_sw_actions > max_hw_actions) { + mlx5dr_dbg(dmn, "Max number of actions %d exceeds limit %d\n", + num_sw_actions, max_hw_actions); + return -EINVAL; + } + + chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, DR_CHUNK_SIZE_16); + if (!chunk) + return -ENOMEM; + + hw_actions = kcalloc(1, max_hw_actions * DR_MODIFY_ACTION_SIZE, GFP_KERNEL); + if (!hw_actions) { + ret = -ENOMEM; + goto free_chunk; + } + + ret = dr_actions_convert_modify_header(dmn, + max_hw_actions, + num_sw_actions, + actions, + hw_actions, + &num_hw_actions, + &modify_ttl); + if (ret) + goto free_hw_actions; + + action->rewrite.chunk = chunk; + action->rewrite.modify_ttl = modify_ttl; + action->rewrite.data = (u8 *)hw_actions; + action->rewrite.num_of_actions = num_hw_actions; + action->rewrite.index = (chunk->icm_addr - + dmn->info.caps.hdr_modify_icm_addr) / + ACTION_CACHE_LINE_SIZE; + + ret = mlx5dr_send_postsend_action(dmn, action); + if (ret) + goto free_hw_actions; + + return 0; + +free_hw_actions: + kfree(hw_actions); +free_chunk: + mlx5dr_icm_free_chunk(chunk); + return ret; +} + +struct mlx5dr_action * +mlx5dr_action_create_modify_header(struct mlx5dr_domain *dmn, + u32 flags, + size_t actions_sz, + __be64 actions[]) +{ + struct mlx5dr_action *action; + int ret = 0; + + refcount_inc(&dmn->refcount); + + if (actions_sz % DR_MODIFY_ACTION_SIZE) { + mlx5dr_dbg(dmn, "Invalid modify actions size provided\n"); + goto dec_ref; + } + + action = dr_action_create_generic(DR_ACTION_TYP_MODIFY_HDR); + if (!action) + goto dec_ref; + + action->rewrite.dmn = dmn; + + ret = dr_action_create_modify_action(dmn, + actions_sz, + actions, + action); + if (ret) { + mlx5dr_dbg(dmn, "Failed creating modify header action %d\n", ret); + goto free_action; + } + + return action; + +free_action: + kfree(action); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn, + u32 vport, u8 vhca_id_valid, + u16 vhca_id) +{ + struct mlx5dr_cmd_vport_cap *vport_cap; + struct mlx5dr_domain *vport_dmn; + struct mlx5dr_action *action; + u8 peer_vport; + + peer_vport = vhca_id_valid && (vhca_id != dmn->info.caps.gvmi); + vport_dmn = peer_vport ? 
dmn->peer_dmn : dmn; + if (!vport_dmn) { + mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n"); + return NULL; + } + + if (vport_dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5dr_dbg(dmn, "Domain doesn't support vport actions\n"); + return NULL; + } + + vport_cap = mlx5dr_get_vport_cap(&vport_dmn->info.caps, vport); + if (!vport_cap) { + mlx5dr_dbg(dmn, "Failed to get vport %d caps\n", vport); + return NULL; + } + + action = dr_action_create_generic(DR_ACTION_TYP_VPORT); + if (!action) + return NULL; + + action->vport.dmn = vport_dmn; + action->vport.caps = vport_cap; + + return action; +} + +int mlx5dr_action_destroy(struct mlx5dr_action *action) +{ + if (refcount_read(&action->refcount) > 1) + return -EBUSY; + + switch (action->action_type) { + case DR_ACTION_TYP_FT: + if (!action->dest_tbl.is_fw_tbl) + refcount_dec(&action->dest_tbl.tbl->refcount); + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + refcount_dec(&action->reformat.dmn->refcount); + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + mlx5dr_icm_free_chunk(action->rewrite.chunk); + refcount_dec(&action->reformat.dmn->refcount); + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + mlx5dr_cmd_destroy_reformat_ctx((action->reformat.dmn)->mdev, + action->reformat.reformat_id); + refcount_dec(&action->reformat.dmn->refcount); + break; + case DR_ACTION_TYP_MODIFY_HDR: + mlx5dr_icm_free_chunk(action->rewrite.chunk); + refcount_dec(&action->rewrite.dmn->refcount); + break; + default: + break; + } + + kfree(action); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c new file mode 100644 index 000000000000..41662c4e2664 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -0,0 +1,480 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "dr_types.h" + +int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, + bool other_vport, + u16 vport_number, + u64 *icm_address_rx, + u64 *icm_address_tx) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; + int err; + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + MLX5_SET(query_esw_vport_context_in, in, other_vport, other_vport); + MLX5_SET(query_esw_vport_context_in, in, vport_number, vport_number); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *icm_address_rx = + MLX5_GET64(query_esw_vport_context_out, out, + esw_vport_context.sw_steering_vport_icm_address_rx); + *icm_address_tx = + MLX5_GET64(query_esw_vport_context_out, out, + esw_vport_context.sw_steering_vport_icm_address_tx); + return 0; +} + +int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport, + u16 vport_number, u16 *gvmi) +{ + u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; + int out_size; + void *out; + int err; + + out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); + out = kzalloc(out_size, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, other_function, other_vport); + MLX5_SET(query_hca_cap_in, in, function_id, vport_number); + MLX5_SET(query_hca_cap_in, in, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | + HCA_CAP_OPMOD_GET_CUR); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size); + if (err) { + kfree(out); + return err; + } + + *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id); + + kfree(out); + return 0; +} + +int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_esw_caps *caps) +{ + caps->drop_icm_address_rx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_fdb_action_drop_icm_address_rx); + caps->drop_icm_address_tx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_fdb_action_drop_icm_address_tx); + caps->uplink_icm_address_rx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_uplink_icm_address_rx); + caps->uplink_icm_address_tx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_uplink_icm_address_tx); + caps->sw_owner = + MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, + sw_owner); + + return 0; +} + +int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_caps *caps) +{ + caps->prio_tag_required = MLX5_CAP_GEN(mdev, prio_tag_required); + caps->eswitch_manager = MLX5_CAP_GEN(mdev, eswitch_manager); + caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); + caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); + + if (mlx5dr_matcher_supp_flex_parser_icmp_v4(caps)) { + caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); + caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1); + } + + if (mlx5dr_matcher_supp_flex_parser_icmp_v6(caps)) { + caps->flex_parser_id_icmpv6_dw0 = + MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw0); + caps->flex_parser_id_icmpv6_dw1 = + MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw1); + } + + caps->nic_rx_drop_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address); + caps->nic_tx_drop_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_drop_icm_address); + caps->nic_tx_allow_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_allow_icm_address); + + caps->rx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner); + caps->max_ft_level = 
MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_ft_level); + + caps->tx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner); + + caps->log_icm_size = MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size); + caps->hdr_modify_icm_addr = + MLX5_CAP64_DEV_MEM(mdev, header_modify_sw_icm_start_address); + + caps->roce_min_src_udp = MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port); + + return 0; +} + +int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, + u32 table_id, + struct mlx5dr_cmd_query_flow_table_details *output) +{ + u32 out[MLX5_ST_SZ_DW(query_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_flow_table_in)] = {}; + int err; + + MLX5_SET(query_flow_table_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_TABLE); + + MLX5_SET(query_flow_table_in, in, table_type, type); + MLX5_SET(query_flow_table_in, in, table_id, table_id); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + output->status = MLX5_GET(query_flow_table_out, out, status); + output->level = MLX5_GET(query_flow_table_out, out, flow_table_context.level); + + output->sw_owner_icm_root_1 = MLX5_GET64(query_flow_table_out, out, + flow_table_context.sw_owner_icm_root_1); + output->sw_owner_icm_root_0 = MLX5_GET64(query_flow_table_out, out, + flow_table_context.sw_owner_icm_root_0); + + return 0; +} + +int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev) +{ + u32 out[MLX5_ST_SZ_DW(sync_steering_out)] = {}; + u32 in[MLX5_ST_SZ_DW(sync_steering_in)] = {}; + + MLX5_SET(sync_steering_in, in, opcode, MLX5_CMD_OP_SYNC_STEERING); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id, + u32 modify_header_id, + u32 vport_id) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {}; + void *in_flow_context; + unsigned int inlen; + void *in_dests; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + + 1 * MLX5_ST_SZ_BYTES(dest_format_struct); /* One destination only */ + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, table_type, table_type); + MLX5_SET(set_fte_in, in, table_id, table_id); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + MLX5_SET(flow_context, in_flow_context, modify_header_id, modify_header_id); + MLX5_SET(flow_context, in_flow_context, destination_list_size, 1); + MLX5_SET(flow_context, in_flow_context, action, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + MLX5_SET(dest_format_struct, in_dests, destination_type, + MLX5_FLOW_DESTINATION_TYPE_VPORT); + MLX5_SET(dest_format_struct, in_dests, destination_id, vport_id); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + kvfree(in); + + return err; +} + +int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id) +{ + u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {}; + u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {}; + + MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + MLX5_SET(delete_fte_in, in, table_type, table_type); + MLX5_SET(delete_fte_in, in, table_id, table_id); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev, + u32 table_type, + u8 
num_of_actions, + u64 *actions, + u32 *modify_header_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)] = {}; + void *p_actions; + u32 inlen; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + + num_of_actions * sizeof(u64); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type); + MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_of_actions); + p_actions = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions); + memcpy(p_actions, actions, num_of_actions * sizeof(u64)); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + goto out; + + *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, + modify_header_id); +out: + kvfree(in); + return err; +} + +int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev, + u32 modify_header_id) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {}; + + MLX5_SET(dealloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, + modify_header_id); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 *group_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {}; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + u32 *in; + int err; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET(create_flow_group_in, in, table_type, table_type); + MLX5_SET(create_flow_group_in, in, table_id, table_id); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + goto out; + + *group_id = MLX5_GET(create_flow_group_out, out, group_id); + +out: + kfree(in); + return err; +} + +int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {}; + + MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET(destroy_flow_group_in, in, table_type, table_type); + MLX5_SET(destroy_flow_group_in, in, table_id, table_id); + MLX5_SET(destroy_flow_group_in, in, group_id, group_id); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, + u32 table_type, + u64 icm_addr_rx, + u64 icm_addr_tx, + u8 level, + bool sw_owner, + bool term_tbl, + u64 *fdb_rx_icm_addr, + u32 *table_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {}; + void *ft_mdev; + int err; + + MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); + MLX5_SET(create_flow_table_in, in, table_type, table_type); + + ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context); + MLX5_SET(flow_table_context, ft_mdev, termination_table, term_tbl); + MLX5_SET(flow_table_context, ft_mdev, sw_owner, sw_owner); + MLX5_SET(flow_table_context, ft_mdev, level, level); + + if (sw_owner) { + /* icm_addr_0 used for FDB RX / NIC TX / NIC_RX + * icm_addr_1 used for FDB TX + */ + if (table_type == 
MLX5_FLOW_TABLE_TYPE_NIC_RX) { + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_0, icm_addr_rx); + } else if (table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) { + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_0, icm_addr_tx); + } else if (table_type == MLX5_FLOW_TABLE_TYPE_FDB) { + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_0, icm_addr_rx); + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_1, icm_addr_tx); + } + } + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *table_id = MLX5_GET(create_flow_table_out, out, table_id); + if (!sw_owner && table_type == MLX5_FLOW_TABLE_TYPE_FDB) + *fdb_rx_icm_addr = + (u64)MLX5_GET(create_flow_table_out, out, icm_address_31_0) | + (u64)MLX5_GET(create_flow_table_out, out, icm_address_39_32) << 32 | + (u64)MLX5_GET(create_flow_table_out, out, icm_address_63_40) << 40; + + return 0; +} + +int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev, + u32 table_id, + u32 table_type) +{ + u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {}; + + MLX5_SET(destroy_flow_table_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + MLX5_SET(destroy_flow_table_in, in, table_type, table_type); + MLX5_SET(destroy_flow_table_in, in, table_id, table_id); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, + enum mlx5_reformat_ctx_type rt, + size_t reformat_size, + void *reformat_data, + u32 *reformat_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {}; + size_t inlen, cmd_data_sz, cmd_total_sz; + void *prctx; + void *pdata; + void *in; + int err; + + cmd_total_sz = MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in); + cmd_data_sz = MLX5_FLD_SZ_BYTES(alloc_packet_reformat_context_in, + packet_reformat_context.reformat_data); + inlen = ALIGN(cmd_total_sz + reformat_size - cmd_data_sz, 4); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT); + + prctx = MLX5_ADDR_OF(alloc_packet_reformat_context_in, in, packet_reformat_context); + pdata = MLX5_ADDR_OF(packet_reformat_context_in, prctx, reformat_data); + + MLX5_SET(packet_reformat_context_in, prctx, reformat_type, rt); + MLX5_SET(packet_reformat_context_in, prctx, reformat_data_size, reformat_size); + memcpy(pdata, reformat_data, reformat_size); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + return err; + + *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); + kvfree(in); + + return err; +} + +void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev, + u32 reformat_id) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {}; + + MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, + reformat_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, + u16 index, struct mlx5dr_cmd_gid_attr *attr) +{ + u32 out[MLX5_ST_SZ_DW(query_roce_address_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_roce_address_in)] = {}; + int err; + + MLX5_SET(query_roce_address_in, in, opcode, + MLX5_CMD_OP_QUERY_ROCE_ADDRESS); + + 
MLX5_SET(query_roce_address_in, in, roce_address_index, index); + MLX5_SET(query_roce_address_in, in, vhca_port_num, vhca_port_num); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + memcpy(&attr->gid, + MLX5_ADDR_OF(query_roce_address_out, + out, roce_address.source_l3_address), + sizeof(attr->gid)); + memcpy(attr->mac, + MLX5_ADDR_OF(query_roce_address_out, out, + roce_address.source_mac_47_32), + sizeof(attr->mac)); + + if (MLX5_GET(query_roce_address_out, out, + roce_address.roce_version) == MLX5_ROCE_VERSION_2) + attr->roce_ver = MLX5_ROCE_VERSION_2; + else + attr->roce_ver = MLX5_ROCE_VERSION_1; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c new file mode 100644 index 000000000000..9e2eccbb1eb8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +/* Copyright (c) 2011-2015 Stephan Brumme. All rights reserved. + * Slicing-by-16 contributed by Bulat Ziganshin + * + * This software is provided 'as-is', without any express or implied warranty. + * In no event will the author be held liable for any damages arising from the + * of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. + * 2. If you use this software in a product, an acknowledgment in the product + * documentation would be appreciated but is not required. + * 3. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * Taken from http://create.stephan-brumme.com/crc32/ and adapted. 
+ */ + +#include "dr_types.h" + +#define DR_STE_CRC_POLY 0xEDB88320L + +static u32 dr_ste_crc_tab32[8][256]; + +static void dr_crc32_calc_lookup_entry(u32 (*tbl)[256], u8 i, u8 j) +{ + tbl[i][j] = (tbl[i - 1][j] >> 8) ^ tbl[0][tbl[i - 1][j] & 0xff]; +} + +void mlx5dr_crc32_init_table(void) +{ + u32 crc, i, j; + + for (i = 0; i < 256; i++) { + crc = i; + for (j = 0; j < 8; j++) { + if (crc & 0x00000001L) + crc = (crc >> 1) ^ DR_STE_CRC_POLY; + else + crc = crc >> 1; + } + dr_ste_crc_tab32[0][i] = crc; + } + + /* Init CRC lookup tables according to crc_slice_8 algorithm */ + for (i = 0; i < 256; i++) { + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 1, i); + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 2, i); + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 3, i); + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 4, i); + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 5, i); + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 6, i); + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 7, i); + } +} + +/* Compute CRC32 (Slicing-by-8 algorithm) */ +u32 mlx5dr_crc32_slice8_calc(const void *input_data, size_t length) +{ + const u32 *curr = (const u32 *)input_data; + const u8 *curr_char; + u32 crc = 0, one, two; + + if (!input_data) + return 0; + + /* Process eight bytes at once (Slicing-by-8) */ + while (length >= 8) { + one = *curr++ ^ crc; + two = *curr++; + + crc = dr_ste_crc_tab32[0][(two >> 24) & 0xff] + ^ dr_ste_crc_tab32[1][(two >> 16) & 0xff] + ^ dr_ste_crc_tab32[2][(two >> 8) & 0xff] + ^ dr_ste_crc_tab32[3][two & 0xff] + ^ dr_ste_crc_tab32[4][(one >> 24) & 0xff] + ^ dr_ste_crc_tab32[5][(one >> 16) & 0xff] + ^ dr_ste_crc_tab32[6][(one >> 8) & 0xff] + ^ dr_ste_crc_tab32[7][one & 0xff]; + + length -= 8; + } + + curr_char = (const u8 *)curr; + /* Remaining 1 to 7 bytes (standard algorithm) */ + while (length-- != 0) + crc = (crc >> 8) ^ dr_ste_crc_tab32[0][(crc & 0xff) + ^ *curr_char++]; + + return ((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) | + ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c new file mode 100644 index 000000000000..3b9cf0bccf4d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -0,0 +1,395 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/mlx5/eswitch.h> +#include "dr_types.h" + +static int dr_domain_init_cache(struct mlx5dr_domain *dmn) +{ + /* Per vport cached FW FT for checksum recalculation, this + * recalculation is needed due to a HW bug. 
+ */ + dmn->cache.recalc_cs_ft = kcalloc(dmn->info.caps.num_vports, + sizeof(dmn->cache.recalc_cs_ft[0]), + GFP_KERNEL); + if (!dmn->cache.recalc_cs_ft) + return -ENOMEM; + + return 0; +} + +static void dr_domain_uninit_cache(struct mlx5dr_domain *dmn) +{ + int i; + + for (i = 0; i < dmn->info.caps.num_vports; i++) { + if (!dmn->cache.recalc_cs_ft[i]) + continue; + + mlx5dr_fw_destroy_recalc_cs_ft(dmn, dmn->cache.recalc_cs_ft[i]); + } + + kfree(dmn->cache.recalc_cs_ft); +} + +int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn, + u32 vport_num, + u64 *rx_icm_addr) +{ + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; + + recalc_cs_ft = dmn->cache.recalc_cs_ft[vport_num]; + if (!recalc_cs_ft) { + /* Table not in cache, need to allocate a new one */ + recalc_cs_ft = mlx5dr_fw_create_recalc_cs_ft(dmn, vport_num); + if (!recalc_cs_ft) + return -EINVAL; + + dmn->cache.recalc_cs_ft[vport_num] = recalc_cs_ft; + } + + *rx_icm_addr = recalc_cs_ft->rx_icm_addr; + + return 0; +} + +static int dr_domain_init_resources(struct mlx5dr_domain *dmn) +{ + int ret; + + ret = mlx5_core_alloc_pd(dmn->mdev, &dmn->pdn); + if (ret) { + mlx5dr_dbg(dmn, "Couldn't allocate PD\n"); + return ret; + } + + dmn->uar = mlx5_get_uars_page(dmn->mdev); + if (!dmn->uar) { + mlx5dr_err(dmn, "Couldn't allocate UAR\n"); + goto clean_pd; + } + + dmn->ste_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_STE); + if (!dmn->ste_icm_pool) { + mlx5dr_err(dmn, "Couldn't get icm memory for %s\n", + dev_name(dmn->mdev->device)); + goto clean_uar; + } + + dmn->action_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_MODIFY_ACTION); + if (!dmn->action_icm_pool) { + mlx5dr_err(dmn, "Couldn't get action icm memory for %s\n", + dev_name(dmn->mdev->device)); + goto free_ste_icm_pool; + } + + ret = mlx5dr_send_ring_alloc(dmn); + if (ret) { + mlx5dr_err(dmn, "Couldn't create send-ring for %s\n", + dev_name(dmn->mdev->device)); + goto free_action_icm_pool; + } + + return 0; + +free_action_icm_pool: + mlx5dr_icm_pool_destroy(dmn->action_icm_pool); +free_ste_icm_pool: + mlx5dr_icm_pool_destroy(dmn->ste_icm_pool); +clean_uar: + mlx5_put_uars_page(dmn->mdev, dmn->uar); +clean_pd: + mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn); + + return ret; +} + +static void dr_domain_uninit_resources(struct mlx5dr_domain *dmn) +{ + mlx5dr_send_ring_free(dmn, dmn->send_ring); + mlx5dr_icm_pool_destroy(dmn->action_icm_pool); + mlx5dr_icm_pool_destroy(dmn->ste_icm_pool); + mlx5_put_uars_page(dmn->mdev, dmn->uar); + mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn); +} + +static int dr_domain_query_vport(struct mlx5dr_domain *dmn, + bool other_vport, + u16 vport_number) +{ + struct mlx5dr_cmd_vport_cap *vport_caps; + int ret; + + vport_caps = &dmn->info.caps.vports_caps[vport_number]; + + ret = mlx5dr_cmd_query_esw_vport_context(dmn->mdev, + other_vport, + vport_number, + &vport_caps->icm_address_rx, + &vport_caps->icm_address_tx); + if (ret) + return ret; + + ret = mlx5dr_cmd_query_gvmi(dmn->mdev, + other_vport, + vport_number, + &vport_caps->vport_gvmi); + if (ret) + return ret; + + vport_caps->num = vport_number; + vport_caps->vhca_gvmi = dmn->info.caps.gvmi; + + return 0; +} + +static int dr_domain_query_vports(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_esw_caps *esw_caps = &dmn->info.caps.esw_caps; + struct mlx5dr_cmd_vport_cap *wire_vport; + int vport; + int ret; + + /* Query vports (except wire vport) */ + for (vport = 0; vport < dmn->info.caps.num_esw_ports - 1; vport++) { + ret = dr_domain_query_vport(dmn, !!vport, vport); + if (ret) + return ret; + 
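		/* Editorial note, not part of the original patch: vport 0 is
		 * the e-switch manager itself, hence other_vport == !!vport
		 * above; the uplink (wire) port is not queried in this loop
		 * and is filled in from the esw caps right after it.
		 */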
} + + /* Last vport is the wire port */ + wire_vport = &dmn->info.caps.vports_caps[vport]; + wire_vport->num = WIRE_PORT; + wire_vport->icm_address_rx = esw_caps->uplink_icm_address_rx; + wire_vport->icm_address_tx = esw_caps->uplink_icm_address_tx; + wire_vport->vport_gvmi = 0; + wire_vport->vhca_gvmi = dmn->info.caps.gvmi; + + return 0; +} + +static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_domain *dmn) +{ + int ret; + + if (!dmn->info.caps.eswitch_manager) + return -EOPNOTSUPP; + + ret = mlx5dr_cmd_query_esw_caps(mdev, &dmn->info.caps.esw_caps); + if (ret) + return ret; + + dmn->info.caps.fdb_sw_owner = dmn->info.caps.esw_caps.sw_owner; + dmn->info.caps.esw_rx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_rx; + dmn->info.caps.esw_tx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_tx; + + dmn->info.caps.vports_caps = kcalloc(dmn->info.caps.num_esw_ports, + sizeof(dmn->info.caps.vports_caps[0]), + GFP_KERNEL); + if (!dmn->info.caps.vports_caps) + return -ENOMEM; + + ret = dr_domain_query_vports(dmn); + if (ret) { + mlx5dr_dbg(dmn, "Failed to query vports caps\n"); + goto free_vports_caps; + } + + dmn->info.caps.num_vports = dmn->info.caps.num_esw_ports - 1; + + return 0; + +free_vports_caps: + kfree(dmn->info.caps.vports_caps); + dmn->info.caps.vports_caps = NULL; + return ret; +} + +static int dr_domain_caps_init(struct mlx5_core_dev *mdev, + struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_vport_cap *vport_cap; + int ret; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) { + mlx5dr_dbg(dmn, "Failed to allocate domain, bad link type\n"); + return -EOPNOTSUPP; + } + + dmn->info.caps.num_esw_ports = mlx5_eswitch_get_total_vports(mdev); + + ret = mlx5dr_cmd_query_device(mdev, &dmn->info.caps); + if (ret) + return ret; + + ret = dr_domain_query_fdb_caps(mdev, dmn); + if (ret) + return ret; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + if (!dmn->info.caps.rx_sw_owner) + return -ENOTSUPP; + + dmn->info.supp_sw_steering = true; + dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX; + dmn->info.rx.default_icm_addr = dmn->info.caps.nic_rx_drop_address; + dmn->info.rx.drop_icm_addr = dmn->info.caps.nic_rx_drop_address; + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + if (!dmn->info.caps.tx_sw_owner) + return -ENOTSUPP; + + dmn->info.supp_sw_steering = true; + dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX; + dmn->info.tx.default_icm_addr = dmn->info.caps.nic_tx_allow_address; + dmn->info.tx.drop_icm_addr = dmn->info.caps.nic_tx_drop_address; + break; + case MLX5DR_DOMAIN_TYPE_FDB: + if (!dmn->info.caps.eswitch_manager) + return -ENOTSUPP; + + if (!dmn->info.caps.fdb_sw_owner) + return -ENOTSUPP; + + dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX; + dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX; + vport_cap = mlx5dr_get_vport_cap(&dmn->info.caps, 0); + if (!vport_cap) { + mlx5dr_dbg(dmn, "Failed to get esw manager vport\n"); + return -ENOENT; + } + + dmn->info.supp_sw_steering = true; + dmn->info.tx.default_icm_addr = vport_cap->icm_address_tx; + dmn->info.rx.default_icm_addr = vport_cap->icm_address_rx; + dmn->info.rx.drop_icm_addr = dmn->info.caps.esw_rx_drop_address; + dmn->info.tx.drop_icm_addr = dmn->info.caps.esw_tx_drop_address; + break; + default: + mlx5dr_dbg(dmn, "Invalid domain\n"); + ret = -EINVAL; + break; + } + + return ret; +} + +static void dr_domain_caps_uninit(struct mlx5dr_domain *dmn) +{ + kfree(dmn->info.caps.vports_caps); +} + +struct mlx5dr_domain * +mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum 
mlx5dr_domain_type type) +{ + struct mlx5dr_domain *dmn; + int ret; + + if (type > MLX5DR_DOMAIN_TYPE_FDB) + return NULL; + + dmn = kzalloc(sizeof(*dmn), GFP_KERNEL); + if (!dmn) + return NULL; + + dmn->mdev = mdev; + dmn->type = type; + refcount_set(&dmn->refcount, 1); + mutex_init(&dmn->mutex); + + if (dr_domain_caps_init(mdev, dmn)) { + mlx5dr_dbg(dmn, "Failed init domain, no caps\n"); + goto free_domain; + } + + dmn->info.max_log_action_icm_sz = DR_CHUNK_SIZE_4K; + dmn->info.max_log_sw_icm_sz = min_t(u32, DR_CHUNK_SIZE_1024K, + dmn->info.caps.log_icm_size); + + if (!dmn->info.supp_sw_steering) { + mlx5dr_err(dmn, "SW steering not supported for %s\n", + dev_name(mdev->device)); + goto uninit_caps; + } + + /* Allocate resources */ + ret = dr_domain_init_resources(dmn); + if (ret) { + mlx5dr_err(dmn, "Failed init domain resources for %s\n", + dev_name(mdev->device)); + goto uninit_caps; + } + + ret = dr_domain_init_cache(dmn); + if (ret) { + mlx5dr_err(dmn, "Failed initialize domain cache\n"); + goto uninit_resourses; + } + + /* Init CRC table for htbl CRC calculation */ + mlx5dr_crc32_init_table(); + + return dmn; + +uninit_resourses: + dr_domain_uninit_resources(dmn); +uninit_caps: + dr_domain_caps_uninit(dmn); +free_domain: + kfree(dmn); + return NULL; +} + +/* Assure synchronization of the device steering tables with updates made by SW + * insertion. + */ +int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags) +{ + int ret = 0; + + if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) { + mutex_lock(&dmn->mutex); + ret = mlx5dr_send_ring_force_drain(dmn); + mutex_unlock(&dmn->mutex); + if (ret) + return ret; + } + + if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW) + ret = mlx5dr_cmd_sync_steering(dmn->mdev); + + return ret; +} + +int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) +{ + if (refcount_read(&dmn->refcount) > 1) + return -EBUSY; + + /* make sure resources are not used by the hardware */ + mlx5dr_cmd_sync_steering(dmn->mdev); + dr_domain_uninit_cache(dmn); + dr_domain_uninit_resources(dmn); + dr_domain_caps_uninit(dmn); + mutex_destroy(&dmn->mutex); + kfree(dmn); + return 0; +} + +void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, + struct mlx5dr_domain *peer_dmn) +{ + mutex_lock(&dmn->mutex); + + if (dmn->peer_dmn) + refcount_dec(&dmn->peer_dmn->refcount); + + dmn->peer_dmn = peer_dmn; + + if (dmn->peer_dmn) + refcount_inc(&dmn->peer_dmn->refcount); + + mutex_unlock(&dmn->mutex); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c new file mode 100644 index 000000000000..60ef6e6171e3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
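The domain lifetime rules above are driven by a reference count: mlx5dr_domain_destroy() refuses to tear the domain down (returns -EBUSY) while anything else still holds it, and mlx5dr_domain_set_peer() drops the reference on the old peer before taking one on the new peer. The sketch below shows only those two rules; plain ints stand in for refcount_t, locking is omitted (the real code holds dmn->mutex), and all names are illustrative.

/* Refcount-guarded destroy plus peer swap, in miniature. */
#include <stdio.h>
#include <stdlib.h>

struct dmn {
	int refcount;
	struct dmn *peer;
};

static struct dmn *dmn_create(void)
{
	struct dmn *d = calloc(1, sizeof(*d));

	if (d)
		d->refcount = 1;	/* creator's reference */
	return d;
}

static int dmn_destroy(struct dmn *d)
{
	if (d->refcount > 1)
		return -1;	/* still referenced elsewhere, like -EBUSY */
	free(d);
	return 0;
}

static void dmn_set_peer(struct dmn *d, struct dmn *peer)
{
	if (d->peer)
		d->peer->refcount--;	/* release the previous peer */
	d->peer = peer;
	if (d->peer)
		d->peer->refcount++;	/* hold the new peer */
}

int main(void)
{
	struct dmn *a = dmn_create(), *b = dmn_create();

	dmn_set_peer(a, b);
	printf("destroy b while peered: %d\n", dmn_destroy(b));	/* refused */
	dmn_set_peer(a, NULL);
	printf("destroy b after unpeer: %d\n", dmn_destroy(b));	/* succeeds */
	dmn_destroy(a);
	return 0;
}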
*/ + +#include <linux/types.h> +#include "dr_types.h" + +struct mlx5dr_fw_recalc_cs_ft * +mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num) +{ + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; + u32 table_id, group_id, modify_hdr_id; + u64 rx_icm_addr, modify_ttl_action; + int ret; + + recalc_cs_ft = kzalloc(sizeof(*recalc_cs_ft), GFP_KERNEL); + if (!recalc_cs_ft) + return NULL; + + ret = mlx5dr_cmd_create_flow_table(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, + 0, 0, dmn->info.caps.max_ft_level - 1, + false, true, &rx_icm_addr, &table_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating TTL W/A FW flow table %d\n", ret); + goto free_ttl_tbl; + } + + ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, &group_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating TTL W/A FW flow group %d\n", ret); + goto destroy_flow_table; + } + + /* Modify TTL action by adding zero to trigger CS recalculation */ + modify_ttl_action = 0; + MLX5_SET(set_action_in, &modify_ttl_action, action_type, MLX5_ACTION_TYPE_ADD); + MLX5_SET(set_action_in, &modify_ttl_action, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL); + + ret = mlx5dr_cmd_alloc_modify_header(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, 1, + &modify_ttl_action, + &modify_hdr_id); + if (ret) { + mlx5dr_err(dmn, "Failed modify header TTL %d\n", ret); + goto destroy_flow_group; + } + + ret = mlx5dr_cmd_set_fte_modify_and_vport(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, group_id, modify_hdr_id, + vport_num); + if (ret) { + mlx5dr_err(dmn, "Failed setting TTL W/A flow table entry %d\n", ret); + goto dealloc_modify_header; + } + + recalc_cs_ft->modify_hdr_id = modify_hdr_id; + recalc_cs_ft->rx_icm_addr = rx_icm_addr; + recalc_cs_ft->table_id = table_id; + recalc_cs_ft->group_id = group_id; + + return recalc_cs_ft; + +dealloc_modify_header: + mlx5dr_cmd_dealloc_modify_header(dmn->mdev, modify_hdr_id); +destroy_flow_group: + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, group_id); +destroy_flow_table: + mlx5dr_cmd_destroy_flow_table(dmn->mdev, table_id, MLX5_FLOW_TABLE_TYPE_FDB); +free_ttl_tbl: + kfree(recalc_cs_ft); + return NULL; +} + +void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn, + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft) +{ + mlx5dr_cmd_del_flow_table_entry(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + recalc_cs_ft->table_id); + mlx5dr_cmd_dealloc_modify_header(dmn->mdev, recalc_cs_ft->modify_hdr_id); + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + recalc_cs_ft->table_id, + recalc_cs_ft->group_id); + mlx5dr_cmd_destroy_flow_table(dmn->mdev, + recalc_cs_ft->table_id, + MLX5_FLOW_TABLE_TYPE_FDB); + + kfree(recalc_cs_ft); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c new file mode 100644 index 000000000000..e76f61e7555e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -0,0 +1,570 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "dr_types.h" + +#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64 +#define DR_ICM_SYNC_THRESHOLD (64 * 1024 * 1024) + +struct mlx5dr_icm_pool; + +struct mlx5dr_icm_bucket { + struct mlx5dr_icm_pool *pool; + + /* Chunks that aren't visible to HW not directly and not in cache */ + struct list_head free_list; + unsigned int free_list_count; + + /* Used chunks, HW may be accessing this memory */ + struct list_head used_list; + unsigned int used_list_count; + + /* HW may be accessing this memory but at some future, + * undetermined time, it might cease to do so. Before deciding to call + * sync_ste, this list is moved to sync_list + */ + struct list_head hot_list; + unsigned int hot_list_count; + + /* Pending sync list, entries from the hot list are moved to this list. + * sync_ste is executed and then sync_list is concatenated to the free list + */ + struct list_head sync_list; + unsigned int sync_list_count; + + u32 total_chunks; + u32 num_of_entries; + u32 entry_size; + /* protect the ICM bucket */ + struct mutex mutex; +}; + +struct mlx5dr_icm_pool { + struct mlx5dr_icm_bucket *buckets; + enum mlx5dr_icm_type icm_type; + enum mlx5dr_icm_chunk_size max_log_chunk_sz; + enum mlx5dr_icm_chunk_size num_of_buckets; + struct list_head icm_mr_list; + /* protect the ICM MR list */ + struct mutex mr_mutex; + struct mlx5dr_domain *dmn; +}; + +struct mlx5dr_icm_dm { + u32 obj_id; + enum mlx5_sw_icm_type type; + u64 addr; + size_t length; +}; + +struct mlx5dr_icm_mr { + struct mlx5dr_icm_pool *pool; + struct mlx5_core_mkey mkey; + struct mlx5dr_icm_dm dm; + size_t used_length; + size_t length; + u64 icm_start_addr; + struct list_head mr_list; +}; + +static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev, + u32 pd, u64 length, u64 start_addr, int mode, + struct mlx5_core_mkey *mkey) +{ + u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, access_mode_1_0, mode); + MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + if (mode == MLX5_MKC_ACCESS_MODE_SW_ICM) { + MLX5_SET(mkc, mkc, rw, 1); + MLX5_SET(mkc, mkc, rr, 1); + } + + MLX5_SET64(mkc, mkc, len, length); + MLX5_SET(mkc, mkc, pd, pd); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, start_addr); + + return mlx5_core_create_mkey(mdev, mkey, in, inlen); +} + +static struct mlx5dr_icm_mr * +dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool, + enum mlx5_sw_icm_type type, + size_t align_base) +{ + struct mlx5_core_dev *mdev = pool->dmn->mdev; + struct mlx5dr_icm_mr *icm_mr; + size_t align_diff; + int err; + + icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL); + if (!icm_mr) + return NULL; + + icm_mr->pool = pool; + INIT_LIST_HEAD(&icm_mr->mr_list); + + icm_mr->dm.type = type; + + /* 2^log_biggest_table * entry-size * double-for-alignment */ + icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) * 2; + + err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0, + &icm_mr->dm.addr, &icm_mr->dm.obj_id); + if (err) { + mlx5dr_err(pool->dmn, "Failed to allocate SW ICM memory, err (%d)\n", err); + goto free_icm_mr; + } + + /* Register device memory */ + err = dr_icm_create_dm_mkey(mdev, pool->dmn->pdn, + icm_mr->dm.length, + icm_mr->dm.addr, + MLX5_MKC_ACCESS_MODE_SW_ICM, + &icm_mr->mkey); + if (err) { + mlx5dr_err(pool->dmn, "Failed to create SW ICM MKEY, err (%d)\n", 
err); + goto free_dm; + } + + icm_mr->icm_start_addr = icm_mr->dm.addr; + + align_diff = icm_mr->icm_start_addr % align_base; + if (align_diff) + icm_mr->used_length = align_base - align_diff; + + list_add_tail(&icm_mr->mr_list, &pool->icm_mr_list); + + return icm_mr; + +free_dm: + mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0, + icm_mr->dm.addr, icm_mr->dm.obj_id); +free_icm_mr: + kvfree(icm_mr); + return NULL; +} + +static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr) +{ + struct mlx5_core_dev *mdev = icm_mr->pool->dmn->mdev; + struct mlx5dr_icm_dm *dm = &icm_mr->dm; + + list_del(&icm_mr->mr_list); + mlx5_core_destroy_mkey(mdev, &icm_mr->mkey); + mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0, + dm->addr, dm->obj_id); + kvfree(icm_mr); +} + +static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk) +{ + struct mlx5dr_icm_bucket *bucket = chunk->bucket; + + chunk->ste_arr = kvzalloc(bucket->num_of_entries * + sizeof(chunk->ste_arr[0]), GFP_KERNEL); + if (!chunk->ste_arr) + return -ENOMEM; + + chunk->hw_ste_arr = kvzalloc(bucket->num_of_entries * + DR_STE_SIZE_REDUCED, GFP_KERNEL); + if (!chunk->hw_ste_arr) + goto out_free_ste_arr; + + chunk->miss_list = kvmalloc(bucket->num_of_entries * + sizeof(chunk->miss_list[0]), GFP_KERNEL); + if (!chunk->miss_list) + goto out_free_hw_ste_arr; + + return 0; + +out_free_hw_ste_arr: + kvfree(chunk->hw_ste_arr); +out_free_ste_arr: + kvfree(chunk->ste_arr); + return -ENOMEM; +} + +static int dr_icm_chunks_create(struct mlx5dr_icm_bucket *bucket) +{ + size_t mr_free_size, mr_req_size, mr_row_size; + struct mlx5dr_icm_pool *pool = bucket->pool; + struct mlx5dr_icm_mr *icm_mr = NULL; + struct mlx5dr_icm_chunk *chunk; + enum mlx5_sw_icm_type dm_type; + size_t align_base; + int i, err = 0; + + mr_req_size = bucket->num_of_entries * bucket->entry_size; + mr_row_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type); + + if (pool->icm_type == DR_ICM_TYPE_STE) { + dm_type = MLX5_SW_ICM_TYPE_STEERING; + /* Align base is the biggest chunk size / row size */ + align_base = mr_row_size; + } else { + dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY; + /* Align base is 64B */ + align_base = DR_ICM_MODIFY_HDR_ALIGN_BASE; + } + + mutex_lock(&pool->mr_mutex); + if (!list_empty(&pool->icm_mr_list)) { + icm_mr = list_last_entry(&pool->icm_mr_list, + struct mlx5dr_icm_mr, mr_list); + + if (icm_mr) + mr_free_size = icm_mr->dm.length - icm_mr->used_length; + } + + if (!icm_mr || mr_free_size < mr_row_size) { + icm_mr = dr_icm_pool_mr_create(pool, dm_type, align_base); + if (!icm_mr) { + err = -ENOMEM; + goto out_err; + } + } + + /* Create memory aligned chunks */ + for (i = 0; i < mr_row_size / mr_req_size; i++) { + chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL); + if (!chunk) { + err = -ENOMEM; + goto out_err; + } + + chunk->bucket = bucket; + chunk->rkey = icm_mr->mkey.key; + /* mr start addr is zero based */ + chunk->mr_addr = icm_mr->used_length; + chunk->icm_addr = (uintptr_t)icm_mr->icm_start_addr + icm_mr->used_length; + icm_mr->used_length += mr_req_size; + chunk->num_of_entries = bucket->num_of_entries; + chunk->byte_size = chunk->num_of_entries * bucket->entry_size; + + if (pool->icm_type == DR_ICM_TYPE_STE) { + err = dr_icm_chunk_ste_init(chunk); + if (err) + goto out_free_chunk; + } + + INIT_LIST_HEAD(&chunk->chunk_list); + list_add(&chunk->chunk_list, &bucket->free_list); + bucket->free_list_count++; + bucket->total_chunks++; + } + mutex_unlock(&pool->mr_mutex); + return 0; + +out_free_chunk: + 
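dr_icm_pool_mr_create() above deliberately allocates twice the largest chunk size so that the start of the region can be rounded up to align_base, and dr_icm_chunks_create() then carves equal chunks out of the remaining space at increasing used_length offsets. The arithmetic is small enough to show on its own; the values below are illustrative, not taken from hardware.

/* Align the region start, then hand out fixed-size chunks in order. */
#include <stdio.h>

int main(void)
{
	size_t start = 0x12340;		/* pretend ICM start address */
	size_t align_base = 0x1000;	/* alignment needed for this ICM type */
	size_t req = 0x400;		/* bytes consumed per chunk */
	size_t used = 0, diff, i;

	diff = start % align_base;
	if (diff)
		used = align_base - diff;	/* skip up to the next boundary */

	for (i = 0; i < 4; i++) {
		printf("chunk %zu at icm 0x%zx (offset 0x%zx)\n",
		       i, start + used, used);
		used += req;	/* next chunk starts right after this one */
	}
	return 0;
}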
kvfree(chunk); +out_err: + mutex_unlock(&pool->mr_mutex); + return err; +} + +static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk) +{ + kvfree(chunk->miss_list); + kvfree(chunk->hw_ste_arr); + kvfree(chunk->ste_arr); +} + +static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk) +{ + struct mlx5dr_icm_bucket *bucket = chunk->bucket; + + list_del(&chunk->chunk_list); + bucket->total_chunks--; + + if (bucket->pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_chunk_ste_cleanup(chunk); + + kvfree(chunk); +} + +static void dr_icm_bucket_init(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *bucket, + enum mlx5dr_icm_chunk_size chunk_size) +{ + if (pool->icm_type == DR_ICM_TYPE_STE) + bucket->entry_size = DR_STE_SIZE; + else + bucket->entry_size = DR_MODIFY_ACTION_SIZE; + + bucket->num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); + bucket->pool = pool; + mutex_init(&bucket->mutex); + INIT_LIST_HEAD(&bucket->free_list); + INIT_LIST_HEAD(&bucket->used_list); + INIT_LIST_HEAD(&bucket->hot_list); + INIT_LIST_HEAD(&bucket->sync_list); +} + +static void dr_icm_bucket_cleanup(struct mlx5dr_icm_bucket *bucket) +{ + struct mlx5dr_icm_chunk *chunk, *next; + + mutex_destroy(&bucket->mutex); + list_splice_tail_init(&bucket->sync_list, &bucket->free_list); + list_splice_tail_init(&bucket->hot_list, &bucket->free_list); + + list_for_each_entry_safe(chunk, next, &bucket->free_list, chunk_list) + dr_icm_chunk_destroy(chunk); + + WARN_ON(bucket->total_chunks != 0); + + /* Cleanup of unreturned chunks */ + list_for_each_entry_safe(chunk, next, &bucket->used_list, chunk_list) + dr_icm_chunk_destroy(chunk); +} + +static u64 dr_icm_hot_mem_size(struct mlx5dr_icm_pool *pool) +{ + u64 hot_size = 0; + int chunk_order; + + for (chunk_order = 0; chunk_order < pool->num_of_buckets; chunk_order++) + hot_size += pool->buckets[chunk_order].hot_list_count * + mlx5dr_icm_pool_chunk_size_to_byte(chunk_order, pool->icm_type); + + return hot_size; +} + +static bool dr_icm_reuse_hot_entries(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *bucket) +{ + u64 bytes_for_sync; + + bytes_for_sync = dr_icm_hot_mem_size(pool); + if (bytes_for_sync < DR_ICM_SYNC_THRESHOLD || !bucket->hot_list_count) + return false; + + return true; +} + +static void dr_icm_chill_bucket_start(struct mlx5dr_icm_bucket *bucket) +{ + list_splice_tail_init(&bucket->hot_list, &bucket->sync_list); + bucket->sync_list_count += bucket->hot_list_count; + bucket->hot_list_count = 0; +} + +static void dr_icm_chill_bucket_end(struct mlx5dr_icm_bucket *bucket) +{ + list_splice_tail_init(&bucket->sync_list, &bucket->free_list); + bucket->free_list_count += bucket->sync_list_count; + bucket->sync_list_count = 0; +} + +static void dr_icm_chill_bucket_abort(struct mlx5dr_icm_bucket *bucket) +{ + list_splice_tail_init(&bucket->sync_list, &bucket->hot_list); + bucket->hot_list_count += bucket->sync_list_count; + bucket->sync_list_count = 0; +} + +static void dr_icm_chill_buckets_start(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *cb, + bool buckets[DR_CHUNK_SIZE_MAX]) +{ + struct mlx5dr_icm_bucket *bucket; + int i; + + for (i = 0; i < pool->num_of_buckets; i++) { + bucket = &pool->buckets[i]; + if (bucket == cb) { + dr_icm_chill_bucket_start(bucket); + continue; + } + + /* Freeing the mutex is done at the end of that process, after + * sync_ste was executed at dr_icm_chill_buckets_end func. 
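The bucket comments above describe a chunk life cycle of free -> used -> hot -> sync -> free: a freed chunk is parked on the hot list because the device may still be reading it, and only once enough hot memory has piled up does the allocator move hot entries to the sync list, issue a steering sync, and recycle them, as mlx5dr_icm_alloc_chunk() further below does. The sketch collapses this to a single bucket with counters instead of list_heads; the real threshold (DR_ICM_SYNC_THRESHOLD) is checked across the whole pool, and when reuse is not possible the real pool grows by creating new chunks instead of failing.

/* Chunk state machine in miniature: counters stand in for the lists. */
#include <stdio.h>

#define SYNC_THRESHOLD (64ULL * 1024 * 1024)

struct bucket {
	unsigned long long chunk_bytes;
	unsigned int free_cnt, used_cnt, hot_cnt, sync_cnt;
};

static int hw_sync(void) { return 0; }	/* placeholder for the sync command */

static void chunk_alloc(struct bucket *b)
{
	if (!b->free_cnt && b->hot_cnt &&
	    b->hot_cnt * b->chunk_bytes >= SYNC_THRESHOLD) {
		/* chill: hot -> sync, sync the device, then sync -> free */
		b->sync_cnt += b->hot_cnt;
		b->hot_cnt = 0;
		if (!hw_sync()) {
			b->free_cnt += b->sync_cnt;
			b->sync_cnt = 0;
		}
	}
	/* the real pool would otherwise create brand new chunks here */
	if (b->free_cnt) {
		b->free_cnt--;
		b->used_cnt++;
	}
}

static void chunk_free(struct bucket *b)
{
	/* HW may still reference the memory: park it on the hot list */
	b->used_cnt--;
	b->hot_cnt++;
}

int main(void)
{
	struct bucket b = { .chunk_bytes = 32ULL * 1024 * 1024, .free_cnt = 2 };

	chunk_alloc(&b);
	chunk_alloc(&b);
	chunk_free(&b);
	chunk_free(&b);
	chunk_alloc(&b);	/* hot memory hits the threshold: recycle */
	printf("free %u used %u hot %u\n", b.free_cnt, b.used_cnt, b.hot_cnt);
	return 0;
}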
+ */ + if (mutex_trylock(&bucket->mutex)) { + dr_icm_chill_bucket_start(bucket); + buckets[i] = true; + } + } +} + +static void dr_icm_chill_buckets_end(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *cb, + bool buckets[DR_CHUNK_SIZE_MAX]) +{ + struct mlx5dr_icm_bucket *bucket; + int i; + + for (i = 0; i < pool->num_of_buckets; i++) { + bucket = &pool->buckets[i]; + if (bucket == cb) { + dr_icm_chill_bucket_end(bucket); + continue; + } + + if (!buckets[i]) + continue; + + dr_icm_chill_bucket_end(bucket); + mutex_unlock(&bucket->mutex); + } +} + +static void dr_icm_chill_buckets_abort(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *cb, + bool buckets[DR_CHUNK_SIZE_MAX]) +{ + struct mlx5dr_icm_bucket *bucket; + int i; + + for (i = 0; i < pool->num_of_buckets; i++) { + bucket = &pool->buckets[i]; + if (bucket == cb) { + dr_icm_chill_bucket_abort(bucket); + continue; + } + + if (!buckets[i]) + continue; + + dr_icm_chill_bucket_abort(bucket); + mutex_unlock(&bucket->mutex); + } +} + +/* Allocate an ICM chunk, each chunk holds a piece of ICM memory and + * also memory used for HW STE management for optimizations. + */ +struct mlx5dr_icm_chunk * +mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size) +{ + struct mlx5dr_icm_chunk *chunk = NULL; /* Fix compilation warning */ + bool buckets[DR_CHUNK_SIZE_MAX] = {}; + struct mlx5dr_icm_bucket *bucket; + int err; + + if (chunk_size > pool->max_log_chunk_sz) + return NULL; + + bucket = &pool->buckets[chunk_size]; + + mutex_lock(&bucket->mutex); + + /* Take chunk from pool if available, otherwise allocate new chunks */ + if (list_empty(&bucket->free_list)) { + if (dr_icm_reuse_hot_entries(pool, bucket)) { + dr_icm_chill_buckets_start(pool, bucket, buckets); + err = mlx5dr_cmd_sync_steering(pool->dmn->mdev); + if (err) { + dr_icm_chill_buckets_abort(pool, bucket, buckets); + mlx5dr_dbg(pool->dmn, "Sync_steering failed\n"); + chunk = NULL; + goto out; + } + dr_icm_chill_buckets_end(pool, bucket, buckets); + } else { + dr_icm_chunks_create(bucket); + } + } + + if (!list_empty(&bucket->free_list)) { + chunk = list_last_entry(&bucket->free_list, + struct mlx5dr_icm_chunk, + chunk_list); + if (chunk) { + list_del_init(&chunk->chunk_list); + list_add_tail(&chunk->chunk_list, &bucket->used_list); + bucket->free_list_count--; + bucket->used_list_count++; + } + } +out: + mutex_unlock(&bucket->mutex); + return chunk; +} + +void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk) +{ + struct mlx5dr_icm_bucket *bucket = chunk->bucket; + + if (bucket->pool->icm_type == DR_ICM_TYPE_STE) { + memset(chunk->ste_arr, 0, + bucket->num_of_entries * sizeof(chunk->ste_arr[0])); + memset(chunk->hw_ste_arr, 0, + bucket->num_of_entries * DR_STE_SIZE_REDUCED); + } + + mutex_lock(&bucket->mutex); + list_del_init(&chunk->chunk_list); + list_add_tail(&chunk->chunk_list, &bucket->hot_list); + bucket->hot_list_count++; + bucket->used_list_count--; + mutex_unlock(&bucket->mutex); +} + +struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, + enum mlx5dr_icm_type icm_type) +{ + enum mlx5dr_icm_chunk_size max_log_chunk_sz; + struct mlx5dr_icm_pool *pool; + int i; + + if (icm_type == DR_ICM_TYPE_STE) + max_log_chunk_sz = dmn->info.max_log_sw_icm_sz; + else + max_log_chunk_sz = dmn->info.max_log_action_icm_sz; + + pool = kvzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; + + pool->buckets = kcalloc(max_log_chunk_sz + 1, + sizeof(pool->buckets[0]), + GFP_KERNEL); + if (!pool->buckets) + goto 
free_pool; + + pool->dmn = dmn; + pool->icm_type = icm_type; + pool->max_log_chunk_sz = max_log_chunk_sz; + pool->num_of_buckets = max_log_chunk_sz + 1; + INIT_LIST_HEAD(&pool->icm_mr_list); + + for (i = 0; i < pool->num_of_buckets; i++) + dr_icm_bucket_init(pool, &pool->buckets[i], i); + + mutex_init(&pool->mr_mutex); + + return pool; + +free_pool: + kvfree(pool); + return NULL; +} + +void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool) +{ + struct mlx5dr_icm_mr *icm_mr, *next; + int i; + + mutex_destroy(&pool->mr_mutex); + + list_for_each_entry_safe(icm_mr, next, &pool->icm_mr_list, mr_list) + dr_icm_pool_mr_destroy(icm_mr); + + for (i = 0; i < pool->num_of_buckets; i++) + dr_icm_bucket_cleanup(&pool->buckets[i]); + + kfree(pool->buckets); + kvfree(pool); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c new file mode 100644 index 000000000000..01008cd66f75 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -0,0 +1,770 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +static bool dr_mask_is_smac_set(struct mlx5dr_match_spec *spec) +{ + return (spec->smac_47_16 || spec->smac_15_0); +} + +static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec) +{ + return (spec->dmac_47_16 || spec->dmac_15_0); +} + +static bool dr_mask_is_src_addr_set(struct mlx5dr_match_spec *spec) +{ + return (spec->src_ip_127_96 || spec->src_ip_95_64 || + spec->src_ip_63_32 || spec->src_ip_31_0); +} + +static bool dr_mask_is_dst_addr_set(struct mlx5dr_match_spec *spec) +{ + return (spec->dst_ip_127_96 || spec->dst_ip_95_64 || + spec->dst_ip_63_32 || spec->dst_ip_31_0); +} + +static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec) +{ + return (spec->ip_protocol || spec->frag || spec->tcp_flags || + spec->ip_ecn || spec->ip_dscp); +} + +static bool dr_mask_is_tcp_udp_base_set(struct mlx5dr_match_spec *spec) +{ + return (spec->tcp_sport || spec->tcp_dport || + spec->udp_sport || spec->udp_dport); +} + +static bool dr_mask_is_ipv4_set(struct mlx5dr_match_spec *spec) +{ + return (spec->dst_ip_31_0 || spec->src_ip_31_0); +} + +static bool dr_mask_is_ipv4_5_tuple_set(struct mlx5dr_match_spec *spec) +{ + return (dr_mask_is_l3_base_set(spec) || + dr_mask_is_tcp_udp_base_set(spec) || + dr_mask_is_ipv4_set(spec)); +} + +static bool dr_mask_is_eth_l2_tnl_set(struct mlx5dr_match_misc *misc) +{ + return misc->vxlan_vni; +} + +static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec) +{ + return spec->ttl_hoplimit; +} + +#define DR_MASK_IS_L2_DST(_spec, _misc, _inner_outer) (_spec.first_vid || \ + (_spec).first_cfi || (_spec).first_prio || (_spec).cvlan_tag || \ + (_spec).svlan_tag || (_spec).dmac_47_16 || (_spec).dmac_15_0 || \ + (_spec).ethertype || (_spec).ip_version || \ + (_misc)._inner_outer##_second_vid || \ + (_misc)._inner_outer##_second_cfi || \ + (_misc)._inner_outer##_second_prio || \ + (_misc)._inner_outer##_second_cvlan_tag || \ + (_misc)._inner_outer##_second_svlan_tag) + +#define DR_MASK_IS_ETH_L4_SET(_spec, _misc, _inner_outer) ( \ + dr_mask_is_l3_base_set(&(_spec)) || \ + dr_mask_is_tcp_udp_base_set(&(_spec)) || \ + dr_mask_is_ttl_set(&(_spec)) || \ + (_misc)._inner_outer##_ipv6_flow_label) + +#define DR_MASK_IS_ETH_L4_MISC_SET(_misc3, _inner_outer) ( \ + (_misc3)._inner_outer##_tcp_seq_num || \ + (_misc3)._inner_outer##_tcp_ack_num) + +#define DR_MASK_IS_FIRST_MPLS_SET(_misc2, 
_inner_outer) ( \ + (_misc2)._inner_outer##_first_mpls_label || \ + (_misc2)._inner_outer##_first_mpls_exp || \ + (_misc2)._inner_outer##_first_mpls_s_bos || \ + (_misc2)._inner_outer##_first_mpls_ttl) + +static bool dr_mask_is_gre_set(struct mlx5dr_match_misc *misc) +{ + return (misc->gre_key_h || misc->gre_key_l || + misc->gre_protocol || misc->gre_c_present || + misc->gre_k_present || misc->gre_s_present); +} + +#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET(_misc2, gre_udp) ( \ + (_misc2).outer_first_mpls_over_##gre_udp##_label || \ + (_misc2).outer_first_mpls_over_##gre_udp##_exp || \ + (_misc2).outer_first_mpls_over_##gre_udp##_s_bos || \ + (_misc2).outer_first_mpls_over_##gre_udp##_ttl) + +#define DR_MASK_IS_FLEX_PARSER_0_SET(_misc2) ( \ + DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), gre) || \ + DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), udp)) + +static bool dr_mask_is_flex_parser_tnl_set(struct mlx5dr_match_misc3 *misc3) +{ + return (misc3->outer_vxlan_gpe_vni || + misc3->outer_vxlan_gpe_next_protocol || + misc3->outer_vxlan_gpe_flags); +} + +static bool dr_mask_is_flex_parser_icmpv6_set(struct mlx5dr_match_misc3 *misc3) +{ + return (misc3->icmpv6_type || misc3->icmpv6_code || + misc3->icmpv6_header_data); +} + +static bool dr_mask_is_wqe_metadata_set(struct mlx5dr_match_misc2 *misc2) +{ + return misc2->metadata_reg_a; +} + +static bool dr_mask_is_reg_c_0_3_set(struct mlx5dr_match_misc2 *misc2) +{ + return (misc2->metadata_reg_c_0 || misc2->metadata_reg_c_1 || + misc2->metadata_reg_c_2 || misc2->metadata_reg_c_3); +} + +static bool dr_mask_is_reg_c_4_7_set(struct mlx5dr_match_misc2 *misc2) +{ + return (misc2->metadata_reg_c_4 || misc2->metadata_reg_c_5 || + misc2->metadata_reg_c_6 || misc2->metadata_reg_c_7); +} + +static bool dr_mask_is_gvmi_or_qpn_set(struct mlx5dr_match_misc *misc) +{ + return (misc->source_sqn || misc->source_port); +} + +static bool +dr_matcher_supp_flex_parser_vxlan_gpe(struct mlx5dr_domain *dmn) +{ + return dmn->info.caps.flex_protocols & + MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED; +} + +int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + bool ipv6) +{ + if (ipv6) { + nic_matcher->ste_builder = nic_matcher->ste_builder6; + nic_matcher->num_of_builders = nic_matcher->num_of_builders6; + } else { + nic_matcher->ste_builder = nic_matcher->ste_builder4; + nic_matcher->num_of_builders = nic_matcher->num_of_builders4; + } + + if (!nic_matcher->num_of_builders) { + mlx5dr_dbg(matcher->tbl->dmn, + "Rule not supported on this matcher due to IP related fields\n"); + return -EINVAL; + } + + return 0; +} + +static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + bool ipv6) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_match_param mask = {}; + struct mlx5dr_match_misc3 *misc3; + struct mlx5dr_ste_build *sb; + u8 *num_of_builders; + bool inner, rx; + int idx = 0; + int ret, i; + + if (ipv6) { + sb = nic_matcher->ste_builder6; + num_of_builders = &nic_matcher->num_of_builders6; + } else { + sb = nic_matcher->ste_builder4; + num_of_builders = &nic_matcher->num_of_builders4; + } + + rx = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX; + + /* Create a temporary mask to track and clear used mask fields */ + if (matcher->match_criteria & DR_MATCHER_CRITERIA_OUTER) + mask.outer = matcher->mask.outer; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC) + mask.misc 
= matcher->mask.misc; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_INNER) + mask.inner = matcher->mask.inner; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC2) + mask.misc2 = matcher->mask.misc2; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC3) + mask.misc3 = matcher->mask.misc3; + + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, + &matcher->mask, NULL); + if (ret) + return ret; + + /* Outer */ + if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER | + DR_MATCHER_CRITERIA_MISC | + DR_MATCHER_CRITERIA_MISC2 | + DR_MATCHER_CRITERIA_MISC3)) { + inner = false; + + if (dr_mask_is_wqe_metadata_set(&mask.misc2)) + mlx5dr_ste_build_general_purpose(&sb[idx++], &mask, inner, rx); + + if (dr_mask_is_reg_c_0_3_set(&mask.misc2)) + mlx5dr_ste_build_register_0(&sb[idx++], &mask, inner, rx); + + if (dr_mask_is_reg_c_4_7_set(&mask.misc2)) + mlx5dr_ste_build_register_1(&sb[idx++], &mask, inner, rx); + + if (dr_mask_is_gvmi_or_qpn_set(&mask.misc) && + (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) { + ret = mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask, + &dmn->info.caps, + inner, rx); + if (ret) + return ret; + } + + if (dr_mask_is_smac_set(&mask.outer) && + dr_mask_is_dmac_set(&mask.outer)) { + ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask, + inner, rx); + if (ret) + return ret; + } + + if (dr_mask_is_smac_set(&mask.outer)) + mlx5dr_ste_build_eth_l2_src(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_L2_DST(mask.outer, mask.misc, outer)) + mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx); + + if (ipv6) { + if (dr_mask_is_dst_addr_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask, + inner, rx); + + if (dr_mask_is_src_addr_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv6_src(&sb[idx++], &mask, + inner, rx); + + if (DR_MASK_IS_ETH_L4_SET(mask.outer, mask.misc, outer)) + mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask, + inner, rx); + } else { + if (dr_mask_is_ipv4_5_tuple_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask, + inner, rx); + + if (dr_mask_is_ttl_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv4_misc(&sb[idx++], &mask, + inner, rx); + } + + if (dr_mask_is_flex_parser_tnl_set(&mask.misc3) && + dr_matcher_supp_flex_parser_vxlan_gpe(dmn)) + mlx5dr_ste_build_flex_parser_tnl(&sb[idx++], &mask, + inner, rx); + + if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer)) + mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, outer)) + mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2)) + mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, + inner, rx); + + misc3 = &mask.misc3; + if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc3) && + mlx5dr_matcher_supp_flex_parser_icmp_v4(&dmn->info.caps)) || + (dr_mask_is_flex_parser_icmpv6_set(&mask.misc3) && + mlx5dr_matcher_supp_flex_parser_icmp_v6(&dmn->info.caps))) { + ret = mlx5dr_ste_build_flex_parser_1(&sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + if (ret) + return ret; + } + if (dr_mask_is_gre_set(&mask.misc)) + mlx5dr_ste_build_gre(&sb[idx++], &mask, inner, rx); + } + + /* Inner */ + if (matcher->match_criteria & (DR_MATCHER_CRITERIA_INNER | + DR_MATCHER_CRITERIA_MISC | + DR_MATCHER_CRITERIA_MISC2 | + DR_MATCHER_CRITERIA_MISC3)) { + inner = true; + + if (dr_mask_is_eth_l2_tnl_set(&mask.misc)) + mlx5dr_ste_build_eth_l2_tnl(&sb[idx++], &mask, inner, rx); + + if (dr_mask_is_smac_set(&mask.inner) && + 
dr_mask_is_dmac_set(&mask.inner)) { + ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], + &mask, inner, rx); + if (ret) + return ret; + } + + if (dr_mask_is_smac_set(&mask.inner)) + mlx5dr_ste_build_eth_l2_src(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_L2_DST(mask.inner, mask.misc, inner)) + mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx); + + if (ipv6) { + if (dr_mask_is_dst_addr_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask, + inner, rx); + + if (dr_mask_is_src_addr_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv6_src(&sb[idx++], &mask, + inner, rx); + + if (DR_MASK_IS_ETH_L4_SET(mask.inner, mask.misc, inner)) + mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask, + inner, rx); + } else { + if (dr_mask_is_ipv4_5_tuple_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask, + inner, rx); + + if (dr_mask_is_ttl_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv4_misc(&sb[idx++], &mask, + inner, rx); + } + + if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, inner)) + mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, inner)) + mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2)) + mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, inner, rx); + } + /* Empty matcher, takes all */ + if (matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY) + mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx); + + if (idx == 0) { + mlx5dr_dbg(dmn, "Cannot generate any valid rules from mask\n"); + return -EINVAL; + } + + /* Check that all mask fields were consumed */ + for (i = 0; i < sizeof(struct mlx5dr_match_param); i++) { + if (((u8 *)&mask)[i] != 0) { + mlx5dr_info(dmn, "Mask contains unsupported parameters\n"); + return -EOPNOTSUPP; + } + } + + *num_of_builders = idx; + + return 0; +} + +static int dr_matcher_connect(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *curr_nic_matcher, + struct mlx5dr_matcher_rx_tx *next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +{ + struct mlx5dr_table_rx_tx *nic_tbl = curr_nic_matcher->nic_tbl; + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *prev_htbl; + int ret; + + /* Connect end anchor hash table to next_htbl or to the default address */ + if (next_nic_matcher) { + info.type = CONNECT_HIT; + info.hit_next_htbl = next_nic_matcher->s_htbl; + } else { + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + } + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + curr_nic_matcher->e_anchor, + &info, info.type == CONNECT_HIT); + if (ret) + return ret; + + /* Connect start hash table to end anchor */ + info.type = CONNECT_MISS; + info.miss_icm_addr = curr_nic_matcher->e_anchor->chunk->icm_addr; + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + curr_nic_matcher->s_htbl, + &info, false); + if (ret) + return ret; + + /* Connect previous hash table to matcher start hash table */ + if (prev_nic_matcher) + prev_htbl = prev_nic_matcher->e_anchor; + else + prev_htbl = nic_tbl->s_anchor; + + info.type = CONNECT_HIT; + info.hit_next_htbl = curr_nic_matcher->s_htbl; + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_htbl, + &info, true); + if (ret) + return ret; + + /* Update the pointing ste and next hash table */ + curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->ste_arr; + prev_htbl->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl; + + if (next_nic_matcher) { + 
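dr_matcher_set_ste_builders() above works on a temporary copy of the match mask: each mlx5dr_ste_build_* call consumes the fields it knows how to encode, and the final byte scan rejects the matcher with -EOPNOTSUPP if anything in the mask was left untouched. The sketch below shows only that validation idea with invented fields (smac, dmac, vxlan_vni as an unconsumed field); it is not the driver's mask layout.

/* Consume-and-verify validation of a match mask copy. */
#include <stdio.h>

struct match_mask {
	unsigned int smac;
	unsigned int dmac;
	unsigned int vxlan_vni;	/* pretend no builder handles this one */
};

static void build_l2(struct match_mask *m)
{
	/* a builder clears the fields it encodes into its STE */
	m->smac = 0;
	m->dmac = 0;
}

static int all_consumed(const struct match_mask *m)
{
	const unsigned char *p = (const unsigned char *)m;
	size_t i;

	for (i = 0; i < sizeof(*m); i++)
		if (p[i])
			return 0;	/* leftover bits: unsupported field */
	return 1;
}

int main(void)
{
	struct match_mask mask = { .smac = 0xffffffff, .vxlan_vni = 0xffffff };

	build_l2(&mask);
	printf("mask fully supported: %s\n", all_consumed(&mask) ? "yes" : "no");
	return 0;
}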
next_nic_matcher->s_htbl->pointing_ste = curr_nic_matcher->e_anchor->ste_arr; + curr_nic_matcher->e_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; + } + + return 0; +} + +static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_matcher *next_matcher, *prev_matcher, *tmp_matcher; + struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_domain *dmn = tbl->dmn; + bool first = true; + int ret; + + next_matcher = NULL; + if (!list_empty(&tbl->matcher_list)) + list_for_each_entry(tmp_matcher, &tbl->matcher_list, matcher_list) { + if (tmp_matcher->prio >= matcher->prio) { + next_matcher = tmp_matcher; + break; + } + first = false; + } + + prev_matcher = NULL; + if (next_matcher && !first) + prev_matcher = list_entry(next_matcher->matcher_list.prev, + struct mlx5dr_matcher, + matcher_list); + else if (!first) + prev_matcher = list_entry(tbl->matcher_list.prev, + struct mlx5dr_matcher, + matcher_list); + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { + ret = dr_matcher_connect(dmn, &matcher->rx, + next_matcher ? &next_matcher->rx : NULL, + prev_matcher ? &prev_matcher->rx : NULL); + if (ret) + return ret; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { + ret = dr_matcher_connect(dmn, &matcher->tx, + next_matcher ? &next_matcher->tx : NULL, + prev_matcher ? &prev_matcher->tx : NULL); + if (ret) + return ret; + } + + if (prev_matcher) + list_add(&matcher->matcher_list, &prev_matcher->matcher_list); + else if (next_matcher) + list_add_tail(&matcher->matcher_list, + &next_matcher->matcher_list); + else + list_add(&matcher->matcher_list, &tbl->matcher_list); + + return 0; +} + +static void dr_matcher_uninit_nic(struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + mlx5dr_htbl_put(nic_matcher->s_htbl); + mlx5dr_htbl_put(nic_matcher->e_anchor); +} + +static void dr_matcher_uninit_fdb(struct mlx5dr_matcher *matcher) +{ + dr_matcher_uninit_nic(&matcher->rx); + dr_matcher_uninit_nic(&matcher->tx); +} + +static void dr_matcher_uninit(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_matcher_uninit_nic(&matcher->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_matcher_uninit_nic(&matcher->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_matcher_uninit_fdb(matcher); + break; + default: + WARN_ON(true); + break; + } +} + +static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + int ret, ret_v4, ret_v6; + + ret_v4 = dr_matcher_set_ste_builders(matcher, nic_matcher, false); + ret_v6 = dr_matcher_set_ste_builders(matcher, nic_matcher, true); + + if (ret_v4 && ret_v6) { + mlx5dr_dbg(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n"); + return -EINVAL; + } + + if (!ret_v4) + nic_matcher->ste_builder = nic_matcher->ste_builder4; + else + nic_matcher->ste_builder = nic_matcher->ste_builder6; + + nic_matcher->e_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!nic_matcher->e_anchor) + return -ENOMEM; + + nic_matcher->s_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + nic_matcher->ste_builder[0].lu_type, + nic_matcher->ste_builder[0].byte_mask); + if (!nic_matcher->s_htbl) { + ret = -ENOMEM; + goto free_e_htbl; + } + + /* make sure the tables exist while empty */ + mlx5dr_htbl_get(nic_matcher->s_htbl); + 
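dr_matcher_add_to_tbl() above keeps the table's matcher list sorted by priority: the new matcher is inserted before the first existing matcher whose priority is greater than or equal to its own, so lower priority values sit earlier in the chain that dr_matcher_connect() wires up. A plain singly linked list stands in for the kernel list_head in this sketch; names are illustrative.

/* Ordered insert by priority, smallest first. */
#include <stdio.h>
#include <stdlib.h>

struct matcher {
	unsigned int prio;
	struct matcher *next;
};

static void add_to_tbl(struct matcher **head, struct matcher *m)
{
	struct matcher **pos = head;

	/* stop at the first matcher with prio >= ours and insert before it */
	while (*pos && (*pos)->prio < m->prio)
		pos = &(*pos)->next;

	m->next = *pos;
	*pos = m;
}

int main(void)
{
	static const unsigned int prios[] = { 10, 1, 5, 5 };
	struct matcher *head = NULL, *m;
	size_t i;

	for (i = 0; i < sizeof(prios) / sizeof(prios[0]); i++) {
		m = calloc(1, sizeof(*m));
		m->prio = prios[i];
		add_to_tbl(&head, m);
	}

	for (m = head; m; m = m->next)
		printf("%u ", m->prio);	/* prints: 1 5 5 10 */
	printf("\n");

	while (head) {
		m = head->next;
		free(head);
		head = m;
	}
	return 0;
}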
mlx5dr_htbl_get(nic_matcher->e_anchor); + + return 0; + +free_e_htbl: + mlx5dr_ste_htbl_free(nic_matcher->e_anchor); + return ret; +} + +static int dr_matcher_init_fdb(struct mlx5dr_matcher *matcher) +{ + int ret; + + ret = dr_matcher_init_nic(matcher, &matcher->rx); + if (ret) + return ret; + + ret = dr_matcher_init_nic(matcher, &matcher->tx); + if (ret) + goto uninit_nic_rx; + + return 0; + +uninit_nic_rx: + dr_matcher_uninit_nic(&matcher->rx); + return ret; +} + +static int dr_matcher_init(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_domain *dmn = tbl->dmn; + int ret; + + if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) { + mlx5dr_info(dmn, "Invalid match criteria attribute\n"); + return -EINVAL; + } + + if (mask) { + if (mask->match_sz > sizeof(struct mlx5dr_match_param)) { + mlx5dr_info(dmn, "Invalid match size attribute\n"); + return -EINVAL; + } + mlx5dr_ste_copy_param(matcher->match_criteria, + &matcher->mask, mask); + } + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + matcher->rx.nic_tbl = &tbl->rx; + ret = dr_matcher_init_nic(matcher, &matcher->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + matcher->tx.nic_tbl = &tbl->tx; + ret = dr_matcher_init_nic(matcher, &matcher->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + matcher->rx.nic_tbl = &tbl->rx; + matcher->tx.nic_tbl = &tbl->tx; + ret = dr_matcher_init_fdb(matcher); + break; + default: + WARN_ON(true); + return -EINVAL; + } + + return ret; +} + +struct mlx5dr_matcher * +mlx5dr_matcher_create(struct mlx5dr_table *tbl, + u16 priority, + u8 match_criteria_enable, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_matcher *matcher; + int ret; + + refcount_inc(&tbl->refcount); + + matcher = kzalloc(sizeof(*matcher), GFP_KERNEL); + if (!matcher) + goto dec_ref; + + matcher->tbl = tbl; + matcher->prio = priority; + matcher->match_criteria = match_criteria_enable; + refcount_set(&matcher->refcount, 1); + INIT_LIST_HEAD(&matcher->matcher_list); + + mutex_lock(&tbl->dmn->mutex); + + ret = dr_matcher_init(matcher, mask); + if (ret) + goto free_matcher; + + ret = dr_matcher_add_to_tbl(matcher); + if (ret) + goto matcher_uninit; + + mutex_unlock(&tbl->dmn->mutex); + + return matcher; + +matcher_uninit: + dr_matcher_uninit(matcher); +free_matcher: + mutex_unlock(&tbl->dmn->mutex); + kfree(matcher); +dec_ref: + refcount_dec(&tbl->refcount); + return NULL; +} + +static int dr_matcher_disconnect(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl, + struct mlx5dr_matcher_rx_tx *next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *prev_anchor; + + if (prev_nic_matcher) + prev_anchor = prev_nic_matcher->e_anchor; + else + prev_anchor = nic_tbl->s_anchor; + + /* Connect previous anchor hash table to next matcher or to the default address */ + if (next_nic_matcher) { + info.type = CONNECT_HIT; + info.hit_next_htbl = next_nic_matcher->s_htbl; + next_nic_matcher->s_htbl->pointing_ste = prev_anchor->ste_arr; + prev_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; + } else { + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + prev_anchor->ste_arr[0].next_htbl = NULL; + } + + return mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_anchor, + &info, true); +} + +static int dr_matcher_remove_from_tbl(struct mlx5dr_matcher *matcher) +{ + struct 
mlx5dr_matcher *prev_matcher, *next_matcher; + struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_domain *dmn = tbl->dmn; + int ret = 0; + + if (list_is_last(&matcher->matcher_list, &tbl->matcher_list)) + next_matcher = NULL; + else + next_matcher = list_next_entry(matcher, matcher_list); + + if (matcher->matcher_list.prev == &tbl->matcher_list) + prev_matcher = NULL; + else + prev_matcher = list_prev_entry(matcher, matcher_list); + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { + ret = dr_matcher_disconnect(dmn, &tbl->rx, + next_matcher ? &next_matcher->rx : NULL, + prev_matcher ? &prev_matcher->rx : NULL); + if (ret) + return ret; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { + ret = dr_matcher_disconnect(dmn, &tbl->tx, + next_matcher ? &next_matcher->tx : NULL, + prev_matcher ? &prev_matcher->tx : NULL); + if (ret) + return ret; + } + + list_del(&matcher->matcher_list); + + return 0; +} + +int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_table *tbl = matcher->tbl; + + if (refcount_read(&matcher->refcount) > 1) + return -EBUSY; + + mutex_lock(&tbl->dmn->mutex); + + dr_matcher_remove_from_tbl(matcher); + dr_matcher_uninit(matcher); + refcount_dec(&matcher->tbl->refcount); + + mutex_unlock(&tbl->dmn->mutex); + kfree(matcher); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c new file mode 100644 index 000000000000..3bc3f66b8fa8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -0,0 +1,1243 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +#define DR_RULE_MAX_STE_CHAIN (DR_RULE_MAX_STES + DR_ACTION_MAX_STES) + +struct mlx5dr_rule_action_member { + struct mlx5dr_action *action; + struct list_head list; +}; + +static int dr_rule_append_to_miss_list(struct mlx5dr_ste *new_last_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_ste_send_info *ste_info_last; + struct mlx5dr_ste *last_ste; + + /* The new entry will be inserted after the last */ + last_ste = list_entry(miss_list->prev, struct mlx5dr_ste, miss_list_node); + WARN_ON(!last_ste); + + ste_info_last = kzalloc(sizeof(*ste_info_last), GFP_KERNEL); + if (!ste_info_last) + return -ENOMEM; + + mlx5dr_ste_set_miss_addr(last_ste->hw_ste, + mlx5dr_ste_get_icm_addr(new_last_ste)); + list_add_tail(&new_last_ste->miss_list_node, miss_list); + + mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_REDUCED, + 0, last_ste->hw_ste, + ste_info_last, send_list, true); + + return 0; +} + +static struct mlx5dr_ste * +dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 *hw_ste) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_htbl *new_htbl; + struct mlx5dr_ste *ste; + + /* Create new table for miss entry */ + new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!new_htbl) { + mlx5dr_dbg(dmn, "Failed allocating collision table\n"); + return NULL; + } + + /* One and only entry, never grows */ + ste = new_htbl->ste_arr; + mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr); + mlx5dr_htbl_get(new_htbl); + + return ste; +} + +static struct mlx5dr_ste * +dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher, + 
struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 *hw_ste, + struct mlx5dr_ste *orig_ste) +{ + struct mlx5dr_ste *ste; + + ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste); + if (!ste) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed creating collision entry\n"); + return NULL; + } + + ste->ste_chain_location = orig_ste->ste_chain_location; + + /* In collision entry, all members share the same miss_list_head */ + ste->htbl->miss_list = mlx5dr_ste_get_miss_list(orig_ste); + + /* Next table */ + if (mlx5dr_ste_create_next_htbl(matcher, nic_matcher, ste, hw_ste, + DR_CHUNK_SIZE_1)) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n"); + goto free_tbl; + } + + return ste; + +free_tbl: + mlx5dr_ste_free(ste, matcher, nic_matcher); + return NULL; +} + +static int +dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info, + struct mlx5dr_domain *dmn) +{ + int ret; + + list_del(&ste_info->send_list); + ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data, + ste_info->size, ste_info->offset); + if (ret) + goto out; + /* Copy data to ste, only reduced size, the last 16B (mask) + * is already written to the hw. + */ + memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_REDUCED); + +out: + kfree(ste_info); + return ret; +} + +static int dr_rule_send_update_list(struct list_head *send_ste_list, + struct mlx5dr_domain *dmn, + bool is_reverse) +{ + struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info; + int ret; + + if (is_reverse) { + list_for_each_entry_safe_reverse(ste_info, tmp_ste_info, + send_ste_list, send_list) { + ret = dr_rule_handle_one_ste_in_update_list(ste_info, + dmn); + if (ret) + return ret; + } + } else { + list_for_each_entry_safe(ste_info, tmp_ste_info, + send_ste_list, send_list) { + ret = dr_rule_handle_one_ste_in_update_list(ste_info, + dmn); + if (ret) + return ret; + } + } + + return 0; +} + +static struct mlx5dr_ste * +dr_rule_find_ste_in_miss_list(struct list_head *miss_list, u8 *hw_ste) +{ + struct mlx5dr_ste *ste; + + if (list_empty(miss_list)) + return NULL; + + /* Check if hw_ste is present in the list */ + list_for_each_entry(ste, miss_list, miss_list_node) { + if (mlx5dr_ste_equal_tag(ste->hw_ste, hw_ste)) + return ste; + } + + return NULL; +} + +static struct mlx5dr_ste * +dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct list_head *update_list, + struct mlx5dr_ste *col_ste, + u8 *hw_ste) +{ + struct mlx5dr_ste *new_ste; + int ret; + + new_ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste); + if (!new_ste) + return NULL; + + /* In collision entry, all members share the same miss_list_head */ + new_ste->htbl->miss_list = mlx5dr_ste_get_miss_list(col_ste); + + /* Update the previous from the list */ + ret = dr_rule_append_to_miss_list(new_ste, + mlx5dr_ste_get_miss_list(col_ste), + update_list); + if (ret) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed update dup entry\n"); + goto err_exit; + } + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); + return NULL; +} + +static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *cur_ste, + struct mlx5dr_ste *new_ste) +{ + new_ste->next_htbl = cur_ste->next_htbl; + new_ste->ste_chain_location = cur_ste->ste_chain_location; + + if (!mlx5dr_ste_is_last_in_rule(nic_matcher, new_ste->ste_chain_location)) + new_ste->next_htbl->pointing_ste = new_ste; + + /* We need to copy the refcount since this 
ste + * may have been traversed several times + */ + refcount_set(&new_ste->refcount, refcount_read(&cur_ste->refcount)); + + /* Link old STEs rule_mem list to the new ste */ + mlx5dr_rule_update_rule_member(cur_ste, new_ste); + INIT_LIST_HEAD(&new_ste->rule_list); + list_splice_tail_init(&cur_ste->rule_list, &new_ste->rule_list); +} + +static struct mlx5dr_ste * +dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *cur_ste, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_ste_send_info *ste_info; + bool use_update_list = false; + u8 hw_ste[DR_STE_SIZE] = {}; + struct mlx5dr_ste *new_ste; + int new_idx; + u8 sb_idx; + + /* Copy STE mask from the matcher */ + sb_idx = cur_ste->ste_chain_location - 1; + mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask); + + /* Copy STE control and tag */ + memcpy(hw_ste, cur_ste->hw_ste, DR_STE_SIZE_REDUCED); + mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr); + + new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl); + new_ste = &new_htbl->ste_arr[new_idx]; + + if (mlx5dr_ste_not_used_ste(new_ste)) { + mlx5dr_htbl_get(new_htbl); + list_add_tail(&new_ste->miss_list_node, + mlx5dr_ste_get_miss_list(new_ste)); + } else { + new_ste = dr_rule_rehash_handle_collision(matcher, + nic_matcher, + update_list, + new_ste, + hw_ste); + if (!new_ste) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed adding collision entry, index: %d\n", + new_idx); + return NULL; + } + new_htbl->ctrl.num_of_collisions++; + use_update_list = true; + } + + memcpy(new_ste->hw_ste, hw_ste, DR_STE_SIZE_REDUCED); + + new_htbl->ctrl.num_of_valid_entries++; + + if (use_update_list) { + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + goto err_exit; + + mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0, + hw_ste, ste_info, + update_list, true); + } + + dr_rule_rehash_copy_ste_ctrl(matcher, nic_matcher, cur_ste, new_ste); + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); + return NULL; +} + +static int dr_rule_rehash_copy_miss_list(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct list_head *cur_miss_list, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_ste *tmp_ste, *cur_ste, *new_ste; + + if (list_empty(cur_miss_list)) + return 0; + + list_for_each_entry_safe(cur_ste, tmp_ste, cur_miss_list, miss_list_node) { + new_ste = dr_rule_rehash_copy_ste(matcher, + nic_matcher, + cur_ste, + new_htbl, + update_list); + if (!new_ste) + goto err_insert; + + list_del(&cur_ste->miss_list_node); + mlx5dr_htbl_put(cur_ste->htbl); + } + return 0; + +err_insert: + mlx5dr_err(matcher->tbl->dmn, "Fatal error during resize\n"); + WARN_ON(true); + return -EINVAL; +} + +static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_htbl *cur_htbl, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_ste *cur_ste; + int cur_entries; + int err = 0; + int i; + + cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk_size); + + if (cur_entries < 1) { + mlx5dr_dbg(matcher->tbl->dmn, "Invalid number of entries\n"); + return -EINVAL; + } + + for (i = 0; i < cur_entries; i++) { + cur_ste = &cur_htbl->ste_arr[i]; + if (mlx5dr_ste_not_used_ste(cur_ste)) /* Empty, nothing to copy */ + continue; + + err = 
dr_rule_rehash_copy_miss_list(matcher, + nic_matcher, + mlx5dr_ste_get_miss_list(cur_ste), + new_htbl, + update_list); + if (err) + goto clean_copy; + } + +clean_copy: + return err; +} + +static struct mlx5dr_ste_htbl * +dr_rule_rehash_htbl(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste_htbl *cur_htbl, + u8 ste_location, + struct list_head *update_list, + enum mlx5dr_icm_chunk_size new_size) +{ + struct mlx5dr_ste_send_info *del_ste_info, *tmp_ste_info; + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_ste_send_info *ste_info; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_domain_rx_tx *nic_dmn; + u8 formatted_ste[DR_STE_SIZE] = {}; + LIST_HEAD(rehash_table_send_list); + struct mlx5dr_ste *ste_to_update; + struct mlx5dr_ste_htbl *new_htbl; + int err; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + return NULL; + + new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + new_size, + cur_htbl->lu_type, + cur_htbl->byte_mask); + if (!new_htbl) { + mlx5dr_err(dmn, "Failed to allocate new hash table\n"); + goto free_ste_info; + } + + /* Write new table to HW */ + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr; + mlx5dr_ste_set_formatted_ste(dmn->info.caps.gvmi, + nic_dmn, + new_htbl, + formatted_ste, + &info); + + new_htbl->pointing_ste = cur_htbl->pointing_ste; + new_htbl->pointing_ste->next_htbl = new_htbl; + err = dr_rule_rehash_copy_htbl(matcher, + nic_matcher, + cur_htbl, + new_htbl, + &rehash_table_send_list); + if (err) + goto free_new_htbl; + + if (mlx5dr_send_postsend_htbl(dmn, new_htbl, formatted_ste, + nic_matcher->ste_builder[ste_location - 1].bit_mask)) { + mlx5dr_err(dmn, "Failed writing table to HW\n"); + goto free_new_htbl; + } + + /* Writing to the hw is done in regular order of rehash_table_send_list, + * in order to have the origin data written before the miss address of + * collision entries, if exists. 
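The rehash path around here grows a full hash table by allocating a table of the next chunk size, re-hashing every used STE (walking each slot's miss list) into it, chaining collisions in the new table, and only then re-pointing the previous STE at the new table. The toy chained hash table below illustrates just the copy-and-rehash step; the hashing, sizes and struct names are stand-ins, not the STE format.

/* Rehash a chained table into one of double the size. */
#include <stdio.h>
#include <stdlib.h>

struct entry {
	unsigned int tag;
	struct entry *miss;	/* next entry on this slot's miss list */
};

struct htbl {
	unsigned int size;	/* power of two */
	struct entry **slots;
};

static struct htbl *htbl_alloc(unsigned int size)
{
	struct htbl *h = malloc(sizeof(*h));

	h->size = size;
	h->slots = calloc(size, sizeof(*h->slots));
	return h;
}

static void htbl_insert(struct htbl *h, struct entry *e)
{
	unsigned int idx = e->tag & (h->size - 1);

	e->miss = h->slots[idx];	/* collisions chain off the slot */
	h->slots[idx] = e;
}

static struct htbl *rehash(struct htbl *cur)
{
	struct htbl *new_htbl = htbl_alloc(cur->size * 2);
	unsigned int i;

	for (i = 0; i < cur->size; i++) {
		struct entry *e = cur->slots[i];

		while (e) {
			struct entry *next = e->miss;

			htbl_insert(new_htbl, e);	/* re-hash into bigger table */
			e = next;
		}
	}
	return new_htbl;
}

int main(void)
{
	struct htbl *h = htbl_alloc(2), *bigger;
	struct entry e[4] = { { .tag = 1 }, { .tag = 2 }, { .tag = 3 }, { .tag = 5 } };
	unsigned int i;

	for (i = 0; i < 4; i++)
		htbl_insert(h, &e[i]);

	bigger = rehash(h);	/* tags 1 and 3 no longer share a slot */
	for (i = 0; i < bigger->size; i++)
		printf("slot %u: %s\n", i, bigger->slots[i] ? "used" : "empty");

	free(h->slots); free(h);
	free(bigger->slots); free(bigger);
	return 0;
}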
+ */ + if (dr_rule_send_update_list(&rehash_table_send_list, dmn, false)) { + mlx5dr_err(dmn, "Failed updating table to HW\n"); + goto free_ste_list; + } + + /* Connect previous hash table to current */ + if (ste_location == 1) { + /* The previous table is an anchor, anchors size is always one STE */ + struct mlx5dr_ste_htbl *prev_htbl = cur_htbl->pointing_ste->htbl; + + /* On matcher s_anchor we keep an extra refcount */ + mlx5dr_htbl_get(new_htbl); + mlx5dr_htbl_put(cur_htbl); + + nic_matcher->s_htbl = new_htbl; + + /* It is safe to operate dr_ste_set_hit_addr on the hw_ste here + * (48B len) which works only on first 32B + */ + mlx5dr_ste_set_hit_addr(prev_htbl->ste_arr[0].hw_ste, + new_htbl->chunk->icm_addr, + new_htbl->chunk->num_of_entries); + + ste_to_update = &prev_htbl->ste_arr[0]; + } else { + mlx5dr_ste_set_hit_addr_by_next_htbl(cur_htbl->pointing_ste->hw_ste, + new_htbl); + ste_to_update = cur_htbl->pointing_ste; + } + + mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_REDUCED, + 0, ste_to_update->hw_ste, ste_info, + update_list, false); + + return new_htbl; + +free_ste_list: + /* Clean all ste_info's from the new table */ + list_for_each_entry_safe(del_ste_info, tmp_ste_info, + &rehash_table_send_list, send_list) { + list_del(&del_ste_info->send_list); + kfree(del_ste_info); + } + +free_new_htbl: + mlx5dr_ste_htbl_free(new_htbl); +free_ste_info: + kfree(ste_info); + mlx5dr_info(dmn, "Failed creating rehash table\n"); + return NULL; +} + +static struct mlx5dr_ste_htbl *dr_rule_rehash(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste_htbl *cur_htbl, + u8 ste_location, + struct list_head *update_list) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + enum mlx5dr_icm_chunk_size new_size; + + new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk_size); + new_size = min_t(u32, new_size, dmn->info.max_log_sw_icm_sz); + + if (new_size == cur_htbl->chunk_size) + return NULL; /* Skip rehash, we already at the max size */ + + return dr_rule_rehash_htbl(rule, nic_rule, cur_htbl, ste_location, + update_list, new_size); +} + +static struct mlx5dr_ste * +dr_rule_handle_collision(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *hw_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_ste_send_info *ste_info; + struct mlx5dr_ste *new_ste; + + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + return NULL; + + new_ste = dr_rule_create_collision_entry(matcher, nic_matcher, hw_ste, ste); + if (!new_ste) + goto free_send_info; + + if (dr_rule_append_to_miss_list(new_ste, miss_list, send_list)) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed to update prev miss_list\n"); + goto err_exit; + } + + mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0, hw_ste, + ste_info, send_list, false); + + ste->htbl->ctrl.num_of_collisions++; + ste->htbl->ctrl.num_of_valid_entries++; + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); +free_send_info: + kfree(ste_info); + return NULL; +} + +static void dr_rule_remove_action_members(struct mlx5dr_rule *rule) +{ + struct mlx5dr_rule_action_member *action_mem; + struct mlx5dr_rule_action_member *tmp; + + list_for_each_entry_safe(action_mem, tmp, &rule->rule_actions_list, list) { + list_del(&action_mem->list); + refcount_dec(&action_mem->action->refcount); + kvfree(action_mem); + } +} + +static int dr_rule_add_action_members(struct mlx5dr_rule *rule, + 
size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_rule_action_member *action_mem; + int i; + + for (i = 0; i < num_actions; i++) { + action_mem = kvzalloc(sizeof(*action_mem), GFP_KERNEL); + if (!action_mem) + goto free_action_members; + + action_mem->action = actions[i]; + INIT_LIST_HEAD(&action_mem->list); + list_add_tail(&action_mem->list, &rule->rule_actions_list); + refcount_inc(&action_mem->action->refcount); + } + + return 0; + +free_action_members: + dr_rule_remove_action_members(rule); + return -ENOMEM; +} + +/* While the pointer of ste is no longer valid, like while moving ste to be + * the first in the miss_list, and to be in the origin table, + * all rule-members that are attached to this ste should update their ste member + * to the new pointer + */ +void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *ste, + struct mlx5dr_ste *new_ste) +{ + struct mlx5dr_rule_member *rule_mem; + + if (!list_empty(&ste->rule_list)) + list_for_each_entry(rule_mem, &ste->rule_list, use_ste_list) + rule_mem->ste = new_ste; +} + +static void dr_rule_clean_rule_members(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule) +{ + struct mlx5dr_rule_member *rule_mem; + struct mlx5dr_rule_member *tmp_mem; + + if (list_empty(&nic_rule->rule_members_list)) + return; + list_for_each_entry_safe(rule_mem, tmp_mem, &nic_rule->rule_members_list, list) { + list_del(&rule_mem->list); + list_del(&rule_mem->use_ste_list); + mlx5dr_ste_put(rule_mem->ste, rule->matcher, nic_rule->nic_matcher); + kvfree(rule_mem); + } +} + +static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn) +{ + struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl; + + if (dmn->info.max_log_sw_icm_sz <= htbl->chunk_size) + return false; + + if (!ctrl->may_grow) + return false; + + if (ctrl->num_of_collisions >= ctrl->increase_threshold && + (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= ctrl->increase_threshold) + return true; + + return false; +} + +static int dr_rule_add_member(struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste *ste) +{ + struct mlx5dr_rule_member *rule_mem; + + rule_mem = kvzalloc(sizeof(*rule_mem), GFP_KERNEL); + if (!rule_mem) + return -ENOMEM; + + rule_mem->ste = ste; + list_add_tail(&rule_mem->list, &nic_rule->rule_members_list); + + list_add_tail(&rule_mem->use_ste_list, &ste->rule_list); + + return 0; +} + +static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct list_head *send_ste_list, + struct mlx5dr_ste *last_ste, + u8 *hw_ste_arr, + u32 new_hw_ste_arr_sz) +{ + struct mlx5dr_matcher_rx_tx *nic_matcher = nic_rule->nic_matcher; + struct mlx5dr_ste_send_info *ste_info_arr[DR_ACTION_MAX_STES]; + u8 num_of_builders = nic_matcher->num_of_builders; + struct mlx5dr_matcher *matcher = rule->matcher; + u8 *curr_hw_ste, *prev_hw_ste; + struct mlx5dr_ste *action_ste; + int i, k, ret; + + /* Two cases: + * 1. num_of_builders is equal to new_hw_ste_arr_sz, the action in the ste + * 2. num_of_builders is less then new_hw_ste_arr_sz, new ste was added + * to support the action. + */ + if (num_of_builders == new_hw_ste_arr_sz) + return 0; + + for (i = num_of_builders, k = 0; i < new_hw_ste_arr_sz; i++, k++) { + curr_hw_ste = hw_ste_arr + i * DR_STE_SIZE; + prev_hw_ste = (i == 0) ? 
curr_hw_ste : hw_ste_arr + ((i - 1) * DR_STE_SIZE); + action_ste = dr_rule_create_collision_htbl(matcher, + nic_matcher, + curr_hw_ste); + if (!action_ste) + return -ENOMEM; + + mlx5dr_ste_get(action_ste); + + /* While free ste we go over the miss list, so add this ste to the list */ + list_add_tail(&action_ste->miss_list_node, + mlx5dr_ste_get_miss_list(action_ste)); + + ste_info_arr[k] = kzalloc(sizeof(*ste_info_arr[k]), + GFP_KERNEL); + if (!ste_info_arr[k]) + goto err_exit; + + /* Point current ste to the new action */ + mlx5dr_ste_set_hit_addr_by_next_htbl(prev_hw_ste, action_ste->htbl); + ret = dr_rule_add_member(nic_rule, action_ste); + if (ret) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed adding rule member\n"); + goto free_ste_info; + } + mlx5dr_send_fill_and_append_ste_send_info(action_ste, DR_STE_SIZE, 0, + curr_hw_ste, + ste_info_arr[k], + send_ste_list, false); + } + + return 0; + +free_ste_info: + kfree(ste_info_arr[k]); +err_exit: + mlx5dr_ste_put(action_ste, matcher, nic_matcher); + return -ENOMEM; +} + +static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_htbl *cur_htbl, + struct mlx5dr_ste *ste, + u8 ste_location, + u8 *hw_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_ste_send_info *ste_info; + + /* Take ref on table, only on first time this ste is used */ + mlx5dr_htbl_get(cur_htbl); + + /* new entry -> new branch */ + list_add_tail(&ste->miss_list_node, miss_list); + + mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr); + + ste->ste_chain_location = ste_location; + + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + goto clean_ste_setting; + + if (mlx5dr_ste_create_next_htbl(matcher, + nic_matcher, + ste, + hw_ste, + DR_CHUNK_SIZE_1)) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n"); + goto clean_ste_info; + } + + cur_htbl->ctrl.num_of_valid_entries++; + + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, 0, hw_ste, + ste_info, send_list, false); + + return 0; + +clean_ste_info: + kfree(ste_info); +clean_ste_setting: + list_del_init(&ste->miss_list_node); + mlx5dr_htbl_put(cur_htbl); + + return -ENOMEM; +} + +static struct mlx5dr_ste * +dr_rule_handle_ste_branch(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *cur_htbl, + u8 *hw_ste, + u8 ste_location, + struct mlx5dr_ste_htbl **put_htbl) +{ + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_domain_rx_tx *nic_dmn; + struct mlx5dr_ste_htbl *new_htbl; + struct mlx5dr_ste *matched_ste; + struct list_head *miss_list; + bool skip_rehash = false; + struct mlx5dr_ste *ste; + int index; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + +again: + index = mlx5dr_ste_calc_hash_index(hw_ste, cur_htbl); + miss_list = &cur_htbl->chunk->miss_list[index]; + ste = &cur_htbl->ste_arr[index]; + + if (mlx5dr_ste_not_used_ste(ste)) { + if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl, + ste, ste_location, + hw_ste, miss_list, + send_ste_list)) + return NULL; + } else { + /* Hash table index in use, check if this ste is in the miss list */ + matched_ste = dr_rule_find_ste_in_miss_list(miss_list, hw_ste); + if (matched_ste) { + /* If it is last STE in the chain, and has the same tag + * it means that all the previous stes are the 
same, + * if so, this rule is duplicated. + */ + if (mlx5dr_ste_is_last_in_rule(nic_matcher, + matched_ste->ste_chain_location)) { + mlx5dr_info(dmn, "Duplicate rule inserted, aborting!!\n"); + return NULL; + } + return matched_ste; + } + + if (!skip_rehash && dr_rule_need_enlarge_hash(cur_htbl, dmn, nic_dmn)) { + /* Hash table index in use, try to resize of the hash */ + skip_rehash = true; + + /* Hold the table till we update. + * Release in dr_rule_create_rule() + */ + *put_htbl = cur_htbl; + mlx5dr_htbl_get(cur_htbl); + + new_htbl = dr_rule_rehash(rule, nic_rule, cur_htbl, + ste_location, send_ste_list); + if (!new_htbl) { + mlx5dr_htbl_put(cur_htbl); + mlx5dr_info(dmn, "failed creating rehash table, htbl-log_size: %d\n", + cur_htbl->chunk_size); + } else { + cur_htbl = new_htbl; + } + goto again; + } else { + /* Hash table index in use, add another collision (miss) */ + ste = dr_rule_handle_collision(matcher, + nic_matcher, + ste, + hw_ste, + miss_list, + send_ste_list); + if (!ste) { + mlx5dr_dbg(dmn, "failed adding collision entry, index: %d\n", + index); + return NULL; + } + } + } + return ste; +} + +static bool dr_rule_cmp_value_to_mask(u8 *mask, u8 *value, + u32 s_idx, u32 e_idx) +{ + u32 i; + + for (i = s_idx; i < e_idx; i++) { + if (value[i] & ~mask[i]) { + pr_info("Rule parameters contains a value not specified by mask\n"); + return false; + } + } + return true; +} + +static bool dr_rule_verify(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + struct mlx5dr_match_param *param) +{ + u8 match_criteria = matcher->match_criteria; + size_t value_size = value->match_sz; + u8 *mask_p = (u8 *)&matcher->mask; + u8 *param_p = (u8 *)param; + u32 s_idx, e_idx; + + if (!value_size || + (value_size > sizeof(struct mlx5dr_match_param) || + (value_size % sizeof(u32)))) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule parameters length is incorrect\n"); + return false; + } + + mlx5dr_ste_copy_param(matcher->match_criteria, param, value); + + if (match_criteria & DR_MATCHER_CRITERIA_OUTER) { + s_idx = offsetof(struct mlx5dr_match_param, outer); + e_idx = min(s_idx + sizeof(param->outer), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { + s_idx = offsetof(struct mlx5dr_match_param, misc); + e_idx = min(s_idx + sizeof(param->misc), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_INNER) { + s_idx = offsetof(struct mlx5dr_match_param, inner); + e_idx = min(s_idx + sizeof(param->inner), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC2) { + s_idx = offsetof(struct mlx5dr_match_param, misc2); + e_idx = min(s_idx + sizeof(param->misc2), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC3) { + s_idx = offsetof(struct mlx5dr_match_param, misc3); + e_idx = min(s_idx + 
sizeof(param->misc3), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n"); + return false; + } + } + return true; +} + +static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule) +{ + dr_rule_clean_rule_members(rule, nic_rule); + return 0; +} + +static int dr_rule_destroy_rule_fdb(struct mlx5dr_rule *rule) +{ + dr_rule_destroy_rule_nic(rule, &rule->rx); + dr_rule_destroy_rule_nic(rule, &rule->tx); + return 0; +} + +static int dr_rule_destroy_rule(struct mlx5dr_rule *rule) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_rule_destroy_rule_nic(rule, &rule->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_rule_destroy_rule_nic(rule, &rule->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_rule_destroy_rule_fdb(rule); + break; + default: + return -EINVAL; + } + + dr_rule_remove_action_members(rule); + kfree(rule); + return 0; +} + +static bool dr_rule_is_ipv6(struct mlx5dr_match_param *param) +{ + return (param->outer.ip_version == 6 || + param->inner.ip_version == 6 || + param->outer.ethertype == ETH_P_IPV6 || + param->inner.ethertype == ETH_P_IPV6); +} + +static bool dr_rule_skip(enum mlx5dr_domain_type domain, + enum mlx5dr_ste_entry_type ste_type, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value) +{ + if (domain != MLX5DR_DOMAIN_TYPE_FDB) + return false; + + if (mask->misc.source_port) { + if (ste_type == MLX5DR_STE_TYPE_RX) + if (value->misc.source_port != WIRE_PORT) + return true; + + if (ste_type == MLX5DR_STE_TYPE_TX) + if (value->misc.source_port == WIRE_PORT) + return true; + } + + /* Metadata C can be used to describe the source vport */ + if (mask->misc2.metadata_reg_c_0) { + if (ste_type == MLX5DR_STE_TYPE_RX) + if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) != WIRE_PORT) + return true; + + if (ste_type == MLX5DR_STE_TYPE_TX) + if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) == WIRE_PORT) + return true; + } + return false; +} + +static int +dr_rule_create_rule_nic(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_match_param *param, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info; + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_domain_rx_tx *nic_dmn; + struct mlx5dr_ste_htbl *htbl = NULL; + struct mlx5dr_ste_htbl *cur_htbl; + struct mlx5dr_ste *ste = NULL; + LIST_HEAD(send_ste_list); + u8 *hw_ste_arr = NULL; + u32 new_hw_ste_arr_sz; + int ret, i; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + + INIT_LIST_HEAD(&nic_rule->rule_members_list); + + if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param)) + return 0; + + ret = mlx5dr_matcher_select_builders(matcher, + nic_matcher, + dr_rule_is_ipv6(param)); + if (ret) + goto out_err; + + hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL); + if (!hw_ste_arr) { + ret = -ENOMEM; + goto out_err; + } + + /* Set the tag values inside the ste array */ + ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr); + if (ret) + goto free_hw_ste; + + /* Set the actions values/addresses inside the ste array */ + ret = mlx5dr_actions_build_ste_arr(matcher, nic_matcher, actions, + num_actions, 
hw_ste_arr, + &new_hw_ste_arr_sz); + if (ret) + goto free_hw_ste; + + cur_htbl = nic_matcher->s_htbl; + + /* Go over the array of STEs, and build dr_ste accordingly. + * The loop is over only the builders which are equal or less to the + * number of stes, in case we have actions that lives in other stes. + */ + for (i = 0; i < nic_matcher->num_of_builders; i++) { + /* Calculate CRC and keep new ste entry */ + u8 *cur_hw_ste_ent = hw_ste_arr + (i * DR_STE_SIZE); + + ste = dr_rule_handle_ste_branch(rule, + nic_rule, + &send_ste_list, + cur_htbl, + cur_hw_ste_ent, + i + 1, + &htbl); + if (!ste) { + mlx5dr_err(dmn, "Failed creating next branch\n"); + ret = -ENOENT; + goto free_rule; + } + + cur_htbl = ste->next_htbl; + + /* Keep all STEs in the rule struct */ + ret = dr_rule_add_member(nic_rule, ste); + if (ret) { + mlx5dr_dbg(dmn, "Failed adding rule member index %d\n", i); + goto free_ste; + } + + mlx5dr_ste_get(ste); + } + + /* Connect actions */ + ret = dr_rule_handle_action_stes(rule, nic_rule, &send_ste_list, + ste, hw_ste_arr, new_hw_ste_arr_sz); + if (ret) { + mlx5dr_dbg(dmn, "Failed apply actions\n"); + goto free_rule; + } + ret = dr_rule_send_update_list(&send_ste_list, dmn, true); + if (ret) { + mlx5dr_err(dmn, "Failed sending ste!\n"); + goto free_rule; + } + + if (htbl) + mlx5dr_htbl_put(htbl); + + return 0; + +free_ste: + mlx5dr_ste_put(ste, matcher, nic_matcher); +free_rule: + dr_rule_clean_rule_members(rule, nic_rule); + /* Clean all ste_info's */ + list_for_each_entry_safe(ste_info, tmp_ste_info, &send_ste_list, send_list) { + list_del(&ste_info->send_list); + kfree(ste_info); + } +free_hw_ste: + kfree(hw_ste_arr); +out_err: + return ret; +} + +static int +dr_rule_create_rule_fdb(struct mlx5dr_rule *rule, + struct mlx5dr_match_param *param, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_match_param copy_param = {}; + int ret; + + /* Copy match_param since they will be consumed during the first + * nic_rule insertion. 
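
The copy is needed because building the STE array is destructive: each DR_STE_SET_* helper zeroes the match-parameter field it has encoded (see the DR_STE_SET_VAL macro in dr_ste.c further down), so a second insertion from the same struct would have nothing left to match on. A minimal stand-alone illustration of that consume-on-build behaviour, with made-up names rather than the driver's structures:

#include <stdio.h>
#include <string.h>

struct params {
	unsigned int src_port;
	unsigned int dst_port;
};

/* Encodes the fields it uses and then clears them, the way the STE
 * builders consume the match parameters.
 */
static void build_consuming(struct params *p, unsigned int *out)
{
	*out = (p->src_port << 16) | p->dst_port;
	p->src_port = 0;
	p->dst_port = 0;
}

int main(void)
{
	struct params param = { .src_port = 1, .dst_port = 2 };
	struct params copy;
	unsigned int rx_ste, tx_ste;

	memcpy(&copy, &param, sizeof(copy));	/* keep a pristine copy  */
	build_consuming(&param, &rx_ste);	/* first (RX) insertion  */
	build_consuming(&copy, &tx_ste);	/* second (TX) insertion */
	printf("rx=%x tx=%x\n", rx_ste, tx_ste);
	return 0;
}
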
+ */ + memcpy(&copy_param, param, sizeof(struct mlx5dr_match_param)); + + ret = dr_rule_create_rule_nic(rule, &rule->rx, param, + num_actions, actions); + if (ret) + return ret; + + ret = dr_rule_create_rule_nic(rule, &rule->tx, &copy_param, + num_actions, actions); + if (ret) + goto destroy_rule_nic_rx; + + return 0; + +destroy_rule_nic_rx: + dr_rule_destroy_rule_nic(rule, &rule->rx); + return ret; +} + +static struct mlx5dr_rule * +dr_rule_create_rule(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_match_param param = {}; + struct mlx5dr_rule *rule; + int ret; + + if (!dr_rule_verify(matcher, value, &param)) + return NULL; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return NULL; + + rule->matcher = matcher; + INIT_LIST_HEAD(&rule->rule_actions_list); + + ret = dr_rule_add_action_members(rule, num_actions, actions); + if (ret) + goto free_rule; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + rule->rx.nic_matcher = &matcher->rx; + ret = dr_rule_create_rule_nic(rule, &rule->rx, &param, + num_actions, actions); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + rule->tx.nic_matcher = &matcher->tx; + ret = dr_rule_create_rule_nic(rule, &rule->tx, &param, + num_actions, actions); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + rule->rx.nic_matcher = &matcher->rx; + rule->tx.nic_matcher = &matcher->tx; + ret = dr_rule_create_rule_fdb(rule, &param, + num_actions, actions); + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + goto remove_action_members; + + return rule; + +remove_action_members: + dr_rule_remove_action_members(rule); +free_rule: + kfree(rule); + mlx5dr_info(dmn, "Failed creating rule\n"); + return NULL; +} + +struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_rule *rule; + + mutex_lock(&matcher->tbl->dmn->mutex); + refcount_inc(&matcher->refcount); + + rule = dr_rule_create_rule(matcher, value, num_actions, actions); + if (!rule) + refcount_dec(&matcher->refcount); + + mutex_unlock(&matcher->tbl->dmn->mutex); + + return rule; +} + +int mlx5dr_rule_destroy(struct mlx5dr_rule *rule) +{ + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_table *tbl = rule->matcher->tbl; + int ret; + + mutex_lock(&tbl->dmn->mutex); + + ret = dr_rule_destroy_rule(rule); + + mutex_unlock(&tbl->dmn->mutex); + + if (!ret) + refcount_dec(&matcher->refcount); + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c new file mode 100644 index 000000000000..ef0dea44f3b3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -0,0 +1,976 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
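
The two exported entry points just above, mlx5dr_rule_create() and mlx5dr_rule_destroy(), both serialize on the domain mutex and treat the matcher refcount optimistically: it is taken before the rule is built and rolled back if building fails. The same pattern in isolation (hypothetical types and plain pthreads, not the mlx5dr API):

#include <pthread.h>
#include <stdlib.h>

struct matcher {
	pthread_mutex_t lock;
	int refcount;
};

struct rule {
	struct matcher *m;
};

static struct rule *rule_create(struct matcher *m, int should_fail)
{
	struct rule *r;

	pthread_mutex_lock(&m->lock);
	m->refcount++;			/* optimistic: the rule will hold a ref */

	r = should_fail ? NULL : malloc(sizeof(*r));
	if (r)
		r->m = m;
	else
		m->refcount--;		/* roll back on failure */

	pthread_mutex_unlock(&m->lock);
	return r;
}

static void rule_destroy(struct rule *r)
{
	struct matcher *m = r->m;

	pthread_mutex_lock(&m->lock);
	free(r);
	m->refcount--;			/* drop the ref taken at create */
	pthread_mutex_unlock(&m->lock);
}
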
*/ + +#include "dr_types.h" + +#define QUEUE_SIZE 128 +#define SIGNAL_PER_DIV_QUEUE 16 +#define TH_NUMS_TO_DRAIN 2 + +enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 }; + +struct dr_data_seg { + u64 addr; + u32 length; + u32 lkey; + unsigned int send_flags; +}; + +struct postsend_info { + struct dr_data_seg write; + struct dr_data_seg read; + u64 remote_addr; + u32 rkey; +}; + +struct dr_qp_rtr_attr { + struct mlx5dr_cmd_gid_attr dgid_attr; + enum ib_mtu mtu; + u32 qp_num; + u16 port_num; + u8 min_rnr_timer; + u8 sgid_index; + u16 udp_src_port; +}; + +struct dr_qp_rts_attr { + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; +}; + +struct dr_qp_init_attr { + u32 cqn; + u32 pdn; + u32 max_send_wr; + struct mlx5_uars_page *uar; +}; + +static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64) +{ + unsigned int idx; + u8 opcode; + + opcode = get_cqe_opcode(cqe64); + if (opcode == MLX5_CQE_REQ_ERR) { + idx = be16_to_cpu(cqe64->wqe_counter) & + (dr_cq->qp->sq.wqe_cnt - 1); + dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; + } else if (opcode == MLX5_CQE_RESP_ERR) { + ++dr_cq->qp->sq.cc; + } else { + idx = be16_to_cpu(cqe64->wqe_counter) & + (dr_cq->qp->sq.wqe_cnt - 1); + dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; + + return CQ_OK; + } + + return CQ_POLL_ERR; +} + +static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq) +{ + struct mlx5_cqe64 *cqe64; + int err; + + cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq); + if (!cqe64) + return CQ_EMPTY; + + mlx5_cqwq_pop(&dr_cq->wq); + err = dr_parse_cqe(dr_cq, cqe64); + mlx5_cqwq_update_db_record(&dr_cq->wq); + + return err; +} + +static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne) +{ + int npolled; + int err = 0; + + for (npolled = 0; npolled < ne; ++npolled) { + err = dr_cq_poll_one(dr_cq); + if (err != CQ_OK) + break; + } + + return err == CQ_POLL_ERR ? 
err : npolled; +} + +static void dr_qp_event(struct mlx5_core_qp *mqp, int event) +{ + pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn); +} + +static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, + struct dr_qp_init_attr *attr) +{ + u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {}; + struct mlx5_wq_param wqp; + struct mlx5dr_qp *dr_qp; + int inlen; + void *qpc; + void *in; + int err; + + dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL); + if (!dr_qp) + return NULL; + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + dr_qp->rq.pc = 0; + dr_qp->rq.cc = 0; + dr_qp->rq.wqe_cnt = 4; + dr_qp->sq.pc = 0; + dr_qp->sq.cc = 0; + dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr); + + MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); + MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); + err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq, + &dr_qp->wq_ctrl); + if (err) { + mlx5_core_info(mdev, "Can't create QP WQ\n"); + goto err_wq; + } + + dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt, + sizeof(dr_qp->sq.wqe_head[0]), + GFP_KERNEL); + + if (!dr_qp->sq.wqe_head) { + mlx5_core_warn(mdev, "Can't allocate wqe head\n"); + goto err_wqe_head; + } + + inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * + dr_qp->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_in; + } + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, attr->pdn); + MLX5_SET(qpc, qpc, uar_page, attr->uar->index); + MLX5_SET(qpc, qpc, log_page_size, + dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, fre, 1); + MLX5_SET(qpc, qpc, rlky, 1); + MLX5_SET(qpc, qpc, cqn_snd, attr->cqn); + MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn); + MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); + MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma); + if (MLX5_CAP_GEN(mdev, cqe_version) == 1) + MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); + mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, + in, pas)); + + err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen); + kfree(in); + + if (err) { + mlx5_core_warn(mdev, " Can't create QP\n"); + goto err_in; + } + dr_qp->mqp.event = dr_qp_event; + dr_qp->uar = attr->uar; + + return dr_qp; + +err_in: + kfree(dr_qp->sq.wqe_head); +err_wqe_head: + mlx5_wq_destroy(&dr_qp->wq_ctrl); +err_wq: + kfree(dr_qp); + return NULL; +} + +static void dr_destroy_qp(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp) +{ + mlx5_core_destroy_qp(mdev, &dr_qp->mqp); + kfree(dr_qp->sq.wqe_head); + mlx5_wq_destroy(&dr_qp->wq_ctrl); + kfree(dr_qp); +} + +static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl) +{ + dma_wmb(); + *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff); + + /* After wmb() the hw aware of new work */ + wmb(); + + mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET); +} + +static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, + u32 rkey, struct dr_data_seg *data_seg, + u32 opcode, int nreq) +{ + struct mlx5_wqe_raddr_seg *wq_raddr; + struct mlx5_wqe_ctrl_seg *wq_ctrl; + struct mlx5_wqe_data_seg 
*wq_dseg; + unsigned int size; + unsigned int idx; + + size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 + + sizeof(*wq_raddr) / 16; + + idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1); + + wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx); + wq_ctrl->imm = 0; + wq_ctrl->fm_ce_se = (data_seg->send_flags) ? + MLX5_WQE_CTRL_CQ_UPDATE : 0; + wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) | + opcode); + wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8); + wq_raddr = (void *)(wq_ctrl + 1); + wq_raddr->raddr = cpu_to_be64(remote_addr); + wq_raddr->rkey = cpu_to_be32(rkey); + wq_raddr->reserved = 0; + + wq_dseg = (void *)(wq_raddr + 1); + wq_dseg->byte_count = cpu_to_be32(data_seg->length); + wq_dseg->lkey = cpu_to_be32(data_seg->lkey); + wq_dseg->addr = cpu_to_be64(data_seg->addr); + + dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++; + + if (nreq) + dr_cmd_notify_hw(dr_qp, wq_ctrl); +} + +static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info) +{ + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0); + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->read, MLX5_OPCODE_RDMA_READ, 1); +} + +/** + * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent + * with send_list parameters: + * + * @ste: The data that attached to this specific ste + * @size: of data to write + * @offset: of the data from start of the hw_ste entry + * @data: data + * @ste_info: ste to be sent with send_list + * @send_list: to append into it + * @copy_data: if true indicates that the data should be kept because + * it's not backuped any where (like in re-hash). + * if false, it lets the data to be updated after + * it was added to the list. + */ +void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size, + u16 offset, u8 *data, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_list, + bool copy_data) +{ + ste_info->size = size; + ste_info->ste = ste; + ste_info->offset = offset; + + if (copy_data) { + memcpy(ste_info->data_cont, data, size); + ste_info->data = ste_info->data_cont; + } else { + ste_info->data = data; + } + + list_add_tail(&ste_info->send_list, send_list); +} + +/* The function tries to consume one wc each time, unless the queue is full, in + * that case, which means that the hw is behind the sw in a full queue len + * the function will drain the cq till it empty. 
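
The accounting behind that: only one in every signal_th posted WQEs requests a completion, so a single polled CQE retires a whole batch of signal_th work requests, and once the pending count reaches a small multiple of the threshold the ring is drained completely. A simplified model of the bookkeeping (plain C; the poll routine is a stub standing in for the real CQ poll):

#include <stdbool.h>

#define SIGNAL_TH	8	/* one signaled WQE per 8 posted       */
#define DRAIN_MULT	2	/* drain fully at 2 * SIGNAL_TH queued */

/* Stub: pretend every poll finds exactly one completion. On real
 * hardware this would poll the CQ and may return 0 (empty) or < 0.
 */
static int poll_one_cqe(void)
{
	return 1;
}

static int handle_pending(int *pending_wqe)
{
	bool drain = false;

	if (*pending_wqe < SIGNAL_TH)
		return 0;			/* nothing signaled yet */

	if (*pending_wqe >= SIGNAL_TH * DRAIN_MULT)
		drain = true;			/* queue is filling up, empty it */

	do {
		int ne = poll_one_cqe();

		if (ne < 0)
			return ne;
		if (ne == 1)
			*pending_wqe -= SIGNAL_TH;	/* one CQE covers a batch */
	} while (drain && *pending_wqe > 0);

	return 0;
}
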
+ */ +static int dr_handle_pending_wc(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring) +{ + bool is_drain = false; + int ne; + + if (send_ring->pending_wqe < send_ring->signal_th) + return 0; + + /* Queue is full start drain it */ + if (send_ring->pending_wqe >= + dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN) + is_drain = true; + + do { + ne = dr_poll_cq(send_ring->cq, 1); + if (ne < 0) + return ne; + else if (ne == 1) + send_ring->pending_wqe -= send_ring->signal_th; + } while (is_drain && send_ring->pending_wqe); + + return 0; +} + +static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring, + struct postsend_info *send_info) +{ + send_ring->pending_wqe++; + + if (send_ring->pending_wqe % send_ring->signal_th == 0) + send_info->write.send_flags |= IB_SEND_SIGNALED; + + send_ring->pending_wqe++; + send_info->read.length = send_info->write.length; + /* Read into the same write area */ + send_info->read.addr = (uintptr_t)send_info->write.addr; + send_info->read.lkey = send_ring->mr->mkey.key; + + if (send_ring->pending_wqe % send_ring->signal_th == 0) + send_info->read.send_flags = IB_SEND_SIGNALED; + else + send_info->read.send_flags = 0; +} + +static int dr_postsend_icm_data(struct mlx5dr_domain *dmn, + struct postsend_info *send_info) +{ + struct mlx5dr_send_ring *send_ring = dmn->send_ring; + u32 buff_offset; + int ret; + + ret = dr_handle_pending_wc(dmn, send_ring); + if (ret) + return ret; + + if (send_info->write.length > dmn->info.max_inline_size) { + buff_offset = (send_ring->tx_head & + (dmn->send_ring->signal_th - 1)) * + send_ring->max_post_send_size; + /* Copy to ring mr */ + memcpy(send_ring->buf + buff_offset, + (void *)(uintptr_t)send_info->write.addr, + send_info->write.length); + send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset; + send_info->write.lkey = send_ring->mr->mkey.key; + } + + send_ring->tx_head++; + dr_fill_data_segs(send_ring, send_info); + dr_post_send(send_ring->qp, send_info); + + return 0; +} + +static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 **data, + u32 *byte_size, + int *iterations, + int *num_stes) +{ + int alloc_size; + + if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) { + *iterations = htbl->chunk->byte_size / + dmn->send_ring->max_post_send_size; + *byte_size = dmn->send_ring->max_post_send_size; + alloc_size = *byte_size; + *num_stes = *byte_size / DR_STE_SIZE; + } else { + *iterations = 1; + *num_stes = htbl->chunk->num_of_entries; + alloc_size = *num_stes * DR_STE_SIZE; + } + + *data = kzalloc(alloc_size, GFP_KERNEL); + if (!*data) + return -ENOMEM; + + return 0; +} + +/** + * mlx5dr_send_postsend_ste: write size bytes into offset from the hw cm. + * + * @dmn: Domain + * @ste: The ste struct that contains the data (at + * least part of it) + * @data: The real data to send size data + * @size: for writing. + * @offset: The offset from the icm mapped data to + * start write to this for write only part of the + * buffer. + * + * Return: 0 on success. 
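
For writes larger than the ring can post at once, dr_get_tbl_copy_details() above simply divides the table into equal-size postings. The arithmetic on its own (assumed sizes for illustration, not the driver's actual limits):

#include <stdio.h>

#define STE_SIZE		64		/* assumed entry size     */
#define MAX_POST_SEND_SIZE	(16 * 1024)	/* assumed per-post limit */

static void copy_details(unsigned int tbl_byte_size, unsigned int *byte_size,
			 unsigned int *iterations, unsigned int *num_stes)
{
	if (tbl_byte_size > MAX_POST_SEND_SIZE) {
		*iterations = tbl_byte_size / MAX_POST_SEND_SIZE;
		*byte_size = MAX_POST_SEND_SIZE;
	} else {
		*iterations = 1;
		*byte_size = tbl_byte_size;
	}
	*num_stes = *byte_size / STE_SIZE;
}

int main(void)
{
	unsigned int bs, it, n;

	copy_details(64 * 1024, &bs, &it, &n);
	printf("%u postings of %u bytes (%u STEs each)\n", it, bs, n);
	return 0;
}

Each posting then targets a remote address advanced by the STE index, much as the send loops in mlx5dr_send_postsend_htbl() below do.
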
+ */ +int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, + u8 *data, u16 size, u16 offset) +{ + struct postsend_info send_info = {}; + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = size; + send_info.write.lkey = 0; + send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset; + send_info.rkey = ste->htbl->chunk->rkey; + + return dr_postsend_icm_data(dmn, &send_info); +} + +int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, u8 *mask) +{ + u32 byte_size = htbl->chunk->byte_size; + int num_stes_per_iter; + int iterations; + u8 *data; + int ret; + int i; + int j; + + ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, + &iterations, &num_stes_per_iter); + if (ret) + return ret; + + /* Send the data iteration times */ + for (i = 0; i < iterations; i++) { + u32 ste_index = i * (byte_size / DR_STE_SIZE); + struct postsend_info send_info = {}; + + /* Copy all ste's on the data buffer + * need to add the bit_mask + */ + for (j = 0; j < num_stes_per_iter; j++) { + u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste; + u32 ste_off = j * DR_STE_SIZE; + + if (mlx5dr_ste_is_not_valid_entry(hw_ste)) { + memcpy(data + ste_off, + formatted_ste, DR_STE_SIZE); + } else { + /* Copy data */ + memcpy(data + ste_off, + htbl->ste_arr[ste_index + j].hw_ste, + DR_STE_SIZE_REDUCED); + /* Copy bit_mask */ + memcpy(data + ste_off + DR_STE_SIZE_REDUCED, + mask, DR_STE_SIZE_MASK); + } + } + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = byte_size; + send_info.write.lkey = 0; + send_info.remote_addr = + mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index); + send_info.rkey = htbl->chunk->rkey; + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + goto out_free; + } + +out_free: + kfree(data); + return ret; +} + +/* Initialize htble with default STEs */ +int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *ste_init_data, + bool update_hw_ste) +{ + u32 byte_size = htbl->chunk->byte_size; + int iterations; + int num_stes; + u8 *data; + int ret; + int i; + + ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, + &iterations, &num_stes); + if (ret) + return ret; + + for (i = 0; i < num_stes; i++) { + u8 *copy_dst; + + /* Copy the same ste on the data buffer */ + copy_dst = data + i * DR_STE_SIZE; + memcpy(copy_dst, ste_init_data, DR_STE_SIZE); + + if (update_hw_ste) { + /* Copy the reduced ste to hash table ste_arr */ + copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED; + memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED); + } + } + + /* Send the data iteration times */ + for (i = 0; i < iterations; i++) { + u8 ste_index = i * (byte_size / DR_STE_SIZE); + struct postsend_info send_info = {}; + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = byte_size; + send_info.write.lkey = 0; + send_info.remote_addr = + mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index); + send_info.rkey = htbl->chunk->rkey; + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + goto out_free; + } + +out_free: + kfree(data); + return ret; +} + +int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, + struct mlx5dr_action *action) +{ + struct postsend_info send_info = {}; + int ret; + + send_info.write.addr = (uintptr_t)action->rewrite.data; + send_info.write.length = action->rewrite.chunk->byte_size; + send_info.write.lkey = 0; + send_info.remote_addr = action->rewrite.chunk->mr_addr; + send_info.rkey = 
action->rewrite.chunk->rkey; + + mutex_lock(&dmn->mutex); + ret = dr_postsend_icm_data(dmn, &send_info); + mutex_unlock(&dmn->mutex); + + return ret; +} + +static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + int port) +{ + u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port); + MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED); + MLX5_SET(qpc, qpc, rre, 1); + MLX5_SET(qpc, qpc, rwe, 1); + + return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc, + &dr_qp->mqp); +} + +static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + struct dr_qp_rts_attr *attr) +{ + u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc); + + MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn); + + MLX5_SET(qpc, qpc, log_ack_req_freq, 0); + MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt); + MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry); + + return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc, + &dr_qp->mqp); +} + +static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + struct dr_qp_rtr_attr *attr) +{ + u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc); + + MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn); + + MLX5_SET(qpc, qpc, mtu, attr->mtu); + MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1); + MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), + attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac)); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip), + attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid)); + MLX5_SET(qpc, qpc, primary_address_path.src_addr_index, + attr->sgid_index); + + if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2) + MLX5_SET(qpc, qpc, primary_address_path.udp_sport, + attr->udp_src_port); + + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num); + MLX5_SET(qpc, qpc, min_rnr_nak, 1); + + return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc, + &dr_qp->mqp); +} + +static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_qp *dr_qp = dmn->send_ring->qp; + struct dr_qp_rts_attr rts_attr = {}; + struct dr_qp_rtr_attr rtr_attr = {}; + enum ib_mtu mtu = IB_MTU_1024; + u16 gid_index = 0; + int port = 1; + int ret; + + /* Init */ + ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port); + if (ret) + return ret; + + /* RTR */ + ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr); + if (ret) + return ret; + + rtr_attr.mtu = mtu; + rtr_attr.qp_num = dr_qp->mqp.qpn; + rtr_attr.min_rnr_timer = 12; + rtr_attr.port_num = port; + rtr_attr.sgid_index = gid_index; + rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp; + + ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr); + if (ret) + return ret; + + /* RTS */ + rts_attr.timeout = 14; + rts_attr.retry_cnt = 7; + rts_attr.rnr_retry = 7; + + ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr); + if (ret) + return ret; + + return 0; +} + +static void dr_cq_event(struct mlx5_core_cq *mcq, + enum mlx5_event event) +{ + pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn); +} + +static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, + struct mlx5_uars_page *uar, + size_t ncqe) +{ + u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {}; + u32 
out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_wq_param wqp; + struct mlx5_cqe64 *cqe; + struct mlx5dr_cq *cq; + int inlen, err, eqn; + unsigned int irqn; + void *cqc, *in; + __be64 *pas; + u32 i; + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return NULL; + + ncqe = roundup_pow_of_two(ncqe); + MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe)); + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq, + &cq->wq_ctrl); + if (err) + goto out; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; + } + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + goto err_cqwq; + + err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); + if (err) { + kvfree(in); + goto err_cqwq; + } + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe)); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, uar_page, uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas); + + cq->mcq.event = dr_cq_event; + + err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out)); + kvfree(in); + + if (err) + goto err_cqwq; + + cq->mcq.cqe_sz = 64; + cq->mcq.set_ci_db = cq->wq_ctrl.db.db; + cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; + *cq->mcq.set_ci_db = 0; + *cq->mcq.arm_db = 0; + cq->mcq.vector = 0; + cq->mcq.irqn = irqn; + cq->mcq.uar = uar; + + return cq; + +err_cqwq: + mlx5_wq_destroy(&cq->wq_ctrl); +out: + kfree(cq); + return NULL; +} + +static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq) +{ + mlx5_core_destroy_cq(mdev, &cq->mcq); + mlx5_wq_destroy(&cq->wq_ctrl); + kfree(cq); +} + +static int +dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey) +{ + u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, a, 1); + MLX5_SET(mkc, mkc, rw, 1); + MLX5_SET(mkc, mkc, rr, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in)); +} + +static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev, + u32 pdn, void *buf, size_t size) +{ + struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL); + struct device *dma_device; + dma_addr_t dma_addr; + int err; + + if (!mr) + return NULL; + + dma_device = &mdev->pdev->dev; + dma_addr = dma_map_single(dma_device, buf, size, + DMA_BIDIRECTIONAL); + err = dma_mapping_error(dma_device, dma_addr); + if (err) { + mlx5_core_warn(mdev, "Can't dma buf\n"); + kfree(mr); + return NULL; + } + + err = dr_create_mkey(mdev, pdn, &mr->mkey); + if (err) { + mlx5_core_warn(mdev, "Can't create mkey\n"); + dma_unmap_single(dma_device, dma_addr, size, + DMA_BIDIRECTIONAL); + kfree(mr); + return NULL; + } + + mr->dma_addr = dma_addr; + mr->size = size; + mr->addr = buf; + + return mr; +} + +static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr) +{ + mlx5_core_destroy_mkey(mdev, 
&mr->mkey); + dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size, + DMA_BIDIRECTIONAL); + kfree(mr); +} + +int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) +{ + struct dr_qp_init_attr init_attr = {}; + int cq_size; + int size; + int ret; + + dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL); + if (!dmn->send_ring) + return -ENOMEM; + + cq_size = QUEUE_SIZE + 1; + dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size); + if (!dmn->send_ring->cq) { + ret = -ENOMEM; + goto free_send_ring; + } + + init_attr.cqn = dmn->send_ring->cq->mcq.cqn; + init_attr.pdn = dmn->pdn; + init_attr.uar = dmn->uar; + init_attr.max_send_wr = QUEUE_SIZE; + + dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr); + if (!dmn->send_ring->qp) { + ret = -ENOMEM; + goto clean_cq; + } + + dmn->send_ring->cq->qp = dmn->send_ring->qp; + + dmn->info.max_send_wr = QUEUE_SIZE; + dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data, + DR_STE_SIZE); + + dmn->send_ring->signal_th = dmn->info.max_send_wr / + SIGNAL_PER_DIV_QUEUE; + + /* Prepare qp to be used */ + ret = dr_prepare_qp_to_rts(dmn); + if (ret) + goto clean_qp; + + dmn->send_ring->max_post_send_size = + mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K, + DR_ICM_TYPE_STE); + + /* Allocating the max size as a buffer for writing */ + size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size; + dmn->send_ring->buf = kzalloc(size, GFP_KERNEL); + if (!dmn->send_ring->buf) { + ret = -ENOMEM; + goto clean_qp; + } + + memset(dmn->send_ring->buf, 0, size); + dmn->send_ring->buf_size = size; + + dmn->send_ring->mr = dr_reg_mr(dmn->mdev, + dmn->pdn, dmn->send_ring->buf, size); + if (!dmn->send_ring->mr) { + ret = -ENOMEM; + goto free_mem; + } + + dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev, + dmn->pdn, dmn->send_ring->sync_buff, + MIN_READ_SYNC); + if (!dmn->send_ring->sync_mr) { + ret = -ENOMEM; + goto clean_mr; + } + + return 0; + +clean_mr: + dr_dereg_mr(dmn->mdev, dmn->send_ring->mr); +free_mem: + kfree(dmn->send_ring->buf); +clean_qp: + dr_destroy_qp(dmn->mdev, dmn->send_ring->qp); +clean_cq: + dr_destroy_cq(dmn->mdev, dmn->send_ring->cq); +free_send_ring: + kfree(dmn->send_ring); + + return ret; +} + +void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring) +{ + dr_destroy_qp(dmn->mdev, send_ring->qp); + dr_destroy_cq(dmn->mdev, send_ring->cq); + dr_dereg_mr(dmn->mdev, send_ring->sync_mr); + dr_dereg_mr(dmn->mdev, send_ring->mr); + kfree(send_ring->buf); + kfree(send_ring); +} + +int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_send_ring *send_ring = dmn->send_ring; + struct postsend_info send_info = {}; + u8 data[DR_STE_SIZE]; + int num_of_sends_req; + int ret; + int i; + + /* Sending this amount of requests makes sure we will get drain */ + num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2; + + /* Send fake requests forcing the last to be signaled */ + send_info.write.addr = (uintptr_t)data; + send_info.write.length = DR_STE_SIZE; + send_info.write.lkey = 0; + /* Using the sync_mr in order to write/read */ + send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr; + send_info.rkey = send_ring->sync_mr->mkey.key; + + for (i = 0; i < num_of_sends_req; i++) { + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + return ret; + } + + ret = dr_handle_pending_wc(dmn, send_ring); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c new file mode 100644 index 000000000000..6b0af64536d8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -0,0 +1,2308 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/types.h> +#include "dr_types.h" + +#define DR_STE_CRC_POLY 0xEDB88320L +#define STE_IPV4 0x1 +#define STE_IPV6 0x2 +#define STE_TCP 0x1 +#define STE_UDP 0x2 +#define STE_SPI 0x3 +#define IP_VERSION_IPV4 0x4 +#define IP_VERSION_IPV6 0x6 +#define STE_SVLAN 0x1 +#define STE_CVLAN 0x2 + +#define DR_STE_ENABLE_FLOW_TAG BIT(31) + +/* Set to STE a specific value using DR_STE_SET */ +#define DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, value) do { \ + if ((spec)->s_fname) { \ + MLX5_SET(ste_##lookup_type, tag, t_fname, value); \ + (spec)->s_fname = 0; \ + } \ +} while (0) + +/* Set to STE spec->s_fname to tag->t_fname */ +#define DR_STE_SET_TAG(lookup_type, tag, t_fname, spec, s_fname) \ + DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, spec->s_fname) + +/* Set to STE -1 to bit_mask->bm_fname and set spec->s_fname as used */ +#define DR_STE_SET_MASK(lookup_type, bit_mask, bm_fname, spec, s_fname) \ + DR_STE_SET_VAL(lookup_type, bit_mask, bm_fname, spec, s_fname, -1) + +/* Set to STE spec->s_fname to bit_mask->bm_fname and set spec->s_fname as used */ +#define DR_STE_SET_MASK_V(lookup_type, bit_mask, bm_fname, spec, s_fname) \ + DR_STE_SET_VAL(lookup_type, bit_mask, bm_fname, spec, s_fname, (spec)->s_fname) + +#define DR_STE_SET_TCP_FLAGS(lookup_type, tag, spec) do { \ + MLX5_SET(ste_##lookup_type, tag, tcp_ns, !!((spec)->tcp_flags & (1 << 8))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_cwr, !!((spec)->tcp_flags & (1 << 7))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_ece, !!((spec)->tcp_flags & (1 << 6))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_urg, !!((spec)->tcp_flags & (1 << 5))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_ack, !!((spec)->tcp_flags & (1 << 4))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_psh, !!((spec)->tcp_flags & (1 << 3))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_rst, !!((spec)->tcp_flags & (1 << 2))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_syn, !!((spec)->tcp_flags & (1 << 1))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_fin, !!((spec)->tcp_flags & (1 << 0))); \ +} while (0) + +#define DR_STE_SET_MPLS_MASK(lookup_type, mask, in_out, bit_mask) do { \ + DR_STE_SET_MASK_V(lookup_type, mask, mpls0_label, mask, \ + in_out##_first_mpls_label);\ + DR_STE_SET_MASK_V(lookup_type, mask, mpls0_s_bos, mask, \ + in_out##_first_mpls_s_bos); \ + DR_STE_SET_MASK_V(lookup_type, mask, mpls0_exp, mask, \ + in_out##_first_mpls_exp); \ + DR_STE_SET_MASK_V(lookup_type, mask, mpls0_ttl, mask, \ + in_out##_first_mpls_ttl); \ +} while (0) + +#define DR_STE_SET_MPLS_TAG(lookup_type, mask, in_out, tag) do { \ + DR_STE_SET_TAG(lookup_type, tag, mpls0_label, mask, \ + in_out##_first_mpls_label);\ + DR_STE_SET_TAG(lookup_type, tag, mpls0_s_bos, mask, \ + in_out##_first_mpls_s_bos); \ + DR_STE_SET_TAG(lookup_type, tag, mpls0_exp, mask, \ + in_out##_first_mpls_exp); \ + DR_STE_SET_TAG(lookup_type, tag, mpls0_ttl, mask, \ + in_out##_first_mpls_ttl); \ +} while (0) + +#define DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\ + (_misc)->outer_first_mpls_over_gre_label || \ + (_misc)->outer_first_mpls_over_gre_exp || \ + (_misc)->outer_first_mpls_over_gre_s_bos || \ + (_misc)->outer_first_mpls_over_gre_ttl) +#define DR_STE_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\ + 
(_misc)->outer_first_mpls_over_udp_label || \ + (_misc)->outer_first_mpls_over_udp_exp || \ + (_misc)->outer_first_mpls_over_udp_s_bos || \ + (_misc)->outer_first_mpls_over_udp_ttl) + +#define DR_STE_CALC_LU_TYPE(lookup_type, rx, inner) \ + ((inner) ? MLX5DR_STE_LU_TYPE_##lookup_type##_I : \ + (rx) ? MLX5DR_STE_LU_TYPE_##lookup_type##_D : \ + MLX5DR_STE_LU_TYPE_##lookup_type##_O) + +enum dr_ste_tunl_action { + DR_STE_TUNL_ACTION_NONE = 0, + DR_STE_TUNL_ACTION_ENABLE = 1, + DR_STE_TUNL_ACTION_DECAP = 2, + DR_STE_TUNL_ACTION_L3_DECAP = 3, + DR_STE_TUNL_ACTION_POP_VLAN = 4, +}; + +enum dr_ste_action_type { + DR_STE_ACTION_TYPE_PUSH_VLAN = 1, + DR_STE_ACTION_TYPE_ENCAP_L3 = 3, + DR_STE_ACTION_TYPE_ENCAP = 4, +}; + +struct dr_hw_ste_format { + u8 ctrl[DR_STE_SIZE_CTRL]; + u8 tag[DR_STE_SIZE_TAG]; + u8 mask[DR_STE_SIZE_MASK]; +}; + +u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 masked[DR_STE_SIZE_TAG] = {}; + u32 crc32, index; + u16 bit; + int i; + + /* Don't calculate CRC if the result is predicted */ + if (htbl->chunk->num_of_entries == 1 || htbl->byte_mask == 0) + return 0; + + /* Mask tag using byte mask, bit per byte */ + bit = 1 << (DR_STE_SIZE_TAG - 1); + for (i = 0; i < DR_STE_SIZE_TAG; i++) { + if (htbl->byte_mask & bit) + masked[i] = hw_ste->tag[i]; + + bit = bit >> 1; + } + + crc32 = mlx5dr_crc32_slice8_calc(masked, DR_STE_SIZE_TAG); + index = crc32 & (htbl->chunk->num_of_entries - 1); + + return index; +} + +static u16 dr_ste_conv_bit_to_byte_mask(u8 *bit_mask) +{ + u16 byte_mask = 0; + int i; + + for (i = 0; i < DR_STE_SIZE_MASK; i++) { + byte_mask = byte_mask << 1; + if (bit_mask[i] == 0xff) + byte_mask |= 1; + } + return byte_mask; +} + +void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + + memcpy(hw_ste->mask, bit_mask, DR_STE_SIZE_MASK); +} + +void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, qp_list_pointer, + DR_STE_ENABLE_FLOW_TAG | flow_tag); +} + +void mlx5dr_ste_set_counter_id(u8 *hw_ste_p, u32 ctr_id) +{ + /* This can be used for both rx_steering_mult and for sx_transmit */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_15_0, ctr_id); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_23_16, ctr_id >> 16); +} + +void mlx5dr_ste_set_go_back_bit(u8 *hw_ste_p) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, go_back, 1); +} + +void mlx5dr_ste_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_hdr, + bool go_back) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_type, + DR_STE_ACTION_TYPE_PUSH_VLAN); + MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, vlan_hdr); + /* Due to HW limitation we need to set this bit, otherwise reforamt + + * push vlan will not work. + */ + if (go_back) + mlx5dr_ste_set_go_back_bit(hw_ste_p); +} + +void mlx5dr_ste_set_tx_encap(void *hw_ste_p, u32 reformat_id, int size, bool encap_l3) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_type, + encap_l3 ? 
DR_STE_ACTION_TYPE_ENCAP_L3 : DR_STE_ACTION_TYPE_ENCAP); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_description, size / 2); + MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, reformat_id); +} + +void mlx5dr_ste_set_rx_decap(u8 *hw_ste_p) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_DECAP); +} + +void mlx5dr_ste_set_rx_pop_vlan(u8 *hw_ste_p) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_POP_VLAN); +} + +void mlx5dr_ste_set_rx_decap_l3(u8 *hw_ste_p, bool vlan) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_L3_DECAP); + MLX5_SET(ste_modify_packet, hw_ste_p, action_description, vlan ? 1 : 0); +} + +void mlx5dr_ste_set_entry_type(u8 *hw_ste_p, u8 entry_type) +{ + MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type); +} + +u8 mlx5dr_ste_get_entry_type(u8 *hw_ste_p) +{ + return MLX5_GET(ste_general, hw_ste_p, entry_type); +} + +void mlx5dr_ste_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions, + u32 re_write_index) +{ + MLX5_SET(ste_modify_packet, hw_ste_p, number_of_re_write_actions, + num_of_actions); + MLX5_SET(ste_modify_packet, hw_ste_p, header_re_write_actions_pointer, + re_write_index); +} + +void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi) +{ + MLX5_SET(ste_general, hw_ste_p, next_table_base_63_48, gvmi); +} + +void mlx5dr_ste_init(u8 *hw_ste_p, u8 lu_type, u8 entry_type, + u16 gvmi) +{ + MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type); + MLX5_SET(ste_general, hw_ste_p, entry_sub_type, lu_type); + MLX5_SET(ste_general, hw_ste_p, next_lu_type, MLX5DR_STE_LU_TYPE_DONT_CARE); + + /* Set GVMI once, this is the same for RX/TX + * bits 63_48 of next table base / miss address encode the next GVMI + */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, gvmi, gvmi); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, next_table_base_63_48, gvmi); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_63_48, gvmi); +} + +static void dr_ste_set_always_hit(struct dr_hw_ste_format *hw_ste) +{ + memset(&hw_ste->tag, 0, sizeof(hw_ste->tag)); + memset(&hw_ste->mask, 0, sizeof(hw_ste->mask)); +} + +static void dr_ste_set_always_miss(struct dr_hw_ste_format *hw_ste) +{ + hw_ste->tag[0] = 0xdc; + hw_ste->mask[0] = 0; +} + +u64 mlx5dr_ste_get_miss_addr(u8 *hw_ste) +{ + u64 index = + (MLX5_GET(ste_rx_steering_mult, hw_ste, miss_address_31_6) | + MLX5_GET(ste_rx_steering_mult, hw_ste, miss_address_39_32) << 26); + + return index << 6; +} + +void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size) +{ + u64 index = (icm_addr >> 5) | ht_size; + + MLX5_SET(ste_general, hw_ste, next_table_base_39_32_size, index >> 27); + MLX5_SET(ste_general, hw_ste, next_table_base_31_5_size, index); +} + +u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste) +{ + u32 index = ste - ste->htbl->ste_arr; + + return ste->htbl->chunk->icm_addr + DR_STE_SIZE * index; +} + +u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste) +{ + u32 index = ste - ste->htbl->ste_arr; + + return ste->htbl->chunk->mr_addr + DR_STE_SIZE * index; +} + +struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste) +{ + u32 index = ste - ste->htbl->ste_arr; + + return &ste->htbl->miss_list[index]; +} + +static void dr_ste_always_hit_htbl(struct mlx5dr_ste *ste, + struct mlx5dr_ste_htbl *next_htbl) +{ + struct mlx5dr_icm_chunk *chunk = next_htbl->chunk; + u8 *hw_ste = ste->hw_ste; + + MLX5_SET(ste_general, hw_ste, byte_mask, next_htbl->byte_mask); + 
MLX5_SET(ste_general, hw_ste, next_lu_type, next_htbl->lu_type); + mlx5dr_ste_set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries); + + dr_ste_set_always_hit((struct dr_hw_ste_format *)ste->hw_ste); +} + +bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 ste_location) +{ + return ste_location == nic_matcher->num_of_builders; +} + +/* Replace relevant fields, except for: + * htbl - keep the origin htbl + * miss_list + list - already took the src from the list. + * icm_addr/mr_addr - depends on the hosting table. + * + * Before: + * | a | -> | b | -> | c | -> + * + * After: + * | a | -> | c | -> + * The data that was in b is copied to a. + */ +static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src) +{ + memcpy(dst->hw_ste, src->hw_ste, DR_STE_SIZE_REDUCED); + dst->next_htbl = src->next_htbl; + if (dst->next_htbl) + dst->next_htbl->pointing_ste = dst; + + refcount_set(&dst->refcount, refcount_read(&src->refcount)); + + INIT_LIST_HEAD(&dst->rule_list); + list_splice_tail_init(&src->rule_list, &dst->rule_list); +} + +/* Free ste which is the head and the only one in miss_list */ +static void +dr_ste_remove_head_ste(struct mlx5dr_ste *ste, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_send_info *ste_info_head, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) +{ + u8 tmp_data_ste[DR_STE_SIZE] = {}; + struct mlx5dr_ste tmp_ste = {}; + u64 miss_addr; + + tmp_ste.hw_ste = tmp_data_ste; + + /* Use temp ste because dr_ste_always_miss_addr + * touches bit_mask area which doesn't exist at ste->hw_ste. + */ + memcpy(tmp_ste.hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED); + miss_addr = nic_matcher->e_anchor->chunk->icm_addr; + mlx5dr_ste_always_miss_addr(&tmp_ste, miss_addr); + memcpy(ste->hw_ste, tmp_ste.hw_ste, DR_STE_SIZE_REDUCED); + + list_del_init(&ste->miss_list_node); + + /* Write full STE size in order to have "always_miss" */ + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, + 0, tmp_data_ste, + ste_info_head, + send_ste_list, + true /* Copy data */); + + stats_tbl->ctrl.num_of_valid_entries--; +} + +/* Free ste which is the head but NOT the only one in miss_list: + * |_ste_| --> |_next_ste_| -->|__| -->|__| -->/0 + */ +static void +dr_ste_replace_head_ste(struct mlx5dr_ste *ste, struct mlx5dr_ste *next_ste, + struct mlx5dr_ste_send_info *ste_info_head, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) + +{ + struct mlx5dr_ste_htbl *next_miss_htbl; + + next_miss_htbl = next_ste->htbl; + + /* Remove from the miss_list the next_ste before copy */ + list_del_init(&next_ste->miss_list_node); + + /* All rule-members that use next_ste should know about that */ + mlx5dr_rule_update_rule_member(next_ste, ste); + + /* Move data from next into ste */ + dr_ste_replace(ste, next_ste); + + /* Del the htbl that contains the next_ste. + * The origin htbl stays with the same number of entries. 
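+ * Only the reference to next_ste's htbl is dropped below; ste keeps its original hosting table.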
+ */ + mlx5dr_htbl_put(next_miss_htbl); + + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE_REDUCED, + 0, ste->hw_ste, + ste_info_head, + send_ste_list, + true /* Copy data */); + + stats_tbl->ctrl.num_of_collisions--; + stats_tbl->ctrl.num_of_valid_entries--; +} + +/* Free ste that is located in the middle of the miss list: + * |__| -->|_prev_ste_|->|_ste_|-->|_next_ste_| + */ +static void dr_ste_remove_middle_ste(struct mlx5dr_ste *ste, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) +{ + struct mlx5dr_ste *prev_ste; + u64 miss_addr; + + prev_ste = list_entry(mlx5dr_ste_get_miss_list(ste)->prev, struct mlx5dr_ste, + miss_list_node); + if (!prev_ste) { + WARN_ON(true); + return; + } + + miss_addr = mlx5dr_ste_get_miss_addr(ste->hw_ste); + mlx5dr_ste_set_miss_addr(prev_ste->hw_ste, miss_addr); + + mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_REDUCED, 0, + prev_ste->hw_ste, ste_info, + send_ste_list, true /* Copy data*/); + + list_del_init(&ste->miss_list_node); + + stats_tbl->ctrl.num_of_valid_entries--; + stats_tbl->ctrl.num_of_collisions--; +} + +void mlx5dr_ste_free(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_ste_send_info *cur_ste_info, *tmp_ste_info; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_send_info ste_info_head; + struct mlx5dr_ste *next_ste, *first_ste; + bool put_on_origin_table = true; + struct mlx5dr_ste_htbl *stats_tbl; + LIST_HEAD(send_ste_list); + + first_ste = list_entry(mlx5dr_ste_get_miss_list(ste)->next, + struct mlx5dr_ste, miss_list_node); + stats_tbl = first_ste->htbl; + + /* Two options: + * 1. ste is head: + * a. head ste is the only ste in the miss list + * b. head ste is not the only ste in the miss-list + * 2. 
ste is not head + */ + if (first_ste == ste) { /* Ste is the head */ + struct mlx5dr_ste *last_ste; + + last_ste = list_last_entry(mlx5dr_ste_get_miss_list(ste), + struct mlx5dr_ste, miss_list_node); + if (last_ste == first_ste) + next_ste = NULL; + else + next_ste = list_entry(ste->miss_list_node.next, + struct mlx5dr_ste, miss_list_node); + + if (!next_ste) { + /* One and only entry in the list */ + dr_ste_remove_head_ste(ste, nic_matcher, + &ste_info_head, + &send_ste_list, + stats_tbl); + } else { + /* First but not only entry in the list */ + dr_ste_replace_head_ste(ste, next_ste, &ste_info_head, + &send_ste_list, stats_tbl); + put_on_origin_table = false; + } + } else { /* Ste in the middle of the list */ + dr_ste_remove_middle_ste(ste, &ste_info_head, &send_ste_list, stats_tbl); + } + + /* Update HW */ + list_for_each_entry_safe(cur_ste_info, tmp_ste_info, + &send_ste_list, send_list) { + list_del(&cur_ste_info->send_list); + mlx5dr_send_postsend_ste(dmn, cur_ste_info->ste, + cur_ste_info->data, cur_ste_info->size, + cur_ste_info->offset); + } + + if (put_on_origin_table) + mlx5dr_htbl_put(ste->htbl); +} + +bool mlx5dr_ste_equal_tag(void *src, void *dst) +{ + struct dr_hw_ste_format *s_hw_ste = (struct dr_hw_ste_format *)src; + struct dr_hw_ste_format *d_hw_ste = (struct dr_hw_ste_format *)dst; + + return !memcmp(s_hw_ste->tag, d_hw_ste->tag, DR_STE_SIZE_TAG); +} + +void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste, + struct mlx5dr_ste_htbl *next_htbl) +{ + struct mlx5dr_icm_chunk *chunk = next_htbl->chunk; + + mlx5dr_ste_set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries); +} + +void mlx5dr_ste_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) +{ + u64 index = miss_addr >> 6; + + /* Miss address for TX and RX STEs is located at the same offsets */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32, index >> 26); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6, index); +} + +void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr) +{ + u8 *hw_ste = ste->hw_ste; + + MLX5_SET(ste_rx_steering_mult, hw_ste, next_lu_type, MLX5DR_STE_LU_TYPE_DONT_CARE); + mlx5dr_ste_set_miss_addr(hw_ste, miss_addr); + dr_ste_set_always_miss((struct dr_hw_ste_format *)ste->hw_ste); +} + +/* The assumption here is that ste->hw_ste is not updated unless the ste is + * in use, so an unused entry reads back as all zeros. 
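+ * mlx5dr_ste_is_not_valid_entry() relies on this: a next_lu_type of MLX5DR_STE_LU_TYPE_NOP is treated as a not-yet-valid entry.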
+ */ +bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)p_hw_ste; + + if (MLX5_GET(ste_general, hw_ste, next_lu_type) == + MLX5DR_STE_LU_TYPE_NOP) + return true; + + return false; +} + +bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste) +{ + return !refcount_read(&ste->refcount); +} + +static u16 get_bits_per_mask(u16 byte_mask) +{ + u16 bits = 0; + + while (byte_mask) { + byte_mask = byte_mask & (byte_mask - 1); + bits++; + } + + return bits; +} + +/* Init one ste as a pattern for ste data array */ +void mlx5dr_ste_set_formatted_ste(u16 gvmi, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, + struct mlx5dr_htbl_connect_info *connect_info) +{ + struct mlx5dr_ste ste = {}; + + mlx5dr_ste_init(formatted_ste, htbl->lu_type, nic_dmn->ste_type, gvmi); + ste.hw_ste = formatted_ste; + + if (connect_info->type == CONNECT_HIT) + dr_ste_always_hit_htbl(&ste, connect_info->hit_next_htbl); + else + mlx5dr_ste_always_miss_addr(&ste, connect_info->miss_icm_addr); +} + +int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_htbl_connect_info *connect_info, + bool update_hw_ste) +{ + u8 formatted_ste[DR_STE_SIZE] = {}; + + mlx5dr_ste_set_formatted_ste(dmn->info.caps.gvmi, + nic_dmn, + htbl, + formatted_ste, + connect_info); + + return mlx5dr_send_postsend_formatted_htbl(dmn, htbl, formatted_ste, update_hw_ste); +} + +int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *cur_hw_ste, + enum mlx5dr_icm_chunk_size log_table_size) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)cur_hw_ste; + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *next_htbl; + + if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste->ste_chain_location)) { + u32 bits_in_mask; + u8 next_lu_type; + u16 byte_mask; + + next_lu_type = MLX5_GET(ste_general, hw_ste, next_lu_type); + byte_mask = MLX5_GET(ste_general, hw_ste, byte_mask); + + /* Don't allocate table more than required, + * the size of the table defined via the byte_mask, so no need + * to allocate more than that. 
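+ * A byte_mask with N set bits covers N tag bytes, i.e. at most 8*N useful hash bits, so the log table size is capped at bits_in_mask below.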
+ */ + bits_in_mask = get_bits_per_mask(byte_mask) * BITS_PER_BYTE; + log_table_size = min(log_table_size, bits_in_mask); + + next_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + log_table_size, + next_lu_type, + byte_mask); + if (!next_htbl) { + mlx5dr_dbg(dmn, "Failed allocating table\n"); + return -ENOMEM; + } + + /* Write new table to HW */ + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr; + if (mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, next_htbl, + &info, false)) { + mlx5dr_info(dmn, "Failed writing table to HW\n"); + goto free_table; + } + + mlx5dr_ste_set_hit_addr_by_next_htbl(cur_hw_ste, next_htbl); + ste->next_htbl = next_htbl; + next_htbl->pointing_ste = ste; + } + + return 0; + +free_table: + mlx5dr_ste_htbl_free(next_htbl); + return -ENOENT; +} + +static void dr_ste_set_ctrl(struct mlx5dr_ste_htbl *htbl) +{ + struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl; + int num_of_entries; + + htbl->ctrl.may_grow = true; + + if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1) + htbl->ctrl.may_grow = false; + + /* Threshold is 50%, one is added to table of size 1 */ + num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk_size); + ctrl->increase_threshold = (num_of_entries + 1) / 2; +} + +struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + u8 lu_type, u16 byte_mask) +{ + struct mlx5dr_icm_chunk *chunk; + struct mlx5dr_ste_htbl *htbl; + int i; + + htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); + if (!htbl) + return NULL; + + chunk = mlx5dr_icm_alloc_chunk(pool, chunk_size); + if (!chunk) + goto out_free_htbl; + + htbl->chunk = chunk; + htbl->lu_type = lu_type; + htbl->byte_mask = byte_mask; + htbl->ste_arr = chunk->ste_arr; + htbl->hw_ste_arr = chunk->hw_ste_arr; + htbl->miss_list = chunk->miss_list; + refcount_set(&htbl->refcount, 0); + + for (i = 0; i < chunk->num_of_entries; i++) { + struct mlx5dr_ste *ste = &htbl->ste_arr[i]; + + ste->hw_ste = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED; + ste->htbl = htbl; + refcount_set(&ste->refcount, 0); + INIT_LIST_HEAD(&ste->miss_list_node); + INIT_LIST_HEAD(&htbl->miss_list[i]); + INIT_LIST_HEAD(&ste->rule_list); + } + + htbl->chunk_size = chunk_size; + dr_ste_set_ctrl(htbl); + return htbl; + +out_free_htbl: + kfree(htbl); + return NULL; +} + +int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl) +{ + if (refcount_read(&htbl->refcount)) + return -EBUSY; + + mlx5dr_icm_free_chunk(htbl->chunk); + kfree(htbl); + return 0; +} + +int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, + u8 match_criteria, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value) +{ + if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) { + if (mask->misc.source_port && mask->misc.source_port != 0xffff) { + mlx5dr_dbg(dmn, "Partial mask source_port is not supported\n"); + return -EINVAL; + } + } + + return 0; +} + +int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_match_param *value, + u8 *ste_arr) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_build *sb; + int ret, i; + + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, + &matcher->mask, value); + if (ret) + return ret; + + sb = nic_matcher->ste_builder; + for (i = 0; i < nic_matcher->num_of_builders; i++) { + mlx5dr_ste_init(ste_arr, + sb->lu_type, + nic_dmn->ste_type, + dmn->info.caps.gvmi); + + 
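+ /* Each builder produces one STE in the chain: write its bit mask, then build the tag from the rule values */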
mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask); + + ret = sb->ste_build_tag_func(value, sb, ste_arr); + if (ret) + return ret; + + /* Connect the STEs */ + if (i < (nic_matcher->num_of_builders - 1)) { + /* Need the next builder for these fields, + * not relevant for the last ste in the chain. + */ + sb++; + MLX5_SET(ste_general, ste_arr, next_lu_type, sb->lu_type); + MLX5_SET(ste_general, ste_arr, byte_mask, sb->byte_mask); + } + ste_arr += DR_STE_SIZE; + } + return 0; +} + +static int dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, dmac_15_0, mask, dmac_15_0); + + if (mask->smac_47_16 || mask->smac_15_0) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_47_32, + mask->smac_47_16 >> 16); + MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_31_0, + mask->smac_47_16 << 16 | mask->smac_15_0); + mask->smac_47_16 = 0; + mask->smac_15_0 = 0; + } + + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_MASK(eth_l2_src_dst, bit_mask, l3_type, mask, ip_version); + + if (mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + } else if (mask->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1); + mask->svlan_tag = 0; + } + + if (mask->cvlan_tag || mask->svlan_tag) { + pr_info("Invalid c/svlan mask configuration\n"); + return -EINVAL; + } + + return 0; +} + +static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec) +{ + spec->gre_c_present = MLX5_GET(fte_match_set_misc, mask, gre_c_present); + spec->gre_k_present = MLX5_GET(fte_match_set_misc, mask, gre_k_present); + spec->gre_s_present = MLX5_GET(fte_match_set_misc, mask, gre_s_present); + spec->source_vhca_port = MLX5_GET(fte_match_set_misc, mask, source_vhca_port); + spec->source_sqn = MLX5_GET(fte_match_set_misc, mask, source_sqn); + + spec->source_port = MLX5_GET(fte_match_set_misc, mask, source_port); + + spec->outer_second_prio = MLX5_GET(fte_match_set_misc, mask, outer_second_prio); + spec->outer_second_cfi = MLX5_GET(fte_match_set_misc, mask, outer_second_cfi); + spec->outer_second_vid = MLX5_GET(fte_match_set_misc, mask, outer_second_vid); + spec->inner_second_prio = MLX5_GET(fte_match_set_misc, mask, inner_second_prio); + spec->inner_second_cfi = MLX5_GET(fte_match_set_misc, mask, inner_second_cfi); + spec->inner_second_vid = MLX5_GET(fte_match_set_misc, mask, inner_second_vid); + + spec->outer_second_cvlan_tag = + MLX5_GET(fte_match_set_misc, mask, outer_second_cvlan_tag); + spec->inner_second_cvlan_tag = + MLX5_GET(fte_match_set_misc, mask, inner_second_cvlan_tag); + spec->outer_second_svlan_tag = + MLX5_GET(fte_match_set_misc, mask, outer_second_svlan_tag); + spec->inner_second_svlan_tag = + MLX5_GET(fte_match_set_misc, mask, inner_second_svlan_tag); + + spec->gre_protocol = MLX5_GET(fte_match_set_misc, mask, gre_protocol); + + spec->gre_key_h = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi); + spec->gre_key_l = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo); + + spec->vxlan_vni = MLX5_GET(fte_match_set_misc, mask, vxlan_vni); + + spec->geneve_vni = MLX5_GET(fte_match_set_misc, mask, 
geneve_vni); + spec->geneve_oam = MLX5_GET(fte_match_set_misc, mask, geneve_oam); + + spec->outer_ipv6_flow_label = + MLX5_GET(fte_match_set_misc, mask, outer_ipv6_flow_label); + + spec->inner_ipv6_flow_label = + MLX5_GET(fte_match_set_misc, mask, inner_ipv6_flow_label); + + spec->geneve_opt_len = MLX5_GET(fte_match_set_misc, mask, geneve_opt_len); + spec->geneve_protocol_type = + MLX5_GET(fte_match_set_misc, mask, geneve_protocol_type); + + spec->bth_dst_qp = MLX5_GET(fte_match_set_misc, mask, bth_dst_qp); +} + +static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec) +{ + u32 raw_ip[4]; + + spec->smac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16); + + spec->smac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0); + spec->ethertype = MLX5_GET(fte_match_set_lyr_2_4, mask, ethertype); + + spec->dmac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16); + + spec->dmac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0); + spec->first_prio = MLX5_GET(fte_match_set_lyr_2_4, mask, first_prio); + spec->first_cfi = MLX5_GET(fte_match_set_lyr_2_4, mask, first_cfi); + spec->first_vid = MLX5_GET(fte_match_set_lyr_2_4, mask, first_vid); + + spec->ip_protocol = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_protocol); + spec->ip_dscp = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_dscp); + spec->ip_ecn = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_ecn); + spec->cvlan_tag = MLX5_GET(fte_match_set_lyr_2_4, mask, cvlan_tag); + spec->svlan_tag = MLX5_GET(fte_match_set_lyr_2_4, mask, svlan_tag); + spec->frag = MLX5_GET(fte_match_set_lyr_2_4, mask, frag); + spec->ip_version = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_version); + spec->tcp_flags = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_flags); + spec->tcp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_sport); + spec->tcp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_dport); + + spec->ttl_hoplimit = MLX5_GET(fte_match_set_lyr_2_4, mask, ttl_hoplimit); + + spec->udp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_sport); + spec->udp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_dport); + + memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(raw_ip)); + + spec->src_ip_127_96 = be32_to_cpu(raw_ip[0]); + spec->src_ip_95_64 = be32_to_cpu(raw_ip[1]); + spec->src_ip_63_32 = be32_to_cpu(raw_ip[2]); + spec->src_ip_31_0 = be32_to_cpu(raw_ip[3]); + + memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(raw_ip)); + + spec->dst_ip_127_96 = be32_to_cpu(raw_ip[0]); + spec->dst_ip_95_64 = be32_to_cpu(raw_ip[1]); + spec->dst_ip_63_32 = be32_to_cpu(raw_ip[2]); + spec->dst_ip_31_0 = be32_to_cpu(raw_ip[3]); +} + +static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec) +{ + spec->outer_first_mpls_label = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_label); + spec->outer_first_mpls_exp = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_exp); + spec->outer_first_mpls_s_bos = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_s_bos); + spec->outer_first_mpls_ttl = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_ttl); + spec->inner_first_mpls_label = + MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_label); + spec->inner_first_mpls_exp = + MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_exp); + spec->inner_first_mpls_s_bos = + MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_s_bos); + spec->inner_first_mpls_ttl = + 
MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_ttl); + spec->outer_first_mpls_over_gre_label = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_label); + spec->outer_first_mpls_over_gre_exp = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_exp); + spec->outer_first_mpls_over_gre_s_bos = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_s_bos); + spec->outer_first_mpls_over_gre_ttl = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_ttl); + spec->outer_first_mpls_over_udp_label = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_label); + spec->outer_first_mpls_over_udp_exp = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_exp); + spec->outer_first_mpls_over_udp_s_bos = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_s_bos); + spec->outer_first_mpls_over_udp_ttl = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_ttl); + spec->metadata_reg_c_7 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_7); + spec->metadata_reg_c_6 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_6); + spec->metadata_reg_c_5 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_5); + spec->metadata_reg_c_4 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_4); + spec->metadata_reg_c_3 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_3); + spec->metadata_reg_c_2 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_2); + spec->metadata_reg_c_1 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_1); + spec->metadata_reg_c_0 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_0); + spec->metadata_reg_a = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_a); + spec->metadata_reg_b = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_b); +} + +static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec) +{ + spec->inner_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_seq_num); + spec->outer_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_seq_num); + spec->inner_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_ack_num); + spec->outer_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_ack_num); + spec->outer_vxlan_gpe_vni = + MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_vni); + spec->outer_vxlan_gpe_next_protocol = + MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_next_protocol); + spec->outer_vxlan_gpe_flags = + MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_flags); + spec->icmpv4_header_data = MLX5_GET(fte_match_set_misc3, mask, icmp_header_data); + spec->icmpv6_header_data = + MLX5_GET(fte_match_set_misc3, mask, icmpv6_header_data); + spec->icmpv4_type = MLX5_GET(fte_match_set_misc3, mask, icmp_type); + spec->icmpv4_code = MLX5_GET(fte_match_set_misc3, mask, icmp_code); + spec->icmpv6_type = MLX5_GET(fte_match_set_misc3, mask, icmpv6_type); + spec->icmpv6_code = MLX5_GET(fte_match_set_misc3, mask, icmpv6_code); +} + +void mlx5dr_ste_copy_param(u8 match_criteria, + struct mlx5dr_match_param *set_param, + struct mlx5dr_match_parameters *mask) +{ + u8 tail_param[MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)] = {}; + u8 *data = (u8 *)mask->match_buf; + size_t param_location; + void *buff; + + if (match_criteria & DR_MATCHER_CRITERIA_OUTER) { + if (mask->match_sz < sizeof(struct mlx5dr_match_spec)) { + memcpy(tail_param, data, mask->match_sz); + buff = tail_param; + } else { + buff = mask->match_buf; + } + dr_ste_copy_mask_spec(buff, &set_param->outer); + } + param_location = 
sizeof(struct mlx5dr_match_spec); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc(buff, &set_param->misc); + } + param_location += sizeof(struct mlx5dr_match_misc); + + if (match_criteria & DR_MATCHER_CRITERIA_INNER) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_spec)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_spec(buff, &set_param->inner); + } + param_location += sizeof(struct mlx5dr_match_spec); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC2) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc2)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc2(buff, &set_param->misc2); + } + + param_location += sizeof(struct mlx5dr_match_misc2); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC3) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc3)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc3(buff, &set_param->misc3); + } +} + +static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_15_0, spec, dmac_15_0); + + if (spec->smac_47_16 || spec->smac_15_0) { + MLX5_SET(ste_eth_l2_src_dst, tag, smac_47_32, + spec->smac_47_16 >> 16); + MLX5_SET(ste_eth_l2_src_dst, tag, smac_31_0, + spec->smac_47_16 << 16 | spec->smac_15_0); + spec->smac_47_16 = 0; + spec->smac_15_0 = 0; + } + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + pr_info("Unsupported ip_version value\n"); + return -EINVAL; + } + } + + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_priority, spec, first_prio); + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + return 0; +} + +int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + int ret; + + ret = dr_ste_build_eth_l2_src_des_bit_mask(mask, inner, sb->bit_mask); + if (ret) + return ret; + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC_DST, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = 
&dr_ste_build_eth_l2_src_des_tag; + + return 0; +} + +static void dr_ste_build_eth_l3_ipv6_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_127_96, mask, dst_ip_127_96); + DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_95_64, mask, dst_ip_95_64); + DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_63_32, mask, dst_ip_63_32); + DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_31_0, mask, dst_ip_31_0); +} + +static int dr_ste_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0); + + return 0; +} + +void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l3_ipv6_dst_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_DST, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv6_dst_tag; +} + +static void dr_ste_build_eth_l3_ipv6_src_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_127_96, mask, src_ip_127_96); + DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_95_64, mask, src_ip_95_64); + DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_63_32, mask, src_ip_63_32); + DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_31_0, mask, src_ip_31_0); +} + +static int dr_ste_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0); + + return 0; +} + +void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l3_ipv6_src_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_SRC, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv6_src_tag; +} + +static void dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(struct mlx5dr_match_param *value, + bool inner, + u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? 
&value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + destination_address, mask, dst_ip_31_0); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + source_address, mask, src_ip_31_0); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + destination_port, mask, tcp_dport); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + destination_port, mask, udp_dport); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + source_port, mask, tcp_sport); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + source_port, mask, udp_sport); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + protocol, mask, ip_protocol); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + fragmented, mask, frag); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + dscp, mask, ip_dscp); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + ecn, mask, ip_ecn); + + if (mask->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, bit_mask, mask); + mask->tcp_flags = 0; + } +} + +static int dr_ste_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_address, spec, dst_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_address, spec, src_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, ecn, spec, ip_ecn); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_5_TUPLE, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv4_5_tuple_tag; +} + +static void +dr_ste_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? 
&value->inner : &value->outer; + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_MASK(eth_l2_src, bit_mask, l3_type, mask, ip_version); + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } + + if (inner) { + if (misc_mask->inner_second_cvlan_tag || + misc_mask->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1); + misc_mask->inner_second_cvlan_tag = 0; + misc_mask->inner_second_svlan_tag = 0; + } + + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_vlan_id, misc_mask, inner_second_vid); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_cfi, misc_mask, inner_second_cfi); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_priority, misc_mask, inner_second_prio); + } else { + if (misc_mask->outer_second_cvlan_tag || + misc_mask->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1); + misc_mask->outer_second_cvlan_tag = 0; + misc_mask->outer_second_svlan_tag = 0; + } + + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_vlan_id, misc_mask, outer_second_vid); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_cfi, misc_mask, outer_second_cfi); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_priority, misc_mask, outer_second_prio); + } +} + +static int dr_ste_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value, + bool inner, u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = inner ? 
&value->inner : &value->outer; + struct mlx5dr_match_misc *misc_spec = &value->misc; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_src, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_src, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_src, tag, l3_ethertype, spec, ethertype); + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + pr_info("Unsupported ip_version value\n"); + return -EINVAL; + } + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (inner) { + if (misc_spec->inner_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->inner_second_cvlan_tag = 0; + } else if (misc_spec->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->inner_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, inner_second_vid); + DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, inner_second_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, inner_second_prio); + } else { + if (misc_spec->outer_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->outer_second_cvlan_tag = 0; + } else if (misc_spec->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->outer_second_svlan_tag = 0; + } + DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, outer_second_vid); + DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, outer_second_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, outer_second_prio); + } + + return 0; +} + +static void dr_ste_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, smac_47_16, mask, smac_47_16); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, smac_15_0, mask, smac_15_0); + + dr_ste_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int dr_ste_build_eth_l2_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_src, tag, smac_47_16, spec, smac_47_16); + DR_STE_SET_TAG(eth_l2_src, tag, smac_15_0, spec, smac_15_0); + + return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p); +} + +void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l2_src_bit_mask(mask, inner, sb->bit_mask); + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l2_src_tag; +} + +static void dr_ste_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l2_dst, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_MASK_V(eth_l2_dst, bit_mask, dmac_15_0, mask, dmac_15_0); + + dr_ste_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int dr_ste_build_eth_l2_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_dst, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_dst, tag, dmac_15_0, spec, dmac_15_0); + + return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p); +} + +void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l2_dst_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_DST, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l2_dst_tag; +} + +static void dr_ste_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, dmac_15_0, mask, dmac_15_0); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_MASK(eth_l2_tnl, bit_mask, l3_type, mask, ip_version); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl, bit_mask, + l2_tunneling_network_id, (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } +} + +static int dr_ste_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc *misc = &value->misc; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_15_0, spec, dmac_15_0); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_tnl, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_tnl, tag, l3_ethertype, spec, ethertype); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl, tag, l2_tunneling_network_id, + (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + return -EINVAL; + } + } + + return 0; +} + +void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, bool inner, bool rx) +{ + dr_ste_build_eth_l2_tnl_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_ETHL2_TUNNELING_I; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l2_tnl_tag; +} + +static void dr_ste_build_eth_l3_ipv4_misc_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l3_ipv4_misc, bit_mask, time_to_live, mask, ttl_hoplimit); +} + +static int dr_ste_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit); + + return 0; +} + +void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l3_ipv4_misc_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_MISC, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv4_misc_tag; +} + +static void dr_ste_build_ipv6_l3_l4_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? 
&value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l4, bit_mask, dst_port, mask, tcp_dport); + DR_STE_SET_MASK_V(eth_l4, bit_mask, src_port, mask, tcp_sport); + DR_STE_SET_MASK_V(eth_l4, bit_mask, dst_port, mask, udp_dport); + DR_STE_SET_MASK_V(eth_l4, bit_mask, src_port, mask, udp_sport); + DR_STE_SET_MASK_V(eth_l4, bit_mask, protocol, mask, ip_protocol); + DR_STE_SET_MASK_V(eth_l4, bit_mask, fragmented, mask, frag); + DR_STE_SET_MASK_V(eth_l4, bit_mask, dscp, mask, ip_dscp); + DR_STE_SET_MASK_V(eth_l4, bit_mask, ecn, mask, ip_ecn); + DR_STE_SET_MASK_V(eth_l4, bit_mask, ipv6_hop_limit, mask, ttl_hoplimit); + + if (mask->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l4, bit_mask, mask); + mask->tcp_flags = 0; + } +} + +static int dr_ste_build_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l4, tag, src_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l4, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l4, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l4, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l4, tag, ecn, spec, ip_ecn); + DR_STE_SET_TAG(eth_l4, tag, ipv6_hop_limit, spec, ttl_hoplimit); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l4, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_ipv6_l3_l4_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_ipv6_l3_l4_tag; +} + +static int dr_ste_build_empty_always_hit_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + return 0; +} + +void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx) +{ + sb->rx = rx; + sb->lu_type = MLX5DR_STE_LU_TYPE_DONT_CARE; + sb->byte_mask = 0; + sb->ste_build_tag_func = &dr_ste_build_empty_always_hit_tag; +} + +static void dr_ste_build_mpls_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc2_mask = &value->misc2; + + if (inner) + DR_STE_SET_MPLS_MASK(mpls, misc2_mask, inner, bit_mask); + else + DR_STE_SET_MPLS_MASK(mpls, misc2_mask, outer, bit_mask); +} + +static int dr_ste_build_mpls_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc2_mask = &value->misc2; + u8 *tag = hw_ste->tag; + + if (sb->inner) + DR_STE_SET_MPLS_TAG(mpls, misc2_mask, inner, tag); + else + DR_STE_SET_MPLS_TAG(mpls, misc2_mask, outer, tag); + + return 0; +} + +void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_mpls_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(MPLS_FIRST, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = 
&dr_ste_build_mpls_tag; +} + +static void dr_ste_build_gre_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_MASK_V(gre, bit_mask, gre_protocol, misc_mask, gre_protocol); + DR_STE_SET_MASK_V(gre, bit_mask, gre_k_present, misc_mask, gre_k_present); + DR_STE_SET_MASK_V(gre, bit_mask, gre_key_h, misc_mask, gre_key_h); + DR_STE_SET_MASK_V(gre, bit_mask, gre_key_l, misc_mask, gre_key_l); + + DR_STE_SET_MASK_V(gre, bit_mask, gre_c_present, misc_mask, gre_c_present); + DR_STE_SET_MASK_V(gre, bit_mask, gre_s_present, misc_mask, gre_s_present); +} + +static int dr_ste_build_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc *misc = &value->misc; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(gre, tag, gre_protocol, misc, gre_protocol); + + DR_STE_SET_TAG(gre, tag, gre_k_present, misc, gre_k_present); + DR_STE_SET_TAG(gre, tag, gre_key_h, misc, gre_key_h); + DR_STE_SET_TAG(gre, tag, gre_key_l, misc, gre_key_l); + + DR_STE_SET_TAG(gre, tag, gre_c_present, misc, gre_c_present); + + DR_STE_SET_TAG(gre, tag, gre_s_present, misc, gre_s_present); + + return 0; +} + +void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, bool inner, bool rx) +{ + dr_ste_build_gre_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_GRE; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_gre_tag; +} + +static void dr_ste_build_flex_parser_0_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + + if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) { + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_label, + misc_2_mask, outer_first_mpls_over_gre_label); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_exp, + misc_2_mask, outer_first_mpls_over_gre_exp); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_s_bos, + misc_2_mask, outer_first_mpls_over_gre_s_bos); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_ttl, + misc_2_mask, outer_first_mpls_over_gre_ttl); + } else { + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_label, + misc_2_mask, outer_first_mpls_over_udp_label); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_exp, + misc_2_mask, outer_first_mpls_over_udp_exp); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_s_bos, + misc_2_mask, outer_first_mpls_over_udp_s_bos); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_ttl, + misc_2_mask, outer_first_mpls_over_udp_ttl); + } +} + +static int dr_ste_build_flex_parser_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + u8 *tag = hw_ste->tag; + + if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) { + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label, + misc_2_mask, outer_first_mpls_over_gre_label); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp, + misc_2_mask, outer_first_mpls_over_gre_exp); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos, + misc_2_mask, outer_first_mpls_over_gre_s_bos); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl, + misc_2_mask, outer_first_mpls_over_gre_ttl); + } else { + 
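+ /* Not MPLS over GRE: take the MPLS-over-UDP variants of the same parser_3 fields */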
DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label, + misc_2_mask, outer_first_mpls_over_udp_label); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp, + misc_2_mask, outer_first_mpls_over_udp_exp); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos, + misc_2_mask, outer_first_mpls_over_udp_s_bos); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl, + misc_2_mask, outer_first_mpls_over_udp_ttl); + } + return 0; +} + +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_flex_parser_0_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_flex_parser_0_tag; +} + +#define ICMP_TYPE_OFFSET_FIRST_DW 24 +#define ICMP_CODE_OFFSET_FIRST_DW 16 +#define ICMP_HEADER_DATA_OFFSET_SECOND_DW 0 + +static int dr_ste_build_flex_parser_1_bit_mask(struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + u8 *bit_mask) +{ + struct mlx5dr_match_misc3 *misc_3_mask = &mask->misc3; + bool is_ipv4_mask = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3_mask); + u32 icmp_header_data_mask; + u32 icmp_type_mask; + u32 icmp_code_mask; + int dw0_location; + int dw1_location; + + if (is_ipv4_mask) { + icmp_header_data_mask = misc_3_mask->icmpv4_header_data; + icmp_type_mask = misc_3_mask->icmpv4_type; + icmp_code_mask = misc_3_mask->icmpv4_code; + dw0_location = caps->flex_parser_id_icmp_dw0; + dw1_location = caps->flex_parser_id_icmp_dw1; + } else { + icmp_header_data_mask = misc_3_mask->icmpv6_header_data; + icmp_type_mask = misc_3_mask->icmpv6_type; + icmp_code_mask = misc_3_mask->icmpv6_code; + dw0_location = caps->flex_parser_id_icmpv6_dw0; + dw1_location = caps->flex_parser_id_icmpv6_dw1; + } + + switch (dw0_location) { + case 4: + if (icmp_type_mask) { + MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_4, + (icmp_type_mask << ICMP_TYPE_OFFSET_FIRST_DW)); + if (is_ipv4_mask) + misc_3_mask->icmpv4_type = 0; + else + misc_3_mask->icmpv6_type = 0; + } + if (icmp_code_mask) { + u32 cur_val = MLX5_GET(ste_flex_parser_1, bit_mask, + flex_parser_4); + MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_4, + cur_val | (icmp_code_mask << ICMP_CODE_OFFSET_FIRST_DW)); + if (is_ipv4_mask) + misc_3_mask->icmpv4_code = 0; + else + misc_3_mask->icmpv6_code = 0; + } + break; + default: + return -EINVAL; + } + + switch (dw1_location) { + case 5: + if (icmp_header_data_mask) { + MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_5, + (icmp_header_data_mask << ICMP_HEADER_DATA_OFFSET_SECOND_DW)); + if (is_ipv4_mask) + misc_3_mask->icmpv4_header_data = 0; + else + misc_3_mask->icmpv6_header_data = 0; + } + break; + default: + return -EINVAL; + } + + return 0; +} + +static int dr_ste_build_flex_parser_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc3 *misc_3 = &value->misc3; + u8 *tag = hw_ste->tag; + u32 icmp_header_data; + int dw0_location; + int dw1_location; + u32 icmp_type; + u32 icmp_code; + bool is_ipv4; + + is_ipv4 = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3); + if (is_ipv4) { + icmp_header_data = misc_3->icmpv4_header_data; + icmp_type = misc_3->icmpv4_type; + icmp_code = misc_3->icmpv4_code; + dw0_location = sb->caps->flex_parser_id_icmp_dw0; + dw1_location = sb->caps->flex_parser_id_icmp_dw1; + } else { + icmp_header_data = 
misc_3->icmpv6_header_data; + icmp_type = misc_3->icmpv6_type; + icmp_code = misc_3->icmpv6_code; + dw0_location = sb->caps->flex_parser_id_icmpv6_dw0; + dw1_location = sb->caps->flex_parser_id_icmpv6_dw1; + } + + switch (dw0_location) { + case 4: + if (icmp_type) { + MLX5_SET(ste_flex_parser_1, tag, flex_parser_4, + (icmp_type << ICMP_TYPE_OFFSET_FIRST_DW)); + if (is_ipv4) + misc_3->icmpv4_type = 0; + else + misc_3->icmpv6_type = 0; + } + + if (icmp_code) { + u32 cur_val = MLX5_GET(ste_flex_parser_1, tag, + flex_parser_4); + MLX5_SET(ste_flex_parser_1, tag, flex_parser_4, + cur_val | (icmp_code << ICMP_CODE_OFFSET_FIRST_DW)); + if (is_ipv4) + misc_3->icmpv4_code = 0; + else + misc_3->icmpv6_code = 0; + } + break; + default: + return -EINVAL; + } + + switch (dw1_location) { + case 5: + if (icmp_header_data) { + MLX5_SET(ste_flex_parser_1, tag, flex_parser_5, + (icmp_header_data << ICMP_HEADER_DATA_OFFSET_SECOND_DW)); + if (is_ipv4) + misc_3->icmpv4_header_data = 0; + else + misc_3->icmpv6_header_data = 0; + } + break; + default: + return -EINVAL; + } + + return 0; +} + +int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + int ret; + + ret = dr_ste_build_flex_parser_1_bit_mask(mask, caps, sb->bit_mask); + if (ret) + return ret; + + sb->rx = rx; + sb->inner = inner; + sb->caps = caps; + sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_1; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_flex_parser_1_tag; + + return 0; +} + +static void dr_ste_build_general_purpose_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + + DR_STE_SET_MASK_V(general_purpose, bit_mask, + general_purpose_lookup_field, misc_2_mask, + metadata_reg_a); +} + +static int dr_ste_build_general_purpose_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field, + misc_2_mask, metadata_reg_a); + + return 0; +} + +void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_general_purpose_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_GENERAL_PURPOSE; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_general_purpose_tag; +} + +static void dr_ste_build_eth_l4_misc_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc3 *misc_3_mask = &value->misc3; + + if (inner) { + DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, seq_num, misc_3_mask, + inner_tcp_seq_num); + DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, ack_num, misc_3_mask, + inner_tcp_ack_num); + } else { + DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, seq_num, misc_3_mask, + outer_tcp_seq_num); + DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, ack_num, misc_3_mask, + outer_tcp_ack_num); + } +} + +static int dr_ste_build_eth_l4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 *tag = hw_ste->tag; + + if 
(sb->inner) { + DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, inner_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, inner_tcp_ack_num); + } else { + DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, outer_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, outer_tcp_ack_num); + } + + return 0; +} + +void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l4_misc_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4_MISC, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l4_misc_tag; +} + +static void dr_ste_build_flex_parser_tnl_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc3 *misc_3_mask = &value->misc3; + + if (misc_3_mask->outer_vxlan_gpe_flags || + misc_3_mask->outer_vxlan_gpe_next_protocol) { + MLX5_SET(ste_flex_parser_tnl, bit_mask, + flex_parser_tunneling_header_63_32, + (misc_3_mask->outer_vxlan_gpe_flags << 24) | + (misc_3_mask->outer_vxlan_gpe_next_protocol)); + misc_3_mask->outer_vxlan_gpe_flags = 0; + misc_3_mask->outer_vxlan_gpe_next_protocol = 0; + } + + if (misc_3_mask->outer_vxlan_gpe_vni) { + MLX5_SET(ste_flex_parser_tnl, bit_mask, + flex_parser_tunneling_header_31_0, + misc_3_mask->outer_vxlan_gpe_vni << 8); + misc_3_mask->outer_vxlan_gpe_vni = 0; + } +} + +static int dr_ste_build_flex_parser_tnl_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 *tag = hw_ste->tag; + + if (misc3->outer_vxlan_gpe_flags || + misc3->outer_vxlan_gpe_next_protocol) { + MLX5_SET(ste_flex_parser_tnl, tag, + flex_parser_tunneling_header_63_32, + (misc3->outer_vxlan_gpe_flags << 24) | + (misc3->outer_vxlan_gpe_next_protocol)); + misc3->outer_vxlan_gpe_flags = 0; + misc3->outer_vxlan_gpe_next_protocol = 0; + } + + if (misc3->outer_vxlan_gpe_vni) { + MLX5_SET(ste_flex_parser_tnl, tag, + flex_parser_tunneling_header_31_0, + misc3->outer_vxlan_gpe_vni << 8); + misc3->outer_vxlan_gpe_vni = 0; + } + + return 0; +} + +void mlx5dr_ste_build_flex_parser_tnl(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_flex_parser_tnl_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_flex_parser_tnl_tag; +} + +static void dr_ste_build_register_0_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + + DR_STE_SET_MASK_V(register_0, bit_mask, register_0_h, + misc_2_mask, metadata_reg_c_0); + DR_STE_SET_MASK_V(register_0, bit_mask, register_0_l, + misc_2_mask, metadata_reg_c_1); + DR_STE_SET_MASK_V(register_0, bit_mask, register_1_h, + misc_2_mask, metadata_reg_c_2); + DR_STE_SET_MASK_V(register_0, bit_mask, register_1_l, + misc_2_mask, metadata_reg_c_3); +} + +static int dr_ste_build_register_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *tag = hw_ste->tag; + + 
DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0); + DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1); + DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2); + DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3); + + return 0; +} + +void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_register_0_bit_mask(mask, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_0; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_register_0_tag; +} + +static void dr_ste_build_register_1_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + + DR_STE_SET_MASK_V(register_1, bit_mask, register_2_h, + misc_2_mask, metadata_reg_c_4); + DR_STE_SET_MASK_V(register_1, bit_mask, register_2_l, + misc_2_mask, metadata_reg_c_5); + DR_STE_SET_MASK_V(register_1, bit_mask, register_3_h, + misc_2_mask, metadata_reg_c_6); + DR_STE_SET_MASK_V(register_1, bit_mask, register_3_l, + misc_2_mask, metadata_reg_c_7); +} + +static int dr_ste_build_register_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4); + DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5); + DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6); + DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7); + + return 0; +} + +void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_register_1_bit_mask(mask, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_1; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_register_1_tag; +} + +static int dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc *misc_mask = &value->misc; + + if (misc_mask->source_port != 0xffff) + return -EINVAL; + + DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port); + DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn); + + return 0; +} + +static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc *misc = &value->misc; + struct mlx5dr_cmd_vport_cap *vport_cap; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); + + vport_cap = mlx5dr_get_vport_cap(sb->caps, misc->source_port); + if (!vport_cap) + return -EINVAL; + + if (vport_cap->vport_gvmi) + MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi); + + misc->source_port = 0; + + return 0; +} + +int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + int ret; + + ret = dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); + if (ret) + return ret; + + sb->rx = rx; + sb->caps = caps; + sb->inner 
= inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_src_gvmi_qpn_tag; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c new file mode 100644 index 000000000000..e178d8d3dbc9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action) +{ + struct mlx5dr_matcher *last_matcher = NULL; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *last_htbl; + int ret; + + if (action && action->action_type != DR_ACTION_TYP_FT) + return -EOPNOTSUPP; + + mutex_lock(&tbl->dmn->mutex); + + if (!list_empty(&tbl->matcher_list)) + last_matcher = list_last_entry(&tbl->matcher_list, + struct mlx5dr_matcher, + matcher_list); + + if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX || + tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { + if (last_matcher) + last_htbl = last_matcher->rx.e_anchor; + else + last_htbl = tbl->rx.s_anchor; + + tbl->rx.default_icm_addr = action ? + action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr : + tbl->rx.nic_dmn->default_icm_addr; + + info.type = CONNECT_MISS; + info.miss_icm_addr = tbl->rx.default_icm_addr; + + ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn, + tbl->rx.nic_dmn, + last_htbl, + &info, true); + if (ret) { + mlx5dr_dbg(tbl->dmn, "Failed to set RX miss action, ret %d\n", ret); + goto out; + } + } + + if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX || + tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { + if (last_matcher) + last_htbl = last_matcher->tx.e_anchor; + else + last_htbl = tbl->tx.s_anchor; + + tbl->tx.default_icm_addr = action ? 
+ action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr : + tbl->tx.nic_dmn->default_icm_addr; + + info.type = CONNECT_MISS; + info.miss_icm_addr = tbl->tx.default_icm_addr; + + ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn, + tbl->tx.nic_dmn, + last_htbl, &info, true); + if (ret) { + mlx5dr_dbg(tbl->dmn, "Failed to set TX miss action, ret %d\n", ret); + goto out; + } + } + + /* Release old action */ + if (tbl->miss_action) + refcount_dec(&tbl->miss_action->refcount); + + /* Set new miss action */ + tbl->miss_action = action; + if (tbl->miss_action) + refcount_inc(&action->refcount); + +out: + mutex_unlock(&tbl->dmn->mutex); + return ret; +} + +static void dr_table_uninit_nic(struct mlx5dr_table_rx_tx *nic_tbl) +{ + mlx5dr_htbl_put(nic_tbl->s_anchor); +} + +static void dr_table_uninit_fdb(struct mlx5dr_table *tbl) +{ + dr_table_uninit_nic(&tbl->rx); + dr_table_uninit_nic(&tbl->tx); +} + +static void dr_table_uninit(struct mlx5dr_table *tbl) +{ + mutex_lock(&tbl->dmn->mutex); + + switch (tbl->dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_table_uninit_nic(&tbl->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_table_uninit_nic(&tbl->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_table_uninit_fdb(tbl); + break; + default: + WARN_ON(true); + break; + } + + mutex_unlock(&tbl->dmn->mutex); +} + +static int dr_table_init_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + int ret; + + nic_tbl->default_icm_addr = nic_dmn->default_icm_addr; + + nic_tbl->s_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!nic_tbl->s_anchor) + return -ENOMEM; + + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_dmn->default_icm_addr; + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + nic_tbl->s_anchor, + &info, true); + if (ret) + goto free_s_anchor; + + mlx5dr_htbl_get(nic_tbl->s_anchor); + + return 0; + +free_s_anchor: + mlx5dr_ste_htbl_free(nic_tbl->s_anchor); + return ret; +} + +static int dr_table_init_fdb(struct mlx5dr_table *tbl) +{ + int ret; + + ret = dr_table_init_nic(tbl->dmn, &tbl->rx); + if (ret) + return ret; + + ret = dr_table_init_nic(tbl->dmn, &tbl->tx); + if (ret) + goto destroy_rx; + + return 0; + +destroy_rx: + dr_table_uninit_nic(&tbl->rx); + return ret; +} + +static int dr_table_init(struct mlx5dr_table *tbl) +{ + int ret = 0; + + INIT_LIST_HEAD(&tbl->matcher_list); + + mutex_lock(&tbl->dmn->mutex); + + switch (tbl->dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_RX; + tbl->rx.nic_dmn = &tbl->dmn->info.rx; + ret = dr_table_init_nic(tbl->dmn, &tbl->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_TX; + tbl->tx.nic_dmn = &tbl->dmn->info.tx; + ret = dr_table_init_nic(tbl->dmn, &tbl->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_FDB; + tbl->rx.nic_dmn = &tbl->dmn->info.rx; + tbl->tx.nic_dmn = &tbl->dmn->info.tx; + ret = dr_table_init_fdb(tbl); + break; + default: + WARN_ON(true); + break; + } + + mutex_unlock(&tbl->dmn->mutex); + + return ret; +} + +static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl) +{ + return mlx5dr_cmd_destroy_flow_table(tbl->dmn->mdev, + tbl->table_id, + tbl->table_type); +} + +static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl) +{ + u64 icm_addr_rx = 0; + u64 icm_addr_tx = 0; + int ret; + + if (tbl->rx.s_anchor) + 
icm_addr_rx = tbl->rx.s_anchor->chunk->icm_addr; + + if (tbl->tx.s_anchor) + icm_addr_tx = tbl->tx.s_anchor->chunk->icm_addr; + + ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, + tbl->table_type, + icm_addr_rx, + icm_addr_tx, + tbl->dmn->info.caps.max_ft_level - 1, + true, false, NULL, + &tbl->table_id); + + return ret; +} + +struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level) +{ + struct mlx5dr_table *tbl; + int ret; + + refcount_inc(&dmn->refcount); + + tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); + if (!tbl) + goto dec_ref; + + tbl->dmn = dmn; + tbl->level = level; + refcount_set(&tbl->refcount, 1); + + ret = dr_table_init(tbl); + if (ret) + goto free_tbl; + + ret = dr_table_create_sw_owned_tbl(tbl); + if (ret) + goto uninit_tbl; + + return tbl; + +uninit_tbl: + dr_table_uninit(tbl); +free_tbl: + kfree(tbl); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +int mlx5dr_table_destroy(struct mlx5dr_table *tbl) +{ + int ret; + + if (refcount_read(&tbl->refcount) > 1) + return -EBUSY; + + ret = dr_table_destroy_sw_owned_tbl(tbl); + if (ret) + return ret; + + dr_table_uninit(tbl); + + if (tbl->miss_action) + refcount_dec(&tbl->miss_action->refcount); + + refcount_dec(&tbl->dmn->refcount); + kfree(tbl); + + return ret; +} + +u32 mlx5dr_table_get_id(struct mlx5dr_table *tbl) +{ + return tbl->table_id; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h new file mode 100644 index 000000000000..a37ee6359be2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -0,0 +1,1060 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef _DR_TYPES_ +#define _DR_TYPES_ + +#include <linux/mlx5/driver.h> +#include <linux/refcount.h> +#include "fs_core.h" +#include "wq.h" +#include "lib/mlx5.h" +#include "mlx5_ifc_dr.h" +#include "mlx5dr.h" + +#define DR_RULE_MAX_STES 17 +#define DR_ACTION_MAX_STES 5 +#define WIRE_PORT 0xFFFF +#define DR_STE_SVLAN 0x1 +#define DR_STE_CVLAN 0x2 + +#define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg) +#define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg) +#define mlx5dr_dbg(dmn, arg...) 
mlx5_core_dbg((dmn)->mdev, ##arg) + +enum mlx5dr_icm_chunk_size { + DR_CHUNK_SIZE_1, + DR_CHUNK_SIZE_MIN = DR_CHUNK_SIZE_1, /* keep updated when changing */ + DR_CHUNK_SIZE_2, + DR_CHUNK_SIZE_4, + DR_CHUNK_SIZE_8, + DR_CHUNK_SIZE_16, + DR_CHUNK_SIZE_32, + DR_CHUNK_SIZE_64, + DR_CHUNK_SIZE_128, + DR_CHUNK_SIZE_256, + DR_CHUNK_SIZE_512, + DR_CHUNK_SIZE_1K, + DR_CHUNK_SIZE_2K, + DR_CHUNK_SIZE_4K, + DR_CHUNK_SIZE_8K, + DR_CHUNK_SIZE_16K, + DR_CHUNK_SIZE_32K, + DR_CHUNK_SIZE_64K, + DR_CHUNK_SIZE_128K, + DR_CHUNK_SIZE_256K, + DR_CHUNK_SIZE_512K, + DR_CHUNK_SIZE_1024K, + DR_CHUNK_SIZE_2048K, + DR_CHUNK_SIZE_MAX, +}; + +enum mlx5dr_icm_type { + DR_ICM_TYPE_STE, + DR_ICM_TYPE_MODIFY_ACTION, +}; + +static inline enum mlx5dr_icm_chunk_size +mlx5dr_icm_next_higher_chunk(enum mlx5dr_icm_chunk_size chunk) +{ + chunk += 2; + if (chunk < DR_CHUNK_SIZE_MAX) + return chunk; + + return DR_CHUNK_SIZE_MAX; +} + +enum { + DR_STE_SIZE = 64, + DR_STE_SIZE_CTRL = 32, + DR_STE_SIZE_TAG = 16, + DR_STE_SIZE_MASK = 16, +}; + +enum { + DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK, +}; + +enum { + DR_MODIFY_ACTION_SIZE = 8, +}; + +enum mlx5dr_matcher_criteria { + DR_MATCHER_CRITERIA_EMPTY = 0, + DR_MATCHER_CRITERIA_OUTER = 1 << 0, + DR_MATCHER_CRITERIA_MISC = 1 << 1, + DR_MATCHER_CRITERIA_INNER = 1 << 2, + DR_MATCHER_CRITERIA_MISC2 = 1 << 3, + DR_MATCHER_CRITERIA_MISC3 = 1 << 4, + DR_MATCHER_CRITERIA_MAX = 1 << 5, +}; + +enum mlx5dr_action_type { + DR_ACTION_TYP_TNL_L2_TO_L2, + DR_ACTION_TYP_L2_TO_TNL_L2, + DR_ACTION_TYP_TNL_L3_TO_L2, + DR_ACTION_TYP_L2_TO_TNL_L3, + DR_ACTION_TYP_DROP, + DR_ACTION_TYP_QP, + DR_ACTION_TYP_FT, + DR_ACTION_TYP_CTR, + DR_ACTION_TYP_TAG, + DR_ACTION_TYP_MODIFY_HDR, + DR_ACTION_TYP_VPORT, + DR_ACTION_TYP_POP_VLAN, + DR_ACTION_TYP_PUSH_VLAN, + DR_ACTION_TYP_MAX, +}; + +struct mlx5dr_icm_pool; +struct mlx5dr_icm_chunk; +struct mlx5dr_icm_bucket; +struct mlx5dr_ste_htbl; +struct mlx5dr_match_param; +struct mlx5dr_cmd_caps; +struct mlx5dr_matcher_rx_tx; + +struct mlx5dr_ste { + u8 *hw_ste; + /* refcount: indicates the num of rules that using this ste */ + refcount_t refcount; + + /* attached to the miss_list head at each htbl entry */ + struct list_head miss_list_node; + + /* each rule member that uses this ste attached here */ + struct list_head rule_list; + + /* this ste is member of htbl */ + struct mlx5dr_ste_htbl *htbl; + + struct mlx5dr_ste_htbl *next_htbl; + + /* this ste is part of a rule, located in ste's chain */ + u8 ste_chain_location; +}; + +struct mlx5dr_ste_htbl_ctrl { + /* total number of valid entries belonging to this hash table. 
This + * includes the non collision and collision entries + */ + unsigned int num_of_valid_entries; + + /* total number of collisions entries attached to this table */ + unsigned int num_of_collisions; + unsigned int increase_threshold; + u8 may_grow:1; +}; + +struct mlx5dr_ste_htbl { + u8 lu_type; + u16 byte_mask; + refcount_t refcount; + struct mlx5dr_icm_chunk *chunk; + struct mlx5dr_ste *ste_arr; + u8 *hw_ste_arr; + + struct list_head *miss_list; + + enum mlx5dr_icm_chunk_size chunk_size; + struct mlx5dr_ste *pointing_ste; + + struct mlx5dr_ste_htbl_ctrl ctrl; +}; + +struct mlx5dr_ste_send_info { + struct mlx5dr_ste *ste; + struct list_head send_list; + u16 size; + u16 offset; + u8 data_cont[DR_STE_SIZE]; + u8 *data; +}; + +void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size, + u16 offset, u8 *data, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_list, + bool copy_data); + +struct mlx5dr_ste_build { + u8 inner:1; + u8 rx:1; + struct mlx5dr_cmd_caps *caps; + u8 lu_type; + u16 byte_mask; + u8 bit_mask[DR_STE_SIZE_MASK]; + int (*ste_build_tag_func)(struct mlx5dr_match_param *spec, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p); +}; + +struct mlx5dr_ste_htbl * +mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + u8 lu_type, u16 byte_mask); + +int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl); + +static inline void mlx5dr_htbl_put(struct mlx5dr_ste_htbl *htbl) +{ + if (refcount_dec_and_test(&htbl->refcount)) + mlx5dr_ste_htbl_free(htbl); +} + +static inline void mlx5dr_htbl_get(struct mlx5dr_ste_htbl *htbl) +{ + refcount_inc(&htbl->refcount); +} + +/* STE utils */ +u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl); +void mlx5dr_ste_init(u8 *hw_ste_p, u8 lu_type, u8 entry_type, u16 gvmi); +void mlx5dr_ste_always_hit_htbl(struct mlx5dr_ste *ste, + struct mlx5dr_ste_htbl *next_htbl); +void mlx5dr_ste_set_miss_addr(u8 *hw_ste, u64 miss_addr); +u64 mlx5dr_ste_get_miss_addr(u8 *hw_ste); +void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi); +void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size); +void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr); +void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask); +bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste); +bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 ste_location); +void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag); +void mlx5dr_ste_set_counter_id(u8 *hw_ste_p, u32 ctr_id); +void mlx5dr_ste_set_tx_encap(void *hw_ste_p, u32 reformat_id, + int size, bool encap_l3); +void mlx5dr_ste_set_rx_decap(u8 *hw_ste_p); +void mlx5dr_ste_set_rx_decap_l3(u8 *hw_ste_p, bool vlan); +void mlx5dr_ste_set_rx_pop_vlan(u8 *hw_ste_p); +void mlx5dr_ste_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_tpid_pcp_dei_vid, + bool go_back); +void mlx5dr_ste_set_entry_type(u8 *hw_ste_p, u8 entry_type); +u8 mlx5dr_ste_get_entry_type(u8 *hw_ste_p); +void mlx5dr_ste_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions, + u32 re_write_index); +void mlx5dr_ste_set_go_back_bit(u8 *hw_ste_p); +u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste); +u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste); +struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste); + +void mlx5dr_ste_free(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher); +static inline void mlx5dr_ste_put(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct 
mlx5dr_matcher_rx_tx *nic_matcher) +{ + if (refcount_dec_and_test(&ste->refcount)) + mlx5dr_ste_free(ste, matcher, nic_matcher); +} + +/* initial as 0, increased only when ste appears in a new rule */ +static inline void mlx5dr_ste_get(struct mlx5dr_ste *ste) +{ + refcount_inc(&ste->refcount); +} + +void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste, + struct mlx5dr_ste_htbl *next_htbl); +bool mlx5dr_ste_equal_tag(void *src, void *dst); +int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *cur_hw_ste, + enum mlx5dr_icm_chunk_size log_table_size); + +/* STE build functions */ +int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, + u8 match_criteria, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value); +int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_match_param *value, + u8 *ste_arr); +int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_tnl(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx); + +/* Actions utils */ +int mlx5dr_actions_build_ste_arr(struct 
mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_action *actions[], + u32 num_actions, + u8 *ste_arr, + u32 *new_hw_ste_arr_sz); + +struct mlx5dr_match_spec { + u32 smac_47_16; /* Source MAC address of incoming packet */ + /* Incoming packet Ethertype - this is the Ethertype + * following the last VLAN tag of the packet + */ + u32 ethertype:16; + u32 smac_15_0:16; /* Source MAC address of incoming packet */ + u32 dmac_47_16; /* Destination MAC address of incoming packet */ + /* VLAN ID of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_vid:12; + /* CFI bit of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_cfi:1; + /* Priority of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_prio:3; + u32 dmac_15_0:16; /* Destination MAC address of incoming packet */ + /* TCP flags. ;Bit 0: FIN;Bit 1: SYN;Bit 2: RST;Bit 3: PSH;Bit 4: ACK; + * Bit 5: URG;Bit 6: ECE;Bit 7: CWR;Bit 8: NS + */ + u32 tcp_flags:9; + u32 ip_version:4; /* IP version */ + u32 frag:1; /* Packet is an IP fragment */ + /* The first vlan in the packet is s-vlan (0x8a88). + * cvlan_tag and svlan_tag cannot be set together + */ + u32 svlan_tag:1; + /* The first vlan in the packet is c-vlan (0x8100). + * cvlan_tag and svlan_tag cannot be set together + */ + u32 cvlan_tag:1; + /* Explicit Congestion Notification derived from + * Traffic Class/TOS field of IPv6/v4 + */ + u32 ip_ecn:2; + /* Differentiated Services Code Point derived from + * Traffic Class/TOS field of IPv6/v4 + */ + u32 ip_dscp:6; + u32 ip_protocol:8; /* IP protocol */ + /* TCP destination port. + * tcp and udp sport/dport are mutually exclusive + */ + u32 tcp_dport:16; + /* TCP source port.;tcp and udp sport/dport are mutually exclusive */ + u32 tcp_sport:16; + u32 ttl_hoplimit:8; + u32 reserved:24; + /* UDP destination port.;tcp and udp sport/dport are mutually exclusive */ + u32 udp_dport:16; + /* UDP source port.;tcp and udp sport/dport are mutually exclusive */ + u32 udp_sport:16; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_127_96; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_95_64; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_63_32; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_31_0; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_127_96; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_95_64; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 
dst_ip_63_32; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_31_0; +}; + +struct mlx5dr_match_misc { + u32 source_sqn:24; /* Source SQN */ + u32 source_vhca_port:4; + /* used with GRE, sequence number exist when gre_s_present == 1 */ + u32 gre_s_present:1; + /* used with GRE, key exist when gre_k_present == 1 */ + u32 gre_k_present:1; + u32 reserved_auto1:1; + /* used with GRE, checksum exist when gre_c_present == 1 */ + u32 gre_c_present:1; + /* Source port.;0xffff determines wire port */ + u32 source_port:16; + u32 reserved_auto2:16; + /* VLAN ID of first VLAN tag the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_vid:12; + /* CFI bit of first VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_cfi:1; + /* Priority of second VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_prio:3; + /* VLAN ID of first VLAN tag the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_vid:12; + /* CFI bit of first VLAN tag in the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_cfi:1; + /* Priority of second VLAN tag in the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_prio:3; + u32 gre_protocol:16; /* GRE Protocol (outer) */ + u32 reserved_auto3:12; + /* The second vlan in the inner header of the packet is s-vlan (0x8a88). + * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + */ + u32 inner_second_svlan_tag:1; + /* The second vlan in the outer header of the packet is s-vlan (0x8a88). + * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together + */ + u32 outer_second_svlan_tag:1; + /* The second vlan in the inner header of the packet is c-vlan (0x8100). + * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + */ + u32 inner_second_cvlan_tag:1; + /* The second vlan in the outer header of the packet is c-vlan (0x8100). 
+ * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together + */ + u32 outer_second_cvlan_tag:1; + u32 gre_key_l:8; /* GRE Key [7:0] (outer) */ + u32 gre_key_h:24; /* GRE Key[31:8] (outer) */ + u32 reserved_auto4:8; + u32 vxlan_vni:24; /* VXLAN VNI (outer) */ + u32 geneve_oam:1; /* GENEVE OAM field (outer) */ + u32 reserved_auto5:7; + u32 geneve_vni:24; /* GENEVE VNI field (outer) */ + u32 outer_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (outer) */ + u32 reserved_auto6:12; + u32 inner_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (inner) */ + u32 reserved_auto7:12; + u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */ + u32 geneve_opt_len:6; /* GENEVE OptLen (outer) */ + u32 reserved_auto8:10; + u32 bth_dst_qp:24; /* Destination QP in BTH header */ + u32 reserved_auto9:8; + u8 reserved_auto10[20]; +}; + +struct mlx5dr_match_misc2 { + u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */ + u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */ + u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */ + u32 outer_first_mpls_label:20; /* First MPLS LABEL (outer) */ + u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */ + u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */ + u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */ + u32 inner_first_mpls_label:20; /* First MPLS LABEL (inner) */ + u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */ + u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_gre_label:20; /* last MPLS LABEL (outer) */ + u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */ + u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_udp_label:20; /* last MPLS LABEL (outer) */ + u32 metadata_reg_c_7; /* metadata_reg_c_7 */ + u32 metadata_reg_c_6; /* metadata_reg_c_6 */ + u32 metadata_reg_c_5; /* metadata_reg_c_5 */ + u32 metadata_reg_c_4; /* metadata_reg_c_4 */ + u32 metadata_reg_c_3; /* metadata_reg_c_3 */ + u32 metadata_reg_c_2; /* metadata_reg_c_2 */ + u32 metadata_reg_c_1; /* metadata_reg_c_1 */ + u32 metadata_reg_c_0; /* metadata_reg_c_0 */ + u32 metadata_reg_a; /* metadata_reg_a */ + u32 metadata_reg_b; /* metadata_reg_b */ + u8 reserved_auto2[8]; +}; + +struct mlx5dr_match_misc3 { + u32 inner_tcp_seq_num; + u32 outer_tcp_seq_num; + u32 inner_tcp_ack_num; + u32 outer_tcp_ack_num; + u32 outer_vxlan_gpe_vni:24; + u32 reserved_auto1:8; + u32 reserved_auto2:16; + u32 outer_vxlan_gpe_flags:8; + u32 outer_vxlan_gpe_next_protocol:8; + u32 icmpv4_header_data; + u32 icmpv6_header_data; + u32 icmpv6_code:8; + u32 icmpv6_type:8; + u32 icmpv4_code:8; + u32 icmpv4_type:8; + u8 reserved_auto3[0x1c]; +}; + +struct mlx5dr_match_param { + struct mlx5dr_match_spec outer; + struct mlx5dr_match_misc misc; + struct mlx5dr_match_spec inner; + struct mlx5dr_match_misc2 misc2; + struct mlx5dr_match_misc3 misc3; +}; + +#define DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \ + (_misc3)->icmpv4_code || \ + (_misc3)->icmpv4_header_data) + +struct mlx5dr_esw_caps { + u64 drop_icm_address_rx; + u64 drop_icm_address_tx; + u64 uplink_icm_address_rx; + u64 uplink_icm_address_tx; + bool sw_owner; +}; + +struct mlx5dr_cmd_vport_cap { + u16 vport_gvmi; + u16 vhca_gvmi; + u64 icm_address_rx; + u64 icm_address_tx; + u32 num; +}; + +struct mlx5dr_cmd_caps { + u16 
gvmi; + u64 nic_rx_drop_address; + u64 nic_tx_drop_address; + u64 nic_tx_allow_address; + u64 esw_rx_drop_address; + u64 esw_tx_drop_address; + u32 log_icm_size; + u64 hdr_modify_icm_addr; + u32 flex_protocols; + u8 flex_parser_id_icmp_dw0; + u8 flex_parser_id_icmp_dw1; + u8 flex_parser_id_icmpv6_dw0; + u8 flex_parser_id_icmpv6_dw1; + u8 max_ft_level; + u16 roce_min_src_udp; + u8 num_esw_ports; + bool eswitch_manager; + bool rx_sw_owner; + bool tx_sw_owner; + bool fdb_sw_owner; + u32 num_vports; + struct mlx5dr_esw_caps esw_caps; + struct mlx5dr_cmd_vport_cap *vports_caps; + bool prio_tag_required; +}; + +struct mlx5dr_domain_rx_tx { + u64 drop_icm_addr; + u64 default_icm_addr; + enum mlx5dr_ste_entry_type ste_type; +}; + +struct mlx5dr_domain_info { + bool supp_sw_steering; + u32 max_inline_size; + u32 max_send_wr; + u32 max_log_sw_icm_sz; + u32 max_log_action_icm_sz; + struct mlx5dr_domain_rx_tx rx; + struct mlx5dr_domain_rx_tx tx; + struct mlx5dr_cmd_caps caps; +}; + +struct mlx5dr_domain_cache { + struct mlx5dr_fw_recalc_cs_ft **recalc_cs_ft; +}; + +struct mlx5dr_domain { + struct mlx5dr_domain *peer_dmn; + struct mlx5_core_dev *mdev; + u32 pdn; + struct mlx5_uars_page *uar; + enum mlx5dr_domain_type type; + refcount_t refcount; + struct mutex mutex; /* protect domain */ + struct mlx5dr_icm_pool *ste_icm_pool; + struct mlx5dr_icm_pool *action_icm_pool; + struct mlx5dr_send_ring *send_ring; + struct mlx5dr_domain_info info; + struct mlx5dr_domain_cache cache; +}; + +struct mlx5dr_table_rx_tx { + struct mlx5dr_ste_htbl *s_anchor; + struct mlx5dr_domain_rx_tx *nic_dmn; + u64 default_icm_addr; +}; + +struct mlx5dr_table { + struct mlx5dr_domain *dmn; + struct mlx5dr_table_rx_tx rx; + struct mlx5dr_table_rx_tx tx; + u32 level; + u32 table_type; + u32 table_id; + struct list_head matcher_list; + struct mlx5dr_action *miss_action; + refcount_t refcount; +}; + +struct mlx5dr_matcher_rx_tx { + struct mlx5dr_ste_htbl *s_htbl; + struct mlx5dr_ste_htbl *e_anchor; + struct mlx5dr_ste_build *ste_builder; + struct mlx5dr_ste_build ste_builder4[DR_RULE_MAX_STES]; + struct mlx5dr_ste_build ste_builder6[DR_RULE_MAX_STES]; + u8 num_of_builders; + u8 num_of_builders4; + u8 num_of_builders6; + u64 default_icm_addr; + struct mlx5dr_table_rx_tx *nic_tbl; +}; + +struct mlx5dr_matcher { + struct mlx5dr_table *tbl; + struct mlx5dr_matcher_rx_tx rx; + struct mlx5dr_matcher_rx_tx tx; + struct list_head matcher_list; + u16 prio; + struct mlx5dr_match_param mask; + u8 match_criteria; + refcount_t refcount; + struct mlx5dv_flow_matcher *dv_matcher; +}; + +struct mlx5dr_rule_member { + struct mlx5dr_ste *ste; + /* attached to mlx5dr_rule via this */ + struct list_head list; + /* attached to mlx5dr_ste via this */ + struct list_head use_ste_list; +}; + +struct mlx5dr_action { + enum mlx5dr_action_type action_type; + refcount_t refcount; + union { + struct { + struct mlx5dr_domain *dmn; + struct mlx5dr_icm_chunk *chunk; + u8 *data; + u32 data_size; + u16 num_of_actions; + u32 index; + u8 allow_rx:1; + u8 allow_tx:1; + u8 modify_ttl:1; + } rewrite; + struct { + struct mlx5dr_domain *dmn; + u32 reformat_id; + u32 reformat_size; + } reformat; + struct { + u8 is_fw_tbl:1; + union { + struct mlx5dr_table *tbl; + struct { + struct mlx5_flow_table *ft; + u64 rx_icm_addr; + u64 tx_icm_addr; + struct mlx5_core_dev *mdev; + } fw_tbl; + }; + } dest_tbl; + struct { + u32 ctr_id; + u32 offeset; + } ctr; + struct { + struct mlx5dr_domain *dmn; + struct mlx5dr_cmd_vport_cap *caps; + u32 num; + } vport; + struct { + u32 vlan_hdr; /* 
tpid_pcp_dei_vid */ + } push_vlan; + u32 flow_tag; + }; +}; + +enum mlx5dr_connect_type { + CONNECT_HIT = 1, + CONNECT_MISS = 2, +}; + +struct mlx5dr_htbl_connect_info { + enum mlx5dr_connect_type type; + union { + struct mlx5dr_ste_htbl *hit_next_htbl; + u64 miss_icm_addr; + }; +}; + +struct mlx5dr_rule_rx_tx { + struct list_head rule_members_list; + struct mlx5dr_matcher_rx_tx *nic_matcher; +}; + +struct mlx5dr_rule { + struct mlx5dr_matcher *matcher; + struct mlx5dr_rule_rx_tx rx; + struct mlx5dr_rule_rx_tx tx; + struct list_head rule_actions_list; +}; + +void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *new_ste, + struct mlx5dr_ste *ste); + +struct mlx5dr_icm_chunk { + struct mlx5dr_icm_bucket *bucket; + struct list_head chunk_list; + u32 rkey; + u32 num_of_entries; + u32 byte_size; + u64 icm_addr; + u64 mr_addr; + + /* Memory optimisation */ + struct mlx5dr_ste *ste_arr; + u8 *hw_ste_arr; + struct list_head *miss_list; +}; + +static inline int +mlx5dr_matcher_supp_flex_parser_icmp_v4(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED; +} + +static inline int +mlx5dr_matcher_supp_flex_parser_icmp_v6(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED; +} + +int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + bool ipv6); + +static inline u32 +mlx5dr_icm_pool_chunk_size_to_entries(enum mlx5dr_icm_chunk_size chunk_size) +{ + return 1 << chunk_size; +} + +static inline int +mlx5dr_icm_pool_chunk_size_to_byte(enum mlx5dr_icm_chunk_size chunk_size, + enum mlx5dr_icm_type icm_type) +{ + int num_of_entries; + int entry_size; + + if (icm_type == DR_ICM_TYPE_STE) + entry_size = DR_STE_SIZE; + else + entry_size = DR_MODIFY_ACTION_SIZE; + + num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); + + return entry_size * num_of_entries; +} + +static inline struct mlx5dr_cmd_vport_cap * +mlx5dr_get_vport_cap(struct mlx5dr_cmd_caps *caps, u32 vport) +{ + if (!caps->vports_caps || + (vport >= caps->num_vports && vport != WIRE_PORT)) + return NULL; + + if (vport == WIRE_PORT) + vport = caps->num_vports; + + return &caps->vports_caps[vport]; +} + +struct mlx5dr_cmd_query_flow_table_details { + u8 status; + u8 level; + u64 sw_owner_icm_root_1; + u64 sw_owner_icm_root_0; +}; + +/* internal API functions */ +int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_caps *caps); +int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, + bool other_vport, u16 vport_number, + u64 *icm_address_rx, + u64 *icm_address_tx); +int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, + bool other_vport, u16 vport_number, u16 *gvmi); +int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_esw_caps *caps); +int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev); +int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id, + u32 modify_header_id, + u32 vport_id); +int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id); +int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev, + u32 table_type, + u8 num_of_actions, + u64 *actions, + u32 *modify_header_id); +int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev, + u32 modify_header_id); +int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 *group_id); +int 
mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id); +int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, + u32 table_type, + u64 icm_addr_rx, + u64 icm_addr_tx, + u8 level, + bool sw_owner, + bool term_tbl, + u64 *fdb_rx_icm_addr, + u32 *table_id); +int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev, + u32 table_id, + u32 table_type); +int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, + u32 table_id, + struct mlx5dr_cmd_query_flow_table_details *output); +int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, + enum mlx5_reformat_ctx_type rt, + size_t reformat_size, + void *reformat_data, + u32 *reformat_id); +void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev, + u32 reformat_id); + +struct mlx5dr_cmd_gid_attr { + u8 gid[16]; + u8 mac[6]; + u32 roce_ver; +}; + +struct mlx5dr_cmd_qp_create_attr { + u32 page_id; + u32 pdn; + u32 cqn; + u32 pm_state; + u32 service_type; + u32 buff_umem_id; + u32 db_umem_id; + u32 sq_wqe_cnt; + u32 rq_wqe_cnt; + u32 rq_wqe_shift; +}; + +int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, + u16 index, struct mlx5dr_cmd_gid_attr *attr); + +struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, + enum mlx5dr_icm_type icm_type); +void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool); + +struct mlx5dr_icm_chunk * +mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size); +void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk); +bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste); +int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_htbl_connect_info *connect_info, + bool update_hw_ste); +void mlx5dr_ste_set_formatted_ste(u16 gvmi, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, + struct mlx5dr_htbl_connect_info *connect_info); +void mlx5dr_ste_copy_param(u8 match_criteria, + struct mlx5dr_match_param *set_param, + struct mlx5dr_match_parameters *mask); + +void mlx5dr_crc32_init_table(void); +u32 mlx5dr_crc32_slice8_calc(const void *input_data, size_t length); + +struct mlx5dr_qp { + struct mlx5_core_dev *mdev; + struct mlx5_wq_qp wq; + struct mlx5_uars_page *uar; + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_core_qp mqp; + struct { + unsigned int pc; + unsigned int cc; + unsigned int size; + unsigned int *wqe_head; + unsigned int wqe_cnt; + } sq; + struct { + unsigned int pc; + unsigned int cc; + unsigned int size; + unsigned int wqe_cnt; + } rq; + int max_inline_data; +}; + +struct mlx5dr_cq { + struct mlx5_core_dev *mdev; + struct mlx5_cqwq wq; + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_core_cq mcq; + struct mlx5dr_qp *qp; +}; + +struct mlx5dr_mr { + struct mlx5_core_dev *mdev; + struct mlx5_core_mkey mkey; + dma_addr_t dma_addr; + void *addr; + size_t size; +}; + +#define MAX_SEND_CQE 64 +#define MIN_READ_SYNC 64 + +struct mlx5dr_send_ring { + struct mlx5dr_cq *cq; + struct mlx5dr_qp *qp; + struct mlx5dr_mr *mr; + /* How many WQEs are waiting for completion */ + u32 pending_wqe; + /* Signal request per this threshold value */ + u16 signal_th; + /* Each post_send_size is less than max_post_send_size */ + u32 max_post_send_size; + /* manage the send queue */ + u32 tx_head; + void *buf; + u32 buf_size; + struct ib_wc wc[MAX_SEND_CQE]; + u8 sync_buff[MIN_READ_SYNC]; + struct mlx5dr_mr *sync_mr; +}; + +int 
mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn); +void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring); +int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn); +int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, + struct mlx5dr_ste *ste, + u8 *data, + u16 size, + u16 offset); +int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, u8 *mask); +int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *ste_init_data, + bool update_hw_ste); +int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, + struct mlx5dr_action *action); + +struct mlx5dr_fw_recalc_cs_ft { + u64 rx_icm_addr; + u32 table_id; + u32 group_id; + u32 modify_hdr_id; +}; + +struct mlx5dr_fw_recalc_cs_ft * +mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num); +void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn, + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft); +int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn, + u32 vport_num, + u64 *rx_icm_addr); +#endif /* _DR_TYPES_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c new file mode 100644 index 000000000000..3d587d0bdbbe --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -0,0 +1,600 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" +#include "mlx5dr.h" +#include "fs_dr.h" + +static bool mlx5_dr_is_fw_table(u32 flags) +{ + if (flags & MLX5_FLOW_TABLE_TERMINATION) + return true; + + return false; +} + +static int mlx5_cmd_dr_update_root_ft(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 underlay_qpn, + bool disconnect) +{ + return mlx5_fs_cmd_get_fw_cmds()->update_root_ft(ns, ft, underlay_qpn, + disconnect); +} + +static int set_miss_action(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + struct mlx5dr_action *old_miss_action; + struct mlx5dr_action *action = NULL; + struct mlx5dr_table *next_tbl; + int err; + + next_tbl = next_ft ? 
next_ft->fs_dr_table.dr_table : NULL; + if (next_tbl) { + action = mlx5dr_action_create_dest_table(next_tbl); + if (!action) + return -EINVAL; + } + old_miss_action = ft->fs_dr_table.miss_action; + err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action); + if (err && action) { + err = mlx5dr_action_destroy(action); + if (err) { + action = NULL; + mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n", + err); + } + } + ft->fs_dr_table.miss_action = action; + if (old_miss_action) { + err = mlx5dr_action_destroy(old_miss_action); + if (err) + mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n", + err); + } + + return err; +} + +static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + unsigned int log_size, + struct mlx5_flow_table *next_ft) +{ + struct mlx5dr_table *tbl; + int err; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, + log_size, + next_ft); + + tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, + ft->level); + if (!tbl) { + mlx5_core_err(ns->dev, "Failed creating dr flow_table\n"); + return -EINVAL; + } + + ft->fs_dr_table.dr_table = tbl; + ft->id = mlx5dr_table_get_id(tbl); + + if (next_ft) { + err = set_miss_action(ns, ft, next_ft); + if (err) { + mlx5dr_table_destroy(tbl); + ft->fs_dr_table.dr_table = NULL; + return err; + } + } + + return 0; +} + +static int mlx5_cmd_dr_destroy_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft) +{ + struct mlx5dr_action *action = ft->fs_dr_table.miss_action; + int err; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft); + + err = mlx5dr_table_destroy(ft->fs_dr_table.dr_table); + if (err) { + mlx5_core_err(ns->dev, "Failed to destroy flow_table (%d)\n", + err); + return err; + } + if (action) { + err = mlx5dr_action_destroy(action); + if (err) { + mlx5_core_err(ns->dev, "Failed to destroy action(%d)\n", + err); + return err; + } + } + + return err; +} + +static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + return set_miss_action(ns, ft, next_ft); +} + +static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 *in, + struct mlx5_flow_group *fg) +{ + struct mlx5dr_matcher *matcher; + u16 priority = MLX5_GET(create_flow_group_in, in, + start_flow_index); + u8 match_criteria_enable = MLX5_GET(create_flow_group_in, + in, + match_criteria_enable); + struct mlx5dr_match_parameters mask; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->create_flow_group(ns, ft, in, + fg); + + mask.match_buf = MLX5_ADDR_OF(create_flow_group_in, + in, match_criteria); + mask.match_sz = sizeof(fg->mask.match_criteria); + + matcher = mlx5dr_matcher_create(ft->fs_dr_table.dr_table, + priority, + match_criteria_enable, + &mask); + if (!matcher) { + mlx5_core_err(ns->dev, "Failed creating matcher\n"); + return -EINVAL; + } + + fg->fs_dr_matcher.dr_matcher = matcher; + return 0; +} + +static int mlx5_cmd_dr_destroy_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_group(ns, ft, fg); + + return mlx5dr_matcher_destroy(fg->fs_dr_matcher.dr_matcher); +} + +static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain, + struct 
mlx5_flow_rule *dst) +{ + struct mlx5_flow_destination *dest_attr = &dst->dest_attr; + + return mlx5dr_action_create_dest_vport(domain, dest_attr->vport.num, + dest_attr->vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID, + dest_attr->vport.vhca_id); +} + +static struct mlx5dr_action *create_ft_action(struct mlx5_core_dev *dev, + struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_table *dest_ft = dst->dest_attr.ft; + + if (mlx5_dr_is_fw_table(dest_ft->flags)) + return mlx5dr_create_action_dest_flow_fw_table(dest_ft, dev); + return mlx5dr_action_create_dest_table(dest_ft->fs_dr_table.dr_table); +} + +static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domain, + struct mlx5_fs_vlan *vlan) +{ + u16 n_ethtype = vlan->ethtype; + u8 prio = vlan->prio; + u16 vid = vlan->vid; + u32 vlan_hdr; + + vlan_hdr = (u32)n_ethtype << 16 | (u32)(prio) << 12 | (u32)vid; + return mlx5dr_action_create_push_vlan(domain, htonl(vlan_hdr)); +} + +#define MLX5_FLOW_CONTEXT_ACTION_MAX 20 +static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) +{ + struct mlx5dr_domain *domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action *term_action = NULL; + struct mlx5dr_match_parameters params; + struct mlx5_core_dev *dev = ns->dev; + struct mlx5dr_action **fs_dr_actions; + struct mlx5dr_action *tmp_action; + struct mlx5dr_action **actions; + bool delay_encap_set = false; + struct mlx5dr_rule *rule; + struct mlx5_flow_rule *dst; + int fs_dr_num_actions = 0; + int num_actions = 0; + size_t match_sz; + int err = 0; + int i; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte); + + actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions), + GFP_KERNEL); + if (!actions) + return -ENOMEM; + + fs_dr_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, + sizeof(*fs_dr_actions), GFP_KERNEL); + if (!fs_dr_actions) { + kfree(actions); + return -ENOMEM; + } + + match_sz = sizeof(fte->val); + + /* The order of the actions must be kept; only the following + * order is supported by SW steering: + * TX: push vlan -> modify header -> encap + * RX: decap -> pop vlan -> modify header + */ + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + tmp_action = create_action_push_vlan(domain, &fte->action.vlan[0]); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + tmp_action = create_action_push_vlan(domain, &fte->action.vlan[1]); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + enum mlx5dr_action_reformat_type decap_type = + DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2; + + tmp_action = mlx5dr_action_create_packet_reformat(domain, + decap_type, 0, + NULL); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { + bool is_decap = fte->action.pkt_reformat->reformat_type == + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + + if (is_decap) + actions[num_actions++] = + fte->action.pkt_reformat->action.dr_action; + else + delay_encap_set = true; + } + + if 
(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { + tmp_action = + mlx5dr_action_create_pop_vlan(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2) { + tmp_action = + mlx5dr_action_create_pop_vlan(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + actions[num_actions++] = + fte->action.modify_hdr->action.dr_action; + + if (delay_encap_set) + actions[num_actions++] = + fte->action.pkt_reformat->action.dr_action; + + /* The order of the actions below is not important */ + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { + tmp_action = mlx5dr_action_create_drop(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_action = tmp_action; + } + + if (fte->flow_context.flow_tag) { + tmp_action = + mlx5dr_action_create_tag(fte->flow_context.flow_tag); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + list_for_each_entry(dst, &fte->node.children, node.list) { + enum mlx5_flow_destination_type type = dst->dest_attr.type; + u32 id; + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -ENOSPC; + goto free_actions; + } + + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_COUNTER: + id = dst->dest_attr.counter_id; + + tmp_action = + mlx5dr_action_create_flow_counter(id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + tmp_action = create_ft_action(dev, dst); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_action = tmp_action; + break; + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + tmp_action = create_vport_action(domain, dst); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_action = tmp_action; + break; + default: + err = -EOPNOTSUPP; + goto free_actions; + } + } + } + + params.match_sz = match_sz; + params.match_buf = (u64 *)fte->val; + + if (term_action) + actions[num_actions++] = term_action; + + rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher, + ¶ms, + num_actions, + actions); + if (!rule) { + err = -EINVAL; + goto free_actions; + } + + kfree(actions); + fte->fs_dr_rule.dr_rule = rule; + fte->fs_dr_rule.num_actions = fs_dr_num_actions; + fte->fs_dr_rule.dr_actions = fs_dr_actions; + + return 0; + +free_actions: + for (i = 0; i < fs_dr_num_actions; i++) + if (!IS_ERR_OR_NULL(fs_dr_actions[i])) + mlx5dr_action_destroy(fs_dr_actions[i]); + + mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err); + kfree(actions); + kfree(fs_dr_actions); + return err; +} + +static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action *action; 
+ int dr_reformat; + + switch (reformat_type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2; + break; + case MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + dr_reformat = DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2; + break; + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3; + break; + default: + mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n", + reformat_type); + return -EOPNOTSUPP; + } + + action = mlx5dr_action_create_packet_reformat(dr_domain, + dr_reformat, + size, + reformat_data); + if (!action) { + mlx5_core_err(ns->dev, "Failed allocating packet-reformat action\n"); + return -EINVAL; + } + + pkt_reformat->action.dr_action = action; + + return 0; +} + +static void mlx5_cmd_dr_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) +{ + mlx5dr_action_destroy(pkt_reformat->action.dr_action); +} + +static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) +{ + struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action *action; + size_t actions_sz; + + actions_sz = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) * + num_actions; + action = mlx5dr_action_create_modify_header(dr_domain, 0, + actions_sz, + modify_actions); + if (!action) { + mlx5_core_err(ns->dev, "Failed allocating modify-header action\n"); + return -EINVAL; + } + + modify_hdr->action.dr_action = action; + + return 0; +} + +static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) +{ + mlx5dr_action_destroy(modify_hdr->action.dr_action); +} + +static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + int modify_mask, + struct fs_fte *fte) +{ + return -EOPNOTSUPP; +} + +static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + struct mlx5_fs_dr_rule *rule = &fte->fs_dr_rule; + int err; + int i; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->delete_fte(ns, ft, fte); + + err = mlx5dr_rule_destroy(rule->dr_rule); + if (err) + return err; + + for (i = 0; i < rule->num_actions; i++) + if (!IS_ERR_OR_NULL(rule->dr_actions[i])) + mlx5dr_action_destroy(rule->dr_actions[i]); + + kfree(rule->dr_actions); + return 0; +} + +static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns) +{ + struct mlx5dr_domain *peer_domain = NULL; + + if (peer_ns) + peer_domain = peer_ns->fs_dr_domain.dr_domain; + mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain, + peer_domain); + return 0; +} + +static int mlx5_cmd_dr_create_ns(struct mlx5_flow_root_namespace *ns) +{ + ns->fs_dr_domain.dr_domain = + mlx5dr_domain_create(ns->dev, + MLX5DR_DOMAIN_TYPE_FDB); + if (!ns->fs_dr_domain.dr_domain) { + mlx5_core_err(ns->dev, "Failed to create dr flow namespace\n"); + return -EOPNOTSUPP; + } + return 0; +} + +static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns) +{ + return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain); +} + +bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) +{ + return mlx5dr_is_supported(dev); +} + +static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = { + 
.create_flow_table = mlx5_cmd_dr_create_flow_table, + .destroy_flow_table = mlx5_cmd_dr_destroy_flow_table, + .modify_flow_table = mlx5_cmd_dr_modify_flow_table, + .create_flow_group = mlx5_cmd_dr_create_flow_group, + .destroy_flow_group = mlx5_cmd_dr_destroy_flow_group, + .create_fte = mlx5_cmd_dr_create_fte, + .update_fte = mlx5_cmd_dr_update_fte, + .delete_fte = mlx5_cmd_dr_delete_fte, + .update_root_ft = mlx5_cmd_dr_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_dr_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_dr_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_dr_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_dr_modify_header_dealloc, + .set_peer = mlx5_cmd_dr_set_peer, + .create_ns = mlx5_cmd_dr_create_ns, + .destroy_ns = mlx5_cmd_dr_destroy_ns, +}; + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) +{ + return &mlx5_flow_cmds_dr; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h new file mode 100644 index 000000000000..1fb185d6ac7f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2019 Mellanox Technologies + */ + +#ifndef _MLX5_FS_DR_ +#define _MLX5_FS_DR_ + +#include "mlx5dr.h" + +struct mlx5_flow_root_namespace; +struct fs_fte; + +struct mlx5_fs_dr_action { + struct mlx5dr_action *dr_action; +}; + +struct mlx5_fs_dr_ns { + struct mlx5_dr_ns *dr_ns; +}; + +struct mlx5_fs_dr_rule { + struct mlx5dr_rule *dr_rule; + /* Only actions created by fs_dr */ + struct mlx5dr_action **dr_actions; + int num_actions; +}; + +struct mlx5_fs_dr_domain { + struct mlx5dr_domain *dr_domain; +}; + +struct mlx5_fs_dr_matcher { + struct mlx5dr_matcher *dr_matcher; +}; + +struct mlx5_fs_dr_table { + struct mlx5dr_table *dr_table; + struct mlx5dr_action *miss_action; +}; + +#ifdef CONFIG_MLX5_SW_STEERING + +bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev); + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void); + +#else + +static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) +{ + return NULL; +} + +static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) +{ + return false; +} + +#endif /* CONFIG_MLX5_SW_STEERING */ +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h new file mode 100644 index 000000000000..596c927220d9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h @@ -0,0 +1,604 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef MLX5_IFC_DR_H +#define MLX5_IFC_DR_H + +enum { + MLX5DR_ACTION_MDFY_HW_FLD_L2_0 = 0, + MLX5DR_ACTION_MDFY_HW_FLD_L2_1 = 1, + MLX5DR_ACTION_MDFY_HW_FLD_L2_2 = 2, + MLX5DR_ACTION_MDFY_HW_FLD_L3_0 = 3, + MLX5DR_ACTION_MDFY_HW_FLD_L3_1 = 4, + MLX5DR_ACTION_MDFY_HW_FLD_L3_2 = 5, + MLX5DR_ACTION_MDFY_HW_FLD_L3_3 = 6, + MLX5DR_ACTION_MDFY_HW_FLD_L3_4 = 7, + MLX5DR_ACTION_MDFY_HW_FLD_L4_0 = 8, + MLX5DR_ACTION_MDFY_HW_FLD_L4_1 = 9, + MLX5DR_ACTION_MDFY_HW_FLD_MPLS = 10, + MLX5DR_ACTION_MDFY_HW_FLD_L2_TNL_0 = 11, + MLX5DR_ACTION_MDFY_HW_FLD_REG_0 = 12, + MLX5DR_ACTION_MDFY_HW_FLD_REG_1 = 13, + MLX5DR_ACTION_MDFY_HW_FLD_REG_2 = 14, + MLX5DR_ACTION_MDFY_HW_FLD_REG_3 = 15, + MLX5DR_ACTION_MDFY_HW_FLD_L4_2 = 16, + MLX5DR_ACTION_MDFY_HW_FLD_FLEX_0 = 17, + MLX5DR_ACTION_MDFY_HW_FLD_FLEX_1 = 18, + 
MLX5DR_ACTION_MDFY_HW_FLD_FLEX_2 = 19, + MLX5DR_ACTION_MDFY_HW_FLD_FLEX_3 = 20, + MLX5DR_ACTION_MDFY_HW_FLD_L2_TNL_1 = 21, + MLX5DR_ACTION_MDFY_HW_FLD_METADATA = 22, + MLX5DR_ACTION_MDFY_HW_FLD_RESERVED = 23, +}; + +enum { + MLX5DR_ACTION_MDFY_HW_OP_SET = 0x2, + MLX5DR_ACTION_MDFY_HW_OP_ADD = 0x3, +}; + +enum { + MLX5DR_ACTION_MDFY_HW_HDR_L3_NONE = 0x0, + MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4 = 0x1, + MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6 = 0x2, +}; + +enum { + MLX5DR_ACTION_MDFY_HW_HDR_L4_NONE = 0x0, + MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP = 0x1, + MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP = 0x2, +}; + +enum { + MLX5DR_STE_LU_TYPE_NOP = 0x00, + MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP = 0x05, + MLX5DR_STE_LU_TYPE_ETHL2_TUNNELING_I = 0x0a, + MLX5DR_STE_LU_TYPE_ETHL2_DST_O = 0x06, + MLX5DR_STE_LU_TYPE_ETHL2_DST_I = 0x07, + MLX5DR_STE_LU_TYPE_ETHL2_DST_D = 0x1b, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_O = 0x08, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_I = 0x09, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_D = 0x1c, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_O = 0x36, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_I = 0x37, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_D = 0x38, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_O = 0x0d, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_I = 0x0e, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_D = 0x1e, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_O = 0x0f, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_I = 0x10, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_D = 0x1f, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x11, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x12, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_D = 0x20, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_O = 0x29, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_I = 0x2a, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_D = 0x2b, + MLX5DR_STE_LU_TYPE_ETHL4_O = 0x13, + MLX5DR_STE_LU_TYPE_ETHL4_I = 0x14, + MLX5DR_STE_LU_TYPE_ETHL4_D = 0x21, + MLX5DR_STE_LU_TYPE_ETHL4_MISC_O = 0x2c, + MLX5DR_STE_LU_TYPE_ETHL4_MISC_I = 0x2d, + MLX5DR_STE_LU_TYPE_ETHL4_MISC_D = 0x2e, + MLX5DR_STE_LU_TYPE_MPLS_FIRST_O = 0x15, + MLX5DR_STE_LU_TYPE_MPLS_FIRST_I = 0x24, + MLX5DR_STE_LU_TYPE_MPLS_FIRST_D = 0x25, + MLX5DR_STE_LU_TYPE_GRE = 0x16, + MLX5DR_STE_LU_TYPE_FLEX_PARSER_0 = 0x22, + MLX5DR_STE_LU_TYPE_FLEX_PARSER_1 = 0x23, + MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x19, + MLX5DR_STE_LU_TYPE_GENERAL_PURPOSE = 0x18, + MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_0 = 0x2f, + MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_1 = 0x30, + MLX5DR_STE_LU_TYPE_DONT_CARE = 0x0f, +}; + +enum mlx5dr_ste_entry_type { + MLX5DR_STE_TYPE_TX = 1, + MLX5DR_STE_TYPE_RX = 2, + MLX5DR_STE_TYPE_MODIFY_PKT = 6, +}; + +struct mlx5_ifc_ste_general_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_5c[0x2]; + u8 next_table_rank[0x2]; + + u8 reserved_at_60[0xa0]; + u8 tag_value[0x60]; + u8 bit_mask[0x60]; +}; + +struct mlx5_ifc_ste_sx_transmit_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_5c[0x2]; + u8 next_table_rank[0x2]; + + u8 sx_wire[0x1]; + u8 sx_func_lb[0x1]; + u8 sx_sniffer[0x1]; + u8 sx_wire_enable[0x1]; + u8 sx_func_lb_enable[0x1]; + u8 sx_sniffer_enable[0x1]; + u8 action_type[0x3]; + u8 reserved_at_69[0x1]; + u8 action_description[0x6]; + u8 gvmi[0x10]; + + u8 
encap_pointer_vlan_data[0x20]; + + u8 loopback_syndome_en[0x8]; + u8 loopback_syndome[0x8]; + u8 counter_trigger[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x8]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 go_back[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_rx_steering_mult_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_[0x2]; + u8 next_table_rank[0x2]; + + u8 member_count[0x10]; + u8 gvmi[0x10]; + + u8 qp_list_pointer[0x20]; + + u8 reserved_at_a0[0x1]; + u8 tunneling_action[0x3]; + u8 action_description[0x4]; + u8 reserved_at_a8[0x8]; + u8 counter_trigger_15_0[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x08]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 fail_on_error[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_modify_packet_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_[0x2]; + u8 next_table_rank[0x2]; + + u8 number_of_re_write_actions[0x10]; + u8 gvmi[0x10]; + + u8 header_re_write_actions_pointer[0x20]; + + u8 reserved_at_a0[0x1]; + u8 tunneling_action[0x3]; + u8 action_description[0x4]; + u8 reserved_at_a8[0x8]; + u8 counter_trigger_15_0[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x08]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 fail_on_error[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_eth_l2_src_bits { + u8 smac_47_16[0x20]; + + u8 smac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 qp_type[0x2]; + u8 ethertype_filter[0x1]; + u8 reserved_at_43[0x1]; + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 reserved_at_48[0x4]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_52[0x2]; + u8 first_vlan_id[0xc]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 reserved_at_68[0x4]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_dst_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 qp_type[0x2]; + u8 ethertype_filter[0x1]; + u8 reserved_at_43[0x1]; + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 reserved_at_48[0x4]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_52[0x2]; + u8 first_vlan_id[0xc]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 reserved_at_68[0x4]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_src_dst_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 
smac_47_32[0x10]; + + u8 smac_31_0[0x20]; + + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 l3_type[0x2]; + u8 reserved_at_66[0x6]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 first_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_5_tuple_bits { + u8 destination_address[0x20]; + + u8 source_address[0x20]; + + u8 source_port[0x10]; + u8 destination_port[0x10]; + + u8 fragmented[0x1]; + u8 first_fragment[0x1]; + u8 reserved_at_62[0x2]; + u8 reserved_at_64[0x1]; + u8 ecn[0x2]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 dscp[0x6]; + u8 reserved_at_76[0x2]; + u8 protocol[0x8]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv6_dst_bits { + u8 dst_ip_127_96[0x20]; + + u8 dst_ip_95_64[0x20]; + + u8 dst_ip_63_32[0x20]; + + u8 dst_ip_31_0[0x20]; +}; + +struct mlx5_ifc_ste_eth_l2_tnl_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 l2_tunneling_network_id[0x20]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 reserved_at_6c[0x3]; + u8 gre_key_flag[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 first_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv6_src_bits { + u8 src_ip_127_96[0x20]; + + u8 src_ip_95_64[0x20]; + + u8 src_ip_63_32[0x20]; + + u8 src_ip_31_0[0x20]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_misc_bits { + u8 version[0x4]; + u8 ihl[0x4]; + u8 reserved_at_8[0x8]; + u8 total_length[0x10]; + + u8 identification[0x10]; + u8 flags[0x3]; + u8 fragment_offset[0xd]; + + u8 time_to_live[0x8]; + u8 reserved_at_48[0x8]; + u8 checksum[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_eth_l4_bits { + u8 fragmented[0x1]; + u8 first_fragment[0x1]; + u8 reserved_at_2[0x6]; + u8 protocol[0x8]; + u8 dst_port[0x10]; + + u8 ipv6_version[0x4]; + u8 reserved_at_24[0x1]; + u8 ecn[0x2]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 src_port[0x10]; + + u8 ipv6_payload_length[0x10]; + u8 ipv6_hop_limit[0x8]; + u8 dscp[0x6]; + u8 reserved_at_5e[0x2]; + + u8 tcp_data_offset[0x4]; + u8 reserved_at_64[0x8]; + u8 flow_label[0x14]; +}; + +struct mlx5_ifc_ste_eth_l4_misc_bits { + u8 checksum[0x10]; + u8 length[0x10]; + + u8 seq_num[0x20]; + + u8 ack_num[0x20]; + + u8 urgent_pointer[0x10]; + u8 window_size[0x10]; +}; + +struct mlx5_ifc_ste_mpls_bits { + u8 mpls0_label[0x14]; + u8 mpls0_exp[0x3]; + u8 mpls0_s_bos[0x1]; + u8 mpls0_ttl[0x8]; + + u8 mpls1_label[0x20]; + + u8 mpls2_label[0x20]; + + u8 reserved_at_60[0x16]; + u8 mpls4_s_bit[0x1]; + u8 mpls4_qualifier[0x1]; + u8 mpls3_s_bit[0x1]; + u8 mpls3_qualifier[0x1]; + u8 mpls2_s_bit[0x1]; + u8 mpls2_qualifier[0x1]; + u8 mpls1_s_bit[0x1]; + u8 mpls1_qualifier[0x1]; + u8 mpls0_s_bit[0x1]; + u8 mpls0_qualifier[0x1]; +}; + +struct mlx5_ifc_ste_register_0_bits { + u8 register_0_h[0x20]; + + u8 register_0_l[0x20]; + + u8 register_1_h[0x20]; + + u8 register_1_l[0x20]; +}; + +struct mlx5_ifc_ste_register_1_bits { + u8 register_2_h[0x20]; + + u8 register_2_l[0x20]; + + u8 register_3_h[0x20]; + + u8 register_3_l[0x20]; +}; + +struct mlx5_ifc_ste_gre_bits { + u8 gre_c_present[0x1]; + u8 reserved_at_30[0x1]; + u8 gre_k_present[0x1]; + u8 
gre_s_present[0x1]; + u8 strict_src_route[0x1]; + u8 recur[0x3]; + u8 flags[0x5]; + u8 version[0x3]; + u8 gre_protocol[0x10]; + + u8 checksum[0x10]; + u8 offset[0x10]; + + u8 gre_key_h[0x18]; + u8 gre_key_l[0x8]; + + u8 seq_num[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_0_bits { + u8 parser_3_label[0x14]; + u8 parser_3_exp[0x3]; + u8 parser_3_s_bos[0x1]; + u8 parser_3_ttl[0x8]; + + u8 flex_parser_2[0x20]; + + u8 flex_parser_1[0x20]; + + u8 flex_parser_0[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_1_bits { + u8 flex_parser_7[0x20]; + + u8 flex_parser_6[0x20]; + + u8 flex_parser_5[0x20]; + + u8 flex_parser_4[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_tnl_bits { + u8 flex_parser_tunneling_header_63_32[0x20]; + + u8 flex_parser_tunneling_header_31_0[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_general_purpose_bits { + u8 general_purpose_lookup_field[0x20]; + + u8 reserved_at_20[0x20]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_src_gvmi_qp_bits { + u8 loopback_syndrome[0x8]; + u8 reserved_at_8[0x8]; + u8 source_gvmi[0x10]; + + u8 reserved_at_20[0x5]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 source_is_requestor[0x1]; + u8 source_qp[0x18]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_l2_hdr_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 smac_47_32[0x10]; + + u8 smac_31_0[0x20]; + + u8 ethertype[0x10]; + u8 vlan_type[0x10]; + + u8 vlan[0x10]; + u8 reserved_at_90[0x10]; +}; + +/* Both HW set and HW add share the same HW format with different opcodes */ +struct mlx5_ifc_dr_action_hw_set_bits { + u8 opcode[0x8]; + u8 destination_field_code[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x3]; + u8 destination_length[0x5]; + + u8 inline_data[0x20]; +}; + +#endif /* MLX5_IFC_DR_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h new file mode 100644 index 000000000000..adda9cbfba45 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef _MLX5DR_H_ +#define _MLX5DR_H_ + +struct mlx5dr_domain; +struct mlx5dr_table; +struct mlx5dr_matcher; +struct mlx5dr_rule; +struct mlx5dr_action; + +enum mlx5dr_domain_type { + MLX5DR_DOMAIN_TYPE_NIC_RX, + MLX5DR_DOMAIN_TYPE_NIC_TX, + MLX5DR_DOMAIN_TYPE_FDB, +}; + +enum mlx5dr_domain_sync_flags { + MLX5DR_DOMAIN_SYNC_FLAGS_SW = 1 << 0, + MLX5DR_DOMAIN_SYNC_FLAGS_HW = 1 << 1, +}; + +enum mlx5dr_action_reformat_type { + DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2, + DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2, + DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2, + DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3, +}; + +struct mlx5dr_match_parameters { + size_t match_sz; + u64 *match_buf; /* Device spec format */ +}; + +#ifdef CONFIG_MLX5_SW_STEERING + +struct mlx5dr_domain * +mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type); + +int mlx5dr_domain_destroy(struct mlx5dr_domain *domain); + +int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags); + +void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, + struct mlx5dr_domain *peer_dmn); + +struct mlx5dr_table * +mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level); + +int mlx5dr_table_destroy(struct mlx5dr_table *table); + +u32 mlx5dr_table_get_id(struct mlx5dr_table *table); + +struct mlx5dr_matcher * 
+mlx5dr_matcher_create(struct mlx5dr_table *table, + u16 priority, + u8 match_criteria_enable, + struct mlx5dr_match_parameters *mask); + +int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher); + +struct mlx5dr_rule * +mlx5dr_rule_create(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[]); + +int mlx5dr_rule_destroy(struct mlx5dr_rule *rule); + +int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action); + +struct mlx5dr_action * +mlx5dr_action_create_dest_table(struct mlx5dr_table *table); + +struct mlx5dr_action * +mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, + struct mlx5_core_dev *mdev); + +struct mlx5dr_action * +mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, + u32 vport, u8 vhca_id_valid, + u16 vhca_id); + +struct mlx5dr_action *mlx5dr_action_create_drop(void); + +struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value); + +struct mlx5dr_action * +mlx5dr_action_create_flow_counter(u32 counter_id); + +struct mlx5dr_action * +mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, + enum mlx5dr_action_reformat_type reformat_type, + size_t data_sz, + void *data); + +struct mlx5dr_action * +mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain, + u32 flags, + size_t actions_sz, + __be64 actions[]); + +struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void); + +struct mlx5dr_action * +mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, __be32 vlan_hdr); + +int mlx5dr_action_destroy(struct mlx5dr_action *action); + +static inline bool +mlx5dr_is_supported(struct mlx5_core_dev *dev) +{ + return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner); +} + +#else /* CONFIG_MLX5_SW_STEERING */ + +static inline struct mlx5dr_domain * +mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) { return NULL; } + +static inline int +mlx5dr_domain_destroy(struct mlx5dr_domain *domain) { return 0; } + +static inline int +mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags) { return 0; } + +static inline void +mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, + struct mlx5dr_domain *peer_dmn) { } + +static inline struct mlx5dr_table * +mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level) { return NULL; } + +static inline int +mlx5dr_table_destroy(struct mlx5dr_table *table) { return 0; } + +static inline u32 +mlx5dr_table_get_id(struct mlx5dr_table *table) { return 0; } + +static inline struct mlx5dr_matcher * +mlx5dr_matcher_create(struct mlx5dr_table *table, + u16 priority, + u8 match_criteria_enable, + struct mlx5dr_match_parameters *mask) { return NULL; } + +static inline int +mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) { return 0; } + +static inline struct mlx5dr_rule * +mlx5dr_rule_create(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[]) { return NULL; } + +static inline int +mlx5dr_rule_destroy(struct mlx5dr_rule *rule) { return 0; } + +static inline int +mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action) { return 0; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_dest_table(struct mlx5dr_table *table) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, + struct mlx5_core_dev *mdev) { return NULL; } + +static inline struct mlx5dr_action * 
+mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, + u32 vport, u8 vhca_id_valid, + u16 vhca_id) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_drop(void) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_tag(u32 tag_value) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_flow_counter(u32 counter_id) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, + enum mlx5dr_action_reformat_type reformat_type, + size_t data_sz, + void *data) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain, + u32 flags, + size_t actions_sz, + __be64 actions[]) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_pop_vlan(void) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, + __be32 vlan_hdr) { return NULL; } + +static inline int +mlx5dr_action_destroy(struct mlx5dr_action *action) { return 0; } + +static inline bool +mlx5dr_is_supported(struct mlx5_core_dev *dev) { return false; } + +#endif /* CONFIG_MLX5_SW_STEERING */ + +#endif /* _MLX5DR_H_ */ diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 15a8be6bad27..a43140f7b5eb 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -2172,9 +2172,8 @@ static int lan743x_rx_napi_poll(struct napi_struct *napi, int weight) } count = 0; while (count < weight) { - int rx_process_result = -1; + int rx_process_result = lan743x_rx_process_packet(rx); - rx_process_result = lan743x_rx_process_packet(rx); if (rx_process_result == RX_PROCESS_RESULT_PACKET_RECEIVED) { count++; } else if (rx_process_result == diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c index bc9850e4ec5e..0e2db6ea79e9 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c @@ -6,6 +6,7 @@ #include <linux/bug.h> #include <linux/jiffies.h> #include <linux/skbuff.h> +#include <linux/timekeeping.h> #include "../ccm.h" #include "../nfp_app.h" @@ -175,29 +176,151 @@ nfp_bpf_ctrl_reply_val(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply, return &reply->data[bpf->cmsg_key_sz * (n + 1) + bpf->cmsg_val_sz * n]; } +static bool nfp_bpf_ctrl_op_cache_invalidate(enum nfp_ccm_type op) +{ + return op == NFP_CCM_TYPE_BPF_MAP_UPDATE || + op == NFP_CCM_TYPE_BPF_MAP_DELETE; +} + +static bool nfp_bpf_ctrl_op_cache_capable(enum nfp_ccm_type op) +{ + return op == NFP_CCM_TYPE_BPF_MAP_LOOKUP || + op == NFP_CCM_TYPE_BPF_MAP_GETNEXT; +} + +static bool nfp_bpf_ctrl_op_cache_fill(enum nfp_ccm_type op) +{ + return op == NFP_CCM_TYPE_BPF_MAP_GETFIRST || + op == NFP_CCM_TYPE_BPF_MAP_GETNEXT; +} + +static unsigned int +nfp_bpf_ctrl_op_cache_get(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op, + const u8 *key, u8 *out_key, u8 *out_value, + u32 *cache_gen) +{ + struct bpf_map *map = &nfp_map->offmap->map; + struct nfp_app_bpf *bpf = nfp_map->bpf; + unsigned int i, count, n_entries; + struct cmsg_reply_map_op *reply; + + n_entries = nfp_bpf_ctrl_op_cache_fill(op) ? 
bpf->cmsg_cache_cnt : 1; + + spin_lock(&nfp_map->cache_lock); + *cache_gen = nfp_map->cache_gen; + if (nfp_map->cache_blockers) + n_entries = 1; + + if (nfp_bpf_ctrl_op_cache_invalidate(op)) + goto exit_block; + if (!nfp_bpf_ctrl_op_cache_capable(op)) + goto exit_unlock; + + if (!nfp_map->cache) + goto exit_unlock; + if (nfp_map->cache_to < ktime_get_ns()) + goto exit_invalidate; + + reply = (void *)nfp_map->cache->data; + count = be32_to_cpu(reply->count); + + for (i = 0; i < count; i++) { + void *cached_key; + + cached_key = nfp_bpf_ctrl_reply_key(bpf, reply, i); + if (memcmp(cached_key, key, map->key_size)) + continue; + + if (op == NFP_CCM_TYPE_BPF_MAP_LOOKUP) + memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, i), + map->value_size); + if (op == NFP_CCM_TYPE_BPF_MAP_GETNEXT) { + if (i + 1 == count) + break; + + memcpy(out_key, + nfp_bpf_ctrl_reply_key(bpf, reply, i + 1), + map->key_size); + } + + n_entries = 0; + goto exit_unlock; + } + goto exit_unlock; + +exit_block: + nfp_map->cache_blockers++; +exit_invalidate: + dev_consume_skb_any(nfp_map->cache); + nfp_map->cache = NULL; +exit_unlock: + spin_unlock(&nfp_map->cache_lock); + return n_entries; +} + +static void +nfp_bpf_ctrl_op_cache_put(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op, + struct sk_buff *skb, u32 cache_gen) +{ + bool blocker, filler; + + blocker = nfp_bpf_ctrl_op_cache_invalidate(op); + filler = nfp_bpf_ctrl_op_cache_fill(op); + if (blocker || filler) { + u64 to = 0; + + if (filler) + to = ktime_get_ns() + NFP_BPF_MAP_CACHE_TIME_NS; + + spin_lock(&nfp_map->cache_lock); + if (blocker) { + nfp_map->cache_blockers--; + nfp_map->cache_gen++; + } + if (filler && !nfp_map->cache_blockers && + nfp_map->cache_gen == cache_gen) { + nfp_map->cache_to = to; + swap(nfp_map->cache, skb); + } + spin_unlock(&nfp_map->cache_lock); + } + + dev_consume_skb_any(skb); +} + static int nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op, u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value) { struct nfp_bpf_map *nfp_map = offmap->dev_priv; + unsigned int n_entries, reply_entries, count; struct nfp_app_bpf *bpf = nfp_map->bpf; struct bpf_map *map = &offmap->map; struct cmsg_reply_map_op *reply; struct cmsg_req_map_op *req; struct sk_buff *skb; + u32 cache_gen; int err; /* FW messages have no space for more than 32 bits of flags */ if (flags >> 32) return -EOPNOTSUPP; + /* Handle op cache */ + n_entries = nfp_bpf_ctrl_op_cache_get(nfp_map, op, key, out_key, + out_value, &cache_gen); + if (!n_entries) + return 0; + skb = nfp_bpf_cmsg_map_req_alloc(bpf, 1); - if (!skb) - return -ENOMEM; + if (!skb) { + err = -ENOMEM; + goto err_cache_put; + } req = (void *)skb->data; req->tid = cpu_to_be32(nfp_map->tid); - req->count = cpu_to_be32(1); + req->count = cpu_to_be32(n_entries); req->flags = cpu_to_be32(flags); /* Copy inputs */ @@ -207,16 +330,38 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op, memcpy(nfp_bpf_ctrl_req_val(bpf, req, 0), value, map->value_size); - skb = nfp_ccm_communicate(&bpf->ccm, skb, op, - nfp_bpf_cmsg_map_reply_size(bpf, 1)); - if (IS_ERR(skb)) - return PTR_ERR(skb); + skb = nfp_ccm_communicate(&bpf->ccm, skb, op, 0); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto err_cache_put; + } + + if (skb->len < sizeof(*reply)) { + cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d!\n", + op, skb->len); + err = -EIO; + goto err_free; + } reply = (void *)skb->data; + count = be32_to_cpu(reply->count); err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr); + /* FW 
responds with message sized to hold the good entries, + * plus one extra entry if there was an error. + */ + reply_entries = count + !!err; + if (n_entries > 1 && count) + err = 0; if (err) goto err_free; + if (skb->len != nfp_bpf_cmsg_map_reply_size(bpf, reply_entries)) { + cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d for %d entries!\n", + op, skb->len, reply_entries); + err = -EIO; + goto err_free; + } + /* Copy outputs */ if (out_key) memcpy(out_key, nfp_bpf_ctrl_reply_key(bpf, reply, 0), @@ -225,11 +370,13 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op, memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, 0), map->value_size); - dev_consume_skb_any(skb); + nfp_bpf_ctrl_op_cache_put(nfp_map, op, skb, cache_gen); return 0; err_free: dev_kfree_skb_any(skb); +err_cache_put: + nfp_bpf_ctrl_op_cache_put(nfp_map, op, NULL, cache_gen); return err; } @@ -267,11 +414,29 @@ int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, key, NULL, 0, next_key, NULL); } +unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf) +{ + return max(nfp_bpf_cmsg_map_req_size(bpf, 1), + nfp_bpf_cmsg_map_reply_size(bpf, 1)); +} + unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf) { - return max3((unsigned int)NFP_NET_DEFAULT_MTU, - nfp_bpf_cmsg_map_req_size(bpf, 1), - nfp_bpf_cmsg_map_reply_size(bpf, 1)); + return max3(NFP_NET_DEFAULT_MTU, + nfp_bpf_cmsg_map_req_size(bpf, NFP_BPF_MAP_CACHE_CNT), + nfp_bpf_cmsg_map_reply_size(bpf, NFP_BPF_MAP_CACHE_CNT)); +} + +unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf) +{ + unsigned int mtu, req_max, reply_max, entry_sz; + + mtu = bpf->app->ctrl->dp.mtu; + entry_sz = bpf->cmsg_key_sz + bpf->cmsg_val_sz; + req_max = (mtu - sizeof(struct cmsg_req_map_op)) / entry_sz; + reply_max = (mtu - sizeof(struct cmsg_reply_map_op)) / entry_sz; + + return min3(req_max, reply_max, NFP_BPF_MAP_CACHE_CNT); } void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h index 06c4286bd79e..a83a0ad5e27d 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h @@ -24,6 +24,7 @@ enum bpf_cap_tlv_type { NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5, NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6, NFP_BPF_CAP_TYPE_ABI_VERSION = 7, + NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT = 8, }; struct nfp_bpf_cap_tlv_func { diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 1c9fb11470df..8f732771d3fa 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -300,6 +300,14 @@ nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value, } static int +nfp_bpf_parse_cap_cmsg_multi_ent(struct nfp_app_bpf *bpf, void __iomem *value, + u32 length) +{ + bpf->cmsg_multi_ent = true; + return 0; +} + +static int nfp_bpf_parse_cap_abi_version(struct nfp_app_bpf *bpf, void __iomem *value, u32 length) { @@ -375,6 +383,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app) length)) goto err_release_free; break; + case NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT: + if (nfp_bpf_parse_cap_cmsg_multi_ent(app->priv, value, + length)) + goto err_release_free; + break; default: nfp_dbg(cpp, "unknown BPF capability: %d\n", type); break; @@ -415,6 +428,25 @@ static void nfp_bpf_ndo_uninit(struct nfp_app *app, struct net_device *netdev) bpf_offload_dev_netdev_unregister(bpf->bpf_dev, netdev); } +static int 
nfp_bpf_start(struct nfp_app *app) +{ + struct nfp_app_bpf *bpf = app->priv; + + if (app->ctrl->dp.mtu < nfp_bpf_ctrl_cmsg_min_mtu(bpf)) { + nfp_err(bpf->app->cpp, + "ctrl channel MTU below min required %u < %u\n", + app->ctrl->dp.mtu, nfp_bpf_ctrl_cmsg_min_mtu(bpf)); + return -EINVAL; + } + + if (bpf->cmsg_multi_ent) + bpf->cmsg_cache_cnt = nfp_bpf_ctrl_cmsg_cache_cnt(bpf); + else + bpf->cmsg_cache_cnt = 1; + + return 0; +} + static int nfp_bpf_init(struct nfp_app *app) { struct nfp_app_bpf *bpf; @@ -488,6 +520,7 @@ const struct nfp_app_type app_bpf = { .init = nfp_bpf_init, .clean = nfp_bpf_clean, + .start = nfp_bpf_start, .check_mtu = nfp_bpf_check_mtu, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 57d6ff51e980..fac9c6f9e197 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -99,6 +99,7 @@ enum pkt_vec { * @maps_neutral: hash table of offload-neutral maps (on pointer) * * @abi_version: global BPF ABI version + * @cmsg_cache_cnt: number of entries to read for caching * * @adjust_head: adjust head capability * @adjust_head.flags: extra flags for adjust head @@ -124,6 +125,7 @@ enum pkt_vec { * @pseudo_random: FW initialized the pseudo-random machinery (CSRs) * @queue_select: BPF can set the RX queue ID in packet vector * @adjust_tail: BPF can simply trunc packet size for adjust tail + * @cmsg_multi_ent: FW can pack multiple map entries in a single cmsg */ struct nfp_app_bpf { struct nfp_app *app; @@ -134,6 +136,8 @@ struct nfp_app_bpf { unsigned int cmsg_key_sz; unsigned int cmsg_val_sz; + unsigned int cmsg_cache_cnt; + struct list_head map_list; unsigned int maps_in_use; unsigned int map_elems_in_use; @@ -169,6 +173,7 @@ struct nfp_app_bpf { bool pseudo_random; bool queue_select; bool adjust_tail; + bool cmsg_multi_ent; }; enum nfp_bpf_map_use { @@ -183,11 +188,21 @@ struct nfp_bpf_map_word { unsigned char non_zero_update :1; }; +#define NFP_BPF_MAP_CACHE_CNT 4U +#define NFP_BPF_MAP_CACHE_TIME_NS (250 * 1000) + /** * struct nfp_bpf_map - private per-map data attached to BPF maps for offload * @offmap: pointer to the offloaded BPF map * @bpf: back pointer to bpf app private structure * @tid: table id identifying map on datapath + * + * @cache_lock: protects @cache_blockers, @cache_to, @cache + * @cache_blockers: number of ops in flight which block caching + * @cache_gen: counter incremented by every blocker on exit + * @cache_to: time when cache will no longer be valid (ns) + * @cache: skb with cached response + * * @l: link on the nfp_app_bpf->map_list list * @use_map: map of how the value is used (in 4B chunks) */ @@ -195,6 +210,13 @@ struct nfp_bpf_map { struct bpf_offloaded_map *offmap; struct nfp_app_bpf *bpf; u32 tid; + + spinlock_t cache_lock; + u32 cache_blockers; + u32 cache_gen; + u64 cache_to; + struct sk_buff *cache; + struct list_head l; struct nfp_bpf_map_word use_map[]; }; @@ -564,7 +586,9 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv); +unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf); unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf); +unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf); long long int nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map); void diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 
39c9fec222b4..88fab6a82acf 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -385,6 +385,7 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) offmap->dev_priv = nfp_map; nfp_map->offmap = offmap; nfp_map->bpf = bpf; + spin_lock_init(&nfp_map->cache_lock); res = nfp_bpf_ctrl_alloc_map(bpf, &offmap->map); if (res < 0) { @@ -407,6 +408,8 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) struct nfp_bpf_map *nfp_map = offmap->dev_priv; nfp_bpf_ctrl_free_map(bpf, nfp_map); + dev_consume_skb_any(nfp_map->cache); + WARN_ON_ONCE(nfp_map->cache_blockers); list_del_init(&nfp_map->l); bpf->map_elems_in_use -= offmap->map.max_entries; bpf->maps_in_use--; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 5d6c3738b494..250f510b1d21 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -66,7 +66,7 @@ #define NFP_NET_MAX_DMA_BITS 40 /* Default size for MTU and freelist buffer sizes */ -#define NFP_NET_DEFAULT_MTU 1500 +#define NFP_NET_DEFAULT_MTU 1500U /* Maximum number of bytes prepended to a packet */ #define NFP_NET_MAX_PREPEND 64 diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 6f97b554f7da..61aabffc8888 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -4116,14 +4116,7 @@ int nfp_net_init(struct nfp_net *nn) /* Set default MTU and Freelist buffer size */ if (!nfp_net_is_data_vnic(nn) && nn->app->ctrl_mtu) { - if (nn->app->ctrl_mtu <= nn->max_mtu) { - nn->dp.mtu = nn->app->ctrl_mtu; - } else { - if (nn->app->ctrl_mtu != NFP_APP_CTRL_MTU_MAX) - nn_warn(nn, "app requested MTU above max supported %u > %u\n", - nn->app->ctrl_mtu, nn->max_mtu); - nn->dp.mtu = nn->max_mtu; - } + nn->dp.mtu = min(nn->app->ctrl_mtu, nn->max_mtu); } else if (nn->max_mtu < NFP_NET_DEFAULT_MTU) { nn->dp.mtu = nn->max_mtu; } else { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 986464d4a206..68db47d8e8b6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -205,10 +205,8 @@ nfp_net_pf_alloc_vnics(struct nfp_pf *pf, void __iomem *ctrl_bar, ctrl_bar += NFP_PF_CSR_SLICE_SIZE; /* Kill the vNIC if app init marked it as invalid */ - if (nn->port && nn->port->type == NFP_PORT_INVALID) { + if (nn->port && nn->port->type == NFP_PORT_INVALID) nfp_net_pf_free_vnic(pf, nn); - continue; - } } if (list_empty(&pf->vnics)) diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig new file mode 100644 index 000000000000..5ea570be8379 --- /dev/null +++ b/drivers/net/ethernet/pensando/Kconfig @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2019 Pensando Systems, Inc +# +# Pensando device configuration +# + +config NET_VENDOR_PENSANDO + bool "Pensando devices" + default y + help + If you have a network (Ethernet) card belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Pensando cards. If you say Y, you will be asked + for your specific card in the following questions. 
+ +if NET_VENDOR_PENSANDO + +config IONIC + tristate "Pensando Ethernet IONIC Support" + depends on 64BIT && PCI + help + This enables the support for the Pensando family of Ethernet + adapters. More specific information on this driver can be + found in + <file:Documentation/networking/device_drivers/pensando/ionic.rst>. + + To compile this driver as a module, choose M here. The module + will be called ionic. + +endif # NET_VENDOR_PENSANDO diff --git a/drivers/net/ethernet/pensando/Makefile b/drivers/net/ethernet/pensando/Makefile new file mode 100644 index 000000000000..21ce7499c122 --- /dev/null +++ b/drivers/net/ethernet/pensando/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Pensando network device drivers. +# + +obj-$(CONFIG_IONIC) += ionic/ diff --git a/drivers/net/ethernet/pensando/ionic/Makefile b/drivers/net/ethernet/pensando/ionic/Makefile new file mode 100644 index 000000000000..29f304d75261 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2017 - 2019 Pensando Systems, Inc + +obj-$(CONFIG_IONIC) := ionic.o + +ionic-y := ionic_main.o ionic_bus_pci.o ionic_devlink.o ionic_dev.o \ + ionic_debugfs.o ionic_lif.o ionic_rx_filter.o ionic_ethtool.o \ + ionic_txrx.o ionic_stats.o diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h new file mode 100644 index 000000000000..7a7060677f15 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#ifndef _IONIC_H_ +#define _IONIC_H_ + +struct ionic_lif; + +#include "ionic_if.h" +#include "ionic_dev.h" +#include "ionic_devlink.h" + +#define IONIC_DRV_NAME "ionic" +#define IONIC_DRV_DESCRIPTION "Pensando Ethernet NIC Driver" +#define IONIC_DRV_VERSION "0.15.0-k" + +#define PCI_VENDOR_ID_PENSANDO 0x1dd8 + +#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF 0x1002 +#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003 + +#define IONIC_SUBDEV_ID_NAPLES_25 0x4000 +#define IONIC_SUBDEV_ID_NAPLES_100_4 0x4001 +#define IONIC_SUBDEV_ID_NAPLES_100_8 0x4002 + +#define DEVCMD_TIMEOUT 10 + +struct ionic { + struct pci_dev *pdev; + struct device *dev; + struct devlink_port dl_port; + struct ionic_dev idev; + struct mutex dev_cmd_lock; /* lock for dev_cmd operations */ + struct dentry *dentry; + struct ionic_dev_bar bars[IONIC_BARS_MAX]; + unsigned int num_bars; + struct ionic_identity ident; + struct list_head lifs; + struct ionic_lif *master_lif; + unsigned int nnqs_per_lif; + unsigned int neqs_per_lif; + unsigned int ntxqs_per_lif; + unsigned int nrxqs_per_lif; + DECLARE_BITMAP(lifbits, IONIC_LIFS_MAX); + unsigned int nintrs; + DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX); + struct work_struct nb_work; + struct notifier_block nb; +}; + +struct ionic_admin_ctx { + struct completion work; + union ionic_adminq_cmd cmd; + union ionic_adminq_comp comp; +}; + +int ionic_napi(struct napi_struct *napi, int budget, ionic_cq_cb cb, + ionic_cq_done_cb done_cb, void *done_arg); + +int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx); +int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait); +int ionic_set_dma_mask(struct ionic *ionic); +int ionic_setup(struct ionic *ionic); + +int ionic_identify(struct ionic *ionic); +int ionic_init(struct ionic *ionic); +int ionic_reset(struct ionic *ionic); + +int ionic_port_identify(struct ionic *ionic); +int 
ionic_port_init(struct ionic *ionic); +int ionic_port_reset(struct ionic *ionic); + +#endif /* _IONIC_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus.h b/drivers/net/ethernet/pensando/ionic/ionic_bus.h new file mode 100644 index 000000000000..2f4d08c64910 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#ifndef _IONIC_BUS_H_ +#define _IONIC_BUS_H_ + +int ionic_bus_get_irq(struct ionic *ionic, unsigned int num); +const char *ionic_bus_info(struct ionic *ionic); +int ionic_bus_alloc_irq_vectors(struct ionic *ionic, unsigned int nintrs); +void ionic_bus_free_irq_vectors(struct ionic *ionic); +int ionic_bus_register_driver(void); +void ionic_bus_unregister_driver(void); +void __iomem *ionic_bus_map_dbpage(struct ionic *ionic, int page_num); +void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page); + +#endif /* _IONIC_BUS_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c new file mode 100644 index 000000000000..9a9ab8cb2cb3 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/pci.h> + +#include "ionic.h" +#include "ionic_bus.h" +#include "ionic_lif.h" +#include "ionic_debugfs.h" + +/* Supported devices */ +static const struct pci_device_id ionic_id_table[] = { + { PCI_VDEVICE(PENSANDO, PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF) }, + { PCI_VDEVICE(PENSANDO, PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF) }, + { 0, } /* end of table */ +}; +MODULE_DEVICE_TABLE(pci, ionic_id_table); + +int ionic_bus_get_irq(struct ionic *ionic, unsigned int num) +{ + return pci_irq_vector(ionic->pdev, num); +} + +const char *ionic_bus_info(struct ionic *ionic) +{ + return pci_name(ionic->pdev); +} + +int ionic_bus_alloc_irq_vectors(struct ionic *ionic, unsigned int nintrs) +{ + return pci_alloc_irq_vectors(ionic->pdev, nintrs, nintrs, + PCI_IRQ_MSIX); +} + +void ionic_bus_free_irq_vectors(struct ionic *ionic) +{ + pci_free_irq_vectors(ionic->pdev); +} + +static int ionic_map_bars(struct ionic *ionic) +{ + struct pci_dev *pdev = ionic->pdev; + struct device *dev = ionic->dev; + struct ionic_dev_bar *bars; + unsigned int i, j; + + bars = ionic->bars; + ionic->num_bars = 0; + + for (i = 0, j = 0; i < IONIC_BARS_MAX; i++) { + if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) + continue; + bars[j].len = pci_resource_len(pdev, i); + + /* only map the whole bar 0 */ + if (j > 0) { + bars[j].vaddr = NULL; + } else { + bars[j].vaddr = pci_iomap(pdev, i, bars[j].len); + if (!bars[j].vaddr) { + dev_err(dev, + "Cannot memory-map BAR %d, aborting\n", + i); + return -ENODEV; + } + } + + bars[j].bus_addr = pci_resource_start(pdev, i); + bars[j].res_index = i; + ionic->num_bars++; + j++; + } + + return 0; +} + +static void ionic_unmap_bars(struct ionic *ionic) +{ + struct ionic_dev_bar *bars = ionic->bars; + unsigned int i; + + for (i = 0; i < IONIC_BARS_MAX; i++) { + if (bars[i].vaddr) { + iounmap(bars[i].vaddr); + bars[i].bus_addr = 0; + bars[i].vaddr = NULL; + bars[i].len = 0; + } + } +} + +void __iomem *ionic_bus_map_dbpage(struct ionic *ionic, int page_num) +{ + return pci_iomap_range(ionic->pdev, + ionic->bars[IONIC_PCI_BAR_DBELL].res_index, + (u64)page_num << PAGE_SHIFT, 
PAGE_SIZE); +} + +void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page) +{ + iounmap(page); +} + +static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct ionic *ionic; + int err; + + ionic = ionic_devlink_alloc(dev); + if (!ionic) + return -ENOMEM; + + ionic->pdev = pdev; + ionic->dev = dev; + pci_set_drvdata(pdev, ionic); + mutex_init(&ionic->dev_cmd_lock); + + /* Query system for DMA addressing limitation for the device. */ + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(IONIC_ADDR_LEN)); + if (err) { + dev_err(dev, "Unable to obtain 64-bit DMA for consistent allocations, aborting. err=%d\n", + err); + goto err_out_clear_drvdata; + } + + ionic_debugfs_add_dev(ionic); + + /* Setup PCI device */ + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(dev, "Cannot enable PCI device: %d, aborting\n", err); + goto err_out_debugfs_del_dev; + } + + err = pci_request_regions(pdev, IONIC_DRV_NAME); + if (err) { + dev_err(dev, "Cannot request PCI regions: %d, aborting\n", err); + goto err_out_pci_disable_device; + } + + pci_set_master(pdev); + + err = ionic_map_bars(ionic); + if (err) + goto err_out_pci_clear_master; + + /* Configure the device */ + err = ionic_setup(ionic); + if (err) { + dev_err(dev, "Cannot setup device: %d, aborting\n", err); + goto err_out_unmap_bars; + } + + err = ionic_identify(ionic); + if (err) { + dev_err(dev, "Cannot identify device: %d, aborting\n", err); + goto err_out_teardown; + } + + err = ionic_init(ionic); + if (err) { + dev_err(dev, "Cannot init device: %d, aborting\n", err); + goto err_out_teardown; + } + + /* Configure the ports */ + err = ionic_port_identify(ionic); + if (err) { + dev_err(dev, "Cannot identify port: %d, aborting\n", err); + goto err_out_reset; + } + + err = ionic_port_init(ionic); + if (err) { + dev_err(dev, "Cannot init port: %d, aborting\n", err); + goto err_out_reset; + } + + /* Configure LIFs */ + err = ionic_lif_identify(ionic, IONIC_LIF_TYPE_CLASSIC, + &ionic->ident.lif); + if (err) { + dev_err(dev, "Cannot identify LIFs: %d, aborting\n", err); + goto err_out_port_reset; + } + + err = ionic_lifs_size(ionic); + if (err) { + dev_err(dev, "Cannot size LIFs: %d, aborting\n", err); + goto err_out_port_reset; + } + + err = ionic_lifs_alloc(ionic); + if (err) { + dev_err(dev, "Cannot allocate LIFs: %d, aborting\n", err); + goto err_out_free_irqs; + } + + err = ionic_lifs_init(ionic); + if (err) { + dev_err(dev, "Cannot init LIFs: %d, aborting\n", err); + goto err_out_free_lifs; + } + + err = ionic_lifs_register(ionic); + if (err) { + dev_err(dev, "Cannot register LIFs: %d, aborting\n", err); + goto err_out_deinit_lifs; + } + + err = ionic_devlink_register(ionic); + if (err) { + dev_err(dev, "Cannot register devlink: %d\n", err); + goto err_out_deregister_lifs; + } + + return 0; + +err_out_deregister_lifs: + ionic_lifs_unregister(ionic); +err_out_deinit_lifs: + ionic_lifs_deinit(ionic); +err_out_free_lifs: + ionic_lifs_free(ionic); +err_out_free_irqs: + ionic_bus_free_irq_vectors(ionic); +err_out_port_reset: + ionic_port_reset(ionic); +err_out_reset: + ionic_reset(ionic); +err_out_teardown: + ionic_dev_teardown(ionic); +err_out_unmap_bars: + ionic_unmap_bars(ionic); + pci_release_regions(pdev); +err_out_pci_clear_master: + pci_clear_master(pdev); +err_out_pci_disable_device: + pci_disable_device(pdev); +err_out_debugfs_del_dev: + ionic_debugfs_del_dev(ionic); +err_out_clear_drvdata: + mutex_destroy(&ionic->dev_cmd_lock); + ionic_devlink_free(ionic); + + 
return err; +} + +static void ionic_remove(struct pci_dev *pdev) +{ + struct ionic *ionic = pci_get_drvdata(pdev); + + if (!ionic) + return; + + ionic_devlink_unregister(ionic); + ionic_lifs_unregister(ionic); + ionic_lifs_deinit(ionic); + ionic_lifs_free(ionic); + ionic_bus_free_irq_vectors(ionic); + ionic_port_reset(ionic); + ionic_reset(ionic); + ionic_dev_teardown(ionic); + ionic_unmap_bars(ionic); + pci_release_regions(pdev); + pci_clear_master(pdev); + pci_disable_device(pdev); + ionic_debugfs_del_dev(ionic); + mutex_destroy(&ionic->dev_cmd_lock); + ionic_devlink_free(ionic); +} + +static struct pci_driver ionic_driver = { + .name = IONIC_DRV_NAME, + .id_table = ionic_id_table, + .probe = ionic_probe, + .remove = ionic_remove, +}; + +int ionic_bus_register_driver(void) +{ + return pci_register_driver(&ionic_driver); +} + +void ionic_bus_unregister_driver(void) +{ + pci_unregister_driver(&ionic_driver); +} diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c new file mode 100644 index 000000000000..7afc4a365b75 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#include <linux/pci.h> +#include <linux/netdevice.h> + +#include "ionic.h" +#include "ionic_bus.h" +#include "ionic_lif.h" +#include "ionic_debugfs.h" + +#ifdef CONFIG_DEBUG_FS + +static struct dentry *ionic_dir; + +void ionic_debugfs_create(void) +{ + ionic_dir = debugfs_create_dir(IONIC_DRV_NAME, NULL); +} + +void ionic_debugfs_destroy(void) +{ + debugfs_remove_recursive(ionic_dir); +} + +void ionic_debugfs_add_dev(struct ionic *ionic) +{ + ionic->dentry = debugfs_create_dir(ionic_bus_info(ionic), ionic_dir); +} + +void ionic_debugfs_del_dev(struct ionic *ionic) +{ + debugfs_remove_recursive(ionic->dentry); + ionic->dentry = NULL; +} + +static int identity_show(struct seq_file *seq, void *v) +{ + struct ionic *ionic = seq->private; + struct ionic_identity *ident; + + ident = &ionic->ident; + + seq_printf(seq, "nlifs: %d\n", ident->dev.nlifs); + seq_printf(seq, "nintrs: %d\n", ident->dev.nintrs); + seq_printf(seq, "ndbpgs_per_lif: %d\n", ident->dev.ndbpgs_per_lif); + seq_printf(seq, "intr_coal_mult: %d\n", ident->dev.intr_coal_mult); + seq_printf(seq, "intr_coal_div: %d\n", ident->dev.intr_coal_div); + + seq_printf(seq, "max_ucast_filters: %d\n", ident->lif.eth.max_ucast_filters); + seq_printf(seq, "max_mcast_filters: %d\n", ident->lif.eth.max_mcast_filters); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(identity); + +void ionic_debugfs_add_ident(struct ionic *ionic) +{ + debugfs_create_file("identity", 0400, ionic->dentry, + ionic, &identity_fops) ? 
0 : -EOPNOTSUPP; +} + +void ionic_debugfs_add_sizes(struct ionic *ionic) +{ + debugfs_create_u32("nlifs", 0400, ionic->dentry, + (u32 *)&ionic->ident.dev.nlifs); + debugfs_create_u32("nintrs", 0400, ionic->dentry, &ionic->nintrs); + + debugfs_create_u32("ntxqs_per_lif", 0400, ionic->dentry, + (u32 *)&ionic->ident.lif.eth.config.queue_count[IONIC_QTYPE_TXQ]); + debugfs_create_u32("nrxqs_per_lif", 0400, ionic->dentry, + (u32 *)&ionic->ident.lif.eth.config.queue_count[IONIC_QTYPE_RXQ]); +} + +static int q_tail_show(struct seq_file *seq, void *v) +{ + struct ionic_queue *q = seq->private; + + seq_printf(seq, "%d\n", q->tail->index); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(q_tail); + +static int q_head_show(struct seq_file *seq, void *v) +{ + struct ionic_queue *q = seq->private; + + seq_printf(seq, "%d\n", q->head->index); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(q_head); + +static int cq_tail_show(struct seq_file *seq, void *v) +{ + struct ionic_cq *cq = seq->private; + + seq_printf(seq, "%d\n", cq->tail->index); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(cq_tail); + +static const struct debugfs_reg32 intr_ctrl_regs[] = { + { .name = "coal_init", .offset = 0, }, + { .name = "mask", .offset = 4, }, + { .name = "credits", .offset = 8, }, + { .name = "mask_on_assert", .offset = 12, }, + { .name = "coal_timer", .offset = 16, }, +}; + +void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq) +{ + struct dentry *q_dentry, *cq_dentry, *intr_dentry, *stats_dentry; + struct ionic_dev *idev = &lif->ionic->idev; + struct debugfs_regset32 *intr_ctrl_regset; + struct ionic_intr_info *intr = &qcq->intr; + struct debugfs_blob_wrapper *desc_blob; + struct device *dev = lif->ionic->dev; + struct ionic_queue *q = &qcq->q; + struct ionic_cq *cq = &qcq->cq; + + qcq->dentry = debugfs_create_dir(q->name, lif->dentry); + + debugfs_create_x32("total_size", 0400, qcq->dentry, &qcq->total_size); + debugfs_create_x64("base_pa", 0400, qcq->dentry, &qcq->base_pa); + + q_dentry = debugfs_create_dir("q", qcq->dentry); + + debugfs_create_u32("index", 0400, q_dentry, &q->index); + debugfs_create_x64("base_pa", 0400, q_dentry, &q->base_pa); + if (qcq->flags & IONIC_QCQ_F_SG) { + debugfs_create_x64("sg_base_pa", 0400, q_dentry, + &q->sg_base_pa); + debugfs_create_u32("sg_desc_size", 0400, q_dentry, + &q->sg_desc_size); + } + debugfs_create_u32("num_descs", 0400, q_dentry, &q->num_descs); + debugfs_create_u32("desc_size", 0400, q_dentry, &q->desc_size); + debugfs_create_u32("pid", 0400, q_dentry, &q->pid); + debugfs_create_u32("qid", 0400, q_dentry, &q->hw_index); + debugfs_create_u32("qtype", 0400, q_dentry, &q->hw_type); + debugfs_create_u64("drop", 0400, q_dentry, &q->drop); + debugfs_create_u64("stop", 0400, q_dentry, &q->stop); + debugfs_create_u64("wake", 0400, q_dentry, &q->wake); + + debugfs_create_file("tail", 0400, q_dentry, q, &q_tail_fops); + debugfs_create_file("head", 0400, q_dentry, q, &q_head_fops); + + desc_blob = devm_kzalloc(dev, sizeof(*desc_blob), GFP_KERNEL); + if (!desc_blob) + return; + desc_blob->data = q->base; + desc_blob->size = (unsigned long)q->num_descs * q->desc_size; + debugfs_create_blob("desc_blob", 0400, q_dentry, desc_blob); + + if (qcq->flags & IONIC_QCQ_F_SG) { + desc_blob = devm_kzalloc(dev, sizeof(*desc_blob), GFP_KERNEL); + if (!desc_blob) + return; + desc_blob->data = q->sg_base; + desc_blob->size = (unsigned long)q->num_descs * q->sg_desc_size; + debugfs_create_blob("sg_desc_blob", 0400, q_dentry, + desc_blob); + } + + cq_dentry = debugfs_create_dir("cq", qcq->dentry); + + 
debugfs_create_x64("base_pa", 0400, cq_dentry, &cq->base_pa); + debugfs_create_u32("num_descs", 0400, cq_dentry, &cq->num_descs); + debugfs_create_u32("desc_size", 0400, cq_dentry, &cq->desc_size); + debugfs_create_u8("done_color", 0400, cq_dentry, + (u8 *)&cq->done_color); + + debugfs_create_file("tail", 0400, cq_dentry, cq, &cq_tail_fops); + + desc_blob = devm_kzalloc(dev, sizeof(*desc_blob), GFP_KERNEL); + if (!desc_blob) + return; + desc_blob->data = cq->base; + desc_blob->size = (unsigned long)cq->num_descs * cq->desc_size; + debugfs_create_blob("desc_blob", 0400, cq_dentry, desc_blob); + + if (qcq->flags & IONIC_QCQ_F_INTR) { + intr_dentry = debugfs_create_dir("intr", qcq->dentry); + + debugfs_create_u32("index", 0400, intr_dentry, + &intr->index); + debugfs_create_u32("vector", 0400, intr_dentry, + &intr->vector); + + intr_ctrl_regset = devm_kzalloc(dev, sizeof(*intr_ctrl_regset), + GFP_KERNEL); + if (!intr_ctrl_regset) + return; + intr_ctrl_regset->regs = intr_ctrl_regs; + intr_ctrl_regset->nregs = ARRAY_SIZE(intr_ctrl_regs); + intr_ctrl_regset->base = &idev->intr_ctrl[intr->index]; + + debugfs_create_regset32("intr_ctrl", 0400, intr_dentry, + intr_ctrl_regset); + } + + if (qcq->flags & IONIC_QCQ_F_NOTIFYQ) { + stats_dentry = debugfs_create_dir("notifyblock", qcq->dentry); + + debugfs_create_u64("eid", 0400, stats_dentry, + (u64 *)&lif->info->status.eid); + debugfs_create_u16("link_status", 0400, stats_dentry, + (u16 *)&lif->info->status.link_status); + debugfs_create_u32("link_speed", 0400, stats_dentry, + (u32 *)&lif->info->status.link_speed); + debugfs_create_u16("link_down_count", 0400, stats_dentry, + (u16 *)&lif->info->status.link_down_count); + } +} + +static int netdev_show(struct seq_file *seq, void *v) +{ + struct net_device *netdev = seq->private; + + seq_printf(seq, "%s\n", netdev->name); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(netdev); + +void ionic_debugfs_add_lif(struct ionic_lif *lif) +{ + lif->dentry = debugfs_create_dir(lif->name, lif->ionic->dentry); + debugfs_create_file("netdev", 0400, lif->dentry, + lif->netdev, &netdev_fops); +} + +void ionic_debugfs_del_lif(struct ionic_lif *lif) +{ + debugfs_remove_recursive(lif->dentry); + lif->dentry = NULL; +} + +void ionic_debugfs_del_qcq(struct ionic_qcq *qcq) +{ + debugfs_remove_recursive(qcq->dentry); + qcq->dentry = NULL; +} + +#endif diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.h b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.h new file mode 100644 index 000000000000..c44ebde170b6 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#ifndef _IONIC_DEBUGFS_H_ +#define _IONIC_DEBUGFS_H_ + +#include <linux/debugfs.h> + +#ifdef CONFIG_DEBUG_FS + +void ionic_debugfs_create(void); +void ionic_debugfs_destroy(void); +void ionic_debugfs_add_dev(struct ionic *ionic); +void ionic_debugfs_del_dev(struct ionic *ionic); +void ionic_debugfs_add_ident(struct ionic *ionic); +void ionic_debugfs_add_sizes(struct ionic *ionic); +void ionic_debugfs_add_lif(struct ionic_lif *lif); +void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq); +void ionic_debugfs_del_lif(struct ionic_lif *lif); +void ionic_debugfs_del_qcq(struct ionic_qcq *qcq); +#else +static inline void ionic_debugfs_create(void) { } +static inline void ionic_debugfs_destroy(void) { } +static inline void ionic_debugfs_add_dev(struct ionic *ionic) { } +static inline void ionic_debugfs_del_dev(struct 
ionic *ionic) { } +static inline void ionic_debugfs_add_ident(struct ionic *ionic) { } +static inline void ionic_debugfs_add_sizes(struct ionic *ionic) { } +static inline void ionic_debugfs_add_lif(struct ionic_lif *lif) { } +static inline void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq) { } +static inline void ionic_debugfs_del_lif(struct ionic_lif *lif) { } +static inline void ionic_debugfs_del_qcq(struct ionic_qcq *qcq) { } +#endif + +#endif /* _IONIC_DEBUGFS_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c new file mode 100644 index 000000000000..d168a6435322 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -0,0 +1,500 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/io.h> +#include <linux/slab.h> +#include <linux/etherdevice.h> +#include "ionic.h" +#include "ionic_dev.h" +#include "ionic_lif.h" + +void ionic_init_devinfo(struct ionic *ionic) +{ + struct ionic_dev *idev = &ionic->idev; + + idev->dev_info.asic_type = ioread8(&idev->dev_info_regs->asic_type); + idev->dev_info.asic_rev = ioread8(&idev->dev_info_regs->asic_rev); + + memcpy_fromio(idev->dev_info.fw_version, + idev->dev_info_regs->fw_version, + IONIC_DEVINFO_FWVERS_BUFLEN); + + memcpy_fromio(idev->dev_info.serial_num, + idev->dev_info_regs->serial_num, + IONIC_DEVINFO_SERIAL_BUFLEN); + + idev->dev_info.fw_version[IONIC_DEVINFO_FWVERS_BUFLEN] = 0; + idev->dev_info.serial_num[IONIC_DEVINFO_SERIAL_BUFLEN] = 0; + + dev_dbg(ionic->dev, "fw_version %s\n", idev->dev_info.fw_version); +} + +int ionic_dev_setup(struct ionic *ionic) +{ + struct ionic_dev_bar *bar = ionic->bars; + unsigned int num_bars = ionic->num_bars; + struct ionic_dev *idev = &ionic->idev; + struct device *dev = ionic->dev; + u32 sig; + + /* BAR0: dev_cmd and interrupts */ + if (num_bars < 1) { + dev_err(dev, "No bars found, aborting\n"); + return -EFAULT; + } + + if (bar->len < IONIC_BAR0_SIZE) { + dev_err(dev, "Resource bar size %lu too small, aborting\n", + bar->len); + return -EFAULT; + } + + idev->dev_info_regs = bar->vaddr + IONIC_BAR0_DEV_INFO_REGS_OFFSET; + idev->dev_cmd_regs = bar->vaddr + IONIC_BAR0_DEV_CMD_REGS_OFFSET; + idev->intr_status = bar->vaddr + IONIC_BAR0_INTR_STATUS_OFFSET; + idev->intr_ctrl = bar->vaddr + IONIC_BAR0_INTR_CTRL_OFFSET; + + sig = ioread32(&idev->dev_info_regs->signature); + if (sig != IONIC_DEV_INFO_SIGNATURE) { + dev_err(dev, "Incompatible firmware signature %x", sig); + return -EFAULT; + } + + ionic_init_devinfo(ionic); + + /* BAR1: doorbells */ + bar++; + if (num_bars < 2) { + dev_err(dev, "Doorbell bar missing, aborting\n"); + return -EFAULT; + } + + idev->db_pages = bar->vaddr; + idev->phy_db_pages = bar->bus_addr; + + return 0; +} + +void ionic_dev_teardown(struct ionic *ionic) +{ + /* place holder */ +} + +/* Devcmd Interface */ +u8 ionic_dev_cmd_status(struct ionic_dev *idev) +{ + return ioread8(&idev->dev_cmd_regs->comp.comp.status); +} + +bool ionic_dev_cmd_done(struct ionic_dev *idev) +{ + return ioread32(&idev->dev_cmd_regs->done) & IONIC_DEV_CMD_DONE; +} + +void ionic_dev_cmd_comp(struct ionic_dev *idev, union ionic_dev_cmd_comp *comp) +{ + memcpy_fromio(comp, &idev->dev_cmd_regs->comp, sizeof(*comp)); +} + +void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd) +{ + memcpy_toio(&idev->dev_cmd_regs->cmd, cmd, sizeof(*cmd)); + iowrite32(0, 
&idev->dev_cmd_regs->done); + iowrite32(1, &idev->dev_cmd_regs->doorbell); +} + +/* Device commands */ +void ionic_dev_cmd_identify(struct ionic_dev *idev, u8 ver) +{ + union ionic_dev_cmd cmd = { + .identify.opcode = IONIC_CMD_IDENTIFY, + .identify.ver = ver, + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +void ionic_dev_cmd_init(struct ionic_dev *idev) +{ + union ionic_dev_cmd cmd = { + .init.opcode = IONIC_CMD_INIT, + .init.type = 0, + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +void ionic_dev_cmd_reset(struct ionic_dev *idev) +{ + union ionic_dev_cmd cmd = { + .reset.opcode = IONIC_CMD_RESET, + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +/* Port commands */ +void ionic_dev_cmd_port_identify(struct ionic_dev *idev) +{ + union ionic_dev_cmd cmd = { + .port_init.opcode = IONIC_CMD_PORT_IDENTIFY, + .port_init.index = 0, + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +void ionic_dev_cmd_port_init(struct ionic_dev *idev) +{ + union ionic_dev_cmd cmd = { + .port_init.opcode = IONIC_CMD_PORT_INIT, + .port_init.index = 0, + .port_init.info_pa = cpu_to_le64(idev->port_info_pa), + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +void ionic_dev_cmd_port_reset(struct ionic_dev *idev) +{ + union ionic_dev_cmd cmd = { + .port_reset.opcode = IONIC_CMD_PORT_RESET, + .port_reset.index = 0, + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +void ionic_dev_cmd_port_state(struct ionic_dev *idev, u8 state) +{ + union ionic_dev_cmd cmd = { + .port_setattr.opcode = IONIC_CMD_PORT_SETATTR, + .port_setattr.index = 0, + .port_setattr.attr = IONIC_PORT_ATTR_STATE, + .port_setattr.state = state, + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +void ionic_dev_cmd_port_speed(struct ionic_dev *idev, u32 speed) +{ + union ionic_dev_cmd cmd = { + .port_setattr.opcode = IONIC_CMD_PORT_SETATTR, + .port_setattr.index = 0, + .port_setattr.attr = IONIC_PORT_ATTR_SPEED, + .port_setattr.speed = cpu_to_le32(speed), + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable) +{ + union ionic_dev_cmd cmd = { + .port_setattr.opcode = IONIC_CMD_PORT_SETATTR, + .port_setattr.index = 0, + .port_setattr.attr = IONIC_PORT_ATTR_AUTONEG, + .port_setattr.an_enable = an_enable, + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type) +{ + union ionic_dev_cmd cmd = { + .port_setattr.opcode = IONIC_CMD_PORT_SETATTR, + .port_setattr.index = 0, + .port_setattr.attr = IONIC_PORT_ATTR_FEC, + .port_setattr.fec_type = fec_type, + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type) +{ + union ionic_dev_cmd cmd = { + .port_setattr.opcode = IONIC_CMD_PORT_SETATTR, + .port_setattr.index = 0, + .port_setattr.attr = IONIC_PORT_ATTR_PAUSE, + .port_setattr.pause_type = pause_type, + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +/* LIF commands */ +void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver) +{ + union ionic_dev_cmd cmd = { + .lif_identify.opcode = IONIC_CMD_LIF_IDENTIFY, + .lif_identify.type = type, + .lif_identify.ver = ver, + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +void ionic_dev_cmd_lif_init(struct ionic_dev *idev, u16 lif_index, + dma_addr_t info_pa) +{ + union ionic_dev_cmd cmd = { + .lif_init.opcode = IONIC_CMD_LIF_INIT, + .lif_init.index = cpu_to_le16(lif_index), + .lif_init.info_pa = cpu_to_le64(info_pa), + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +void ionic_dev_cmd_lif_reset(struct ionic_dev *idev, u16 lif_index) +{ + union ionic_dev_cmd cmd = { + .lif_init.opcode = 
IONIC_CMD_LIF_RESET, + .lif_init.index = cpu_to_le16(lif_index), + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +void ionic_dev_cmd_adminq_init(struct ionic_dev *idev, struct ionic_qcq *qcq, + u16 lif_index, u16 intr_index) +{ + struct ionic_queue *q = &qcq->q; + struct ionic_cq *cq = &qcq->cq; + + union ionic_dev_cmd cmd = { + .q_init.opcode = IONIC_CMD_Q_INIT, + .q_init.lif_index = cpu_to_le16(lif_index), + .q_init.type = q->type, + .q_init.index = cpu_to_le32(q->index), + .q_init.flags = cpu_to_le16(IONIC_QINIT_F_IRQ | + IONIC_QINIT_F_ENA), + .q_init.pid = cpu_to_le16(q->pid), + .q_init.intr_index = cpu_to_le16(intr_index), + .q_init.ring_size = ilog2(q->num_descs), + .q_init.ring_base = cpu_to_le64(q->base_pa), + .q_init.cq_ring_base = cpu_to_le64(cq->base_pa), + }; + + ionic_dev_cmd_go(idev, &cmd); +} + +int ionic_db_page_num(struct ionic_lif *lif, int pid) +{ + return (lif->hw_index * lif->dbid_count) + pid; +} + +int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq, + struct ionic_intr_info *intr, + unsigned int num_descs, size_t desc_size) +{ + struct ionic_cq_info *cur; + unsigned int ring_size; + unsigned int i; + + if (desc_size == 0 || !is_power_of_2(num_descs)) + return -EINVAL; + + ring_size = ilog2(num_descs); + if (ring_size < 2 || ring_size > 16) + return -EINVAL; + + cq->lif = lif; + cq->bound_intr = intr; + cq->num_descs = num_descs; + cq->desc_size = desc_size; + cq->tail = cq->info; + cq->done_color = 1; + + cur = cq->info; + + for (i = 0; i < num_descs; i++) { + if (i + 1 == num_descs) { + cur->next = cq->info; + cur->last = true; + } else { + cur->next = cur + 1; + } + cur->index = i; + cur++; + } + + return 0; +} + +void ionic_cq_map(struct ionic_cq *cq, void *base, dma_addr_t base_pa) +{ + struct ionic_cq_info *cur; + unsigned int i; + + cq->base = base; + cq->base_pa = base_pa; + + for (i = 0, cur = cq->info; i < cq->num_descs; i++, cur++) + cur->cq_desc = base + (i * cq->desc_size); +} + +void ionic_cq_bind(struct ionic_cq *cq, struct ionic_queue *q) +{ + cq->bound_q = q; +} + +unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do, + ionic_cq_cb cb, ionic_cq_done_cb done_cb, + void *done_arg) +{ + unsigned int work_done = 0; + + if (work_to_do == 0) + return 0; + + while (cb(cq, cq->tail)) { + if (cq->tail->last) + cq->done_color = !cq->done_color; + cq->tail = cq->tail->next; + DEBUG_STATS_CQE_CNT(cq); + + if (++work_done >= work_to_do) + break; + } + + if (work_done && done_cb) + done_cb(done_arg); + + return work_done; +} + +int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev, + struct ionic_queue *q, unsigned int index, const char *name, + unsigned int num_descs, size_t desc_size, + size_t sg_desc_size, unsigned int pid) +{ + struct ionic_desc_info *cur; + unsigned int ring_size; + unsigned int i; + + if (desc_size == 0 || !is_power_of_2(num_descs)) + return -EINVAL; + + ring_size = ilog2(num_descs); + if (ring_size < 2 || ring_size > 16) + return -EINVAL; + + q->lif = lif; + q->idev = idev; + q->index = index; + q->num_descs = num_descs; + q->desc_size = desc_size; + q->sg_desc_size = sg_desc_size; + q->tail = q->info; + q->head = q->tail; + q->pid = pid; + + snprintf(q->name, sizeof(q->name), "L%d-%s%u", lif->index, name, index); + + cur = q->info; + + for (i = 0; i < num_descs; i++) { + if (i + 1 == num_descs) + cur->next = q->info; + else + cur->next = cur + 1; + cur->index = i; + cur->left = num_descs - i; + cur++; + } + + return 0; +} + +void ionic_q_map(struct ionic_queue *q, void *base, dma_addr_t base_pa) +{ + 
struct ionic_desc_info *cur; + unsigned int i; + + q->base = base; + q->base_pa = base_pa; + + for (i = 0, cur = q->info; i < q->num_descs; i++, cur++) + cur->desc = base + (i * q->desc_size); +} + +void ionic_q_sg_map(struct ionic_queue *q, void *base, dma_addr_t base_pa) +{ + struct ionic_desc_info *cur; + unsigned int i; + + q->sg_base = base; + q->sg_base_pa = base_pa; + + for (i = 0, cur = q->info; i < q->num_descs; i++, cur++) + cur->sg_desc = base + (i * q->sg_desc_size); +} + +void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb, + void *cb_arg) +{ + struct device *dev = q->lif->ionic->dev; + struct ionic_lif *lif = q->lif; + + q->head->cb = cb; + q->head->cb_arg = cb_arg; + q->head = q->head->next; + + dev_dbg(dev, "lif=%d qname=%s qid=%d qtype=%d p_index=%d ringdb=%d\n", + q->lif->index, q->name, q->hw_type, q->hw_index, + q->head->index, ring_doorbell); + + if (ring_doorbell) + ionic_dbell_ring(lif->kern_dbpage, q->hw_type, + q->dbval | q->head->index); +} + +static bool ionic_q_is_posted(struct ionic_queue *q, unsigned int pos) +{ + unsigned int mask, tail, head; + + mask = q->num_descs - 1; + tail = q->tail->index; + head = q->head->index; + + return ((pos - tail) & mask) < ((head - tail) & mask); +} + +void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info, + unsigned int stop_index) +{ + struct ionic_desc_info *desc_info; + ionic_desc_cb cb; + void *cb_arg; + + /* check for empty queue */ + if (q->tail->index == q->head->index) + return; + + /* stop index must be for a descriptor that is not yet completed */ + if (unlikely(!ionic_q_is_posted(q, stop_index))) + dev_err(q->lif->ionic->dev, + "ionic stop is not posted %s stop %u tail %u head %u\n", + q->name, stop_index, q->tail->index, q->head->index); + + do { + desc_info = q->tail; + q->tail = desc_info->next; + + cb = desc_info->cb; + cb_arg = desc_info->cb_arg; + + desc_info->cb = NULL; + desc_info->cb_arg = NULL; + + if (cb) + cb(q, desc_info, cq_info, cb_arg); + } while (desc_info->index != stop_index); +} diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h new file mode 100644 index 000000000000..9610aeb7d5f4 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -0,0 +1,299 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#ifndef _IONIC_DEV_H_ +#define _IONIC_DEV_H_ + +#include <linux/mutex.h> +#include <linux/workqueue.h> + +#include "ionic_if.h" +#include "ionic_regs.h" + +#define IONIC_MIN_MTU ETH_MIN_MTU +#define IONIC_MAX_MTU 9194 +#define IONIC_MAX_TXRX_DESC 16384 +#define IONIC_MIN_TXRX_DESC 16 +#define IONIC_DEF_TXRX_DESC 4096 +#define IONIC_LIFS_MAX 1024 +#define IONIC_ITR_COAL_USEC_DEFAULT 64 + +#define IONIC_DEV_CMD_REG_VERSION 1 +#define IONIC_DEV_INFO_REG_COUNT 32 +#define IONIC_DEV_CMD_REG_COUNT 32 + +struct ionic_dev_bar { + void __iomem *vaddr; + phys_addr_t bus_addr; + unsigned long len; + int res_index; +}; + +/* Registers */ +static_assert(sizeof(struct ionic_intr) == 32); + +static_assert(sizeof(struct ionic_doorbell) == 8); +static_assert(sizeof(struct ionic_intr_status) == 8); +static_assert(sizeof(union ionic_dev_regs) == 4096); +static_assert(sizeof(union ionic_dev_info_regs) == 2048); +static_assert(sizeof(union ionic_dev_cmd_regs) == 2048); +static_assert(sizeof(struct ionic_lif_stats) == 1024); + +static_assert(sizeof(struct ionic_admin_cmd) == 64); +static_assert(sizeof(struct ionic_admin_comp) == 16); 
+static_assert(sizeof(struct ionic_nop_cmd) == 64); +static_assert(sizeof(struct ionic_nop_comp) == 16); + +/* Device commands */ +static_assert(sizeof(struct ionic_dev_identify_cmd) == 64); +static_assert(sizeof(struct ionic_dev_identify_comp) == 16); +static_assert(sizeof(struct ionic_dev_init_cmd) == 64); +static_assert(sizeof(struct ionic_dev_init_comp) == 16); +static_assert(sizeof(struct ionic_dev_reset_cmd) == 64); +static_assert(sizeof(struct ionic_dev_reset_comp) == 16); +static_assert(sizeof(struct ionic_dev_getattr_cmd) == 64); +static_assert(sizeof(struct ionic_dev_getattr_comp) == 16); +static_assert(sizeof(struct ionic_dev_setattr_cmd) == 64); +static_assert(sizeof(struct ionic_dev_setattr_comp) == 16); + +/* Port commands */ +static_assert(sizeof(struct ionic_port_identify_cmd) == 64); +static_assert(sizeof(struct ionic_port_identify_comp) == 16); +static_assert(sizeof(struct ionic_port_init_cmd) == 64); +static_assert(sizeof(struct ionic_port_init_comp) == 16); +static_assert(sizeof(struct ionic_port_reset_cmd) == 64); +static_assert(sizeof(struct ionic_port_reset_comp) == 16); +static_assert(sizeof(struct ionic_port_getattr_cmd) == 64); +static_assert(sizeof(struct ionic_port_getattr_comp) == 16); +static_assert(sizeof(struct ionic_port_setattr_cmd) == 64); +static_assert(sizeof(struct ionic_port_setattr_comp) == 16); + +/* LIF commands */ +static_assert(sizeof(struct ionic_lif_init_cmd) == 64); +static_assert(sizeof(struct ionic_lif_init_comp) == 16); +static_assert(sizeof(struct ionic_lif_reset_cmd) == 64); +static_assert(sizeof(ionic_lif_reset_comp) == 16); +static_assert(sizeof(struct ionic_lif_getattr_cmd) == 64); +static_assert(sizeof(struct ionic_lif_getattr_comp) == 16); +static_assert(sizeof(struct ionic_lif_setattr_cmd) == 64); +static_assert(sizeof(struct ionic_lif_setattr_comp) == 16); + +static_assert(sizeof(struct ionic_q_init_cmd) == 64); +static_assert(sizeof(struct ionic_q_init_comp) == 16); +static_assert(sizeof(struct ionic_q_control_cmd) == 64); +static_assert(sizeof(ionic_q_control_comp) == 16); + +static_assert(sizeof(struct ionic_rx_mode_set_cmd) == 64); +static_assert(sizeof(ionic_rx_mode_set_comp) == 16); +static_assert(sizeof(struct ionic_rx_filter_add_cmd) == 64); +static_assert(sizeof(struct ionic_rx_filter_add_comp) == 16); +static_assert(sizeof(struct ionic_rx_filter_del_cmd) == 64); +static_assert(sizeof(ionic_rx_filter_del_comp) == 16); + +/* RDMA commands */ +static_assert(sizeof(struct ionic_rdma_reset_cmd) == 64); +static_assert(sizeof(struct ionic_rdma_queue_cmd) == 64); + +/* Events */ +static_assert(sizeof(struct ionic_notifyq_cmd) == 4); +static_assert(sizeof(union ionic_notifyq_comp) == 64); +static_assert(sizeof(struct ionic_notifyq_event) == 64); +static_assert(sizeof(struct ionic_link_change_event) == 64); +static_assert(sizeof(struct ionic_reset_event) == 64); +static_assert(sizeof(struct ionic_heartbeat_event) == 64); +static_assert(sizeof(struct ionic_log_event) == 64); + +/* I/O */ +static_assert(sizeof(struct ionic_txq_desc) == 16); +static_assert(sizeof(struct ionic_txq_sg_desc) == 128); +static_assert(sizeof(struct ionic_txq_comp) == 16); + +static_assert(sizeof(struct ionic_rxq_desc) == 16); +static_assert(sizeof(struct ionic_rxq_sg_desc) == 128); +static_assert(sizeof(struct ionic_rxq_comp) == 16); + +struct ionic_devinfo { + u8 asic_type; + u8 asic_rev; + char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN + 1]; + char serial_num[IONIC_DEVINFO_SERIAL_BUFLEN + 1]; +}; + +struct ionic_dev { + union ionic_dev_info_regs __iomem 
*dev_info_regs; + union ionic_dev_cmd_regs __iomem *dev_cmd_regs; + + u64 __iomem *db_pages; + dma_addr_t phy_db_pages; + + struct ionic_intr __iomem *intr_ctrl; + u64 __iomem *intr_status; + + u32 port_info_sz; + struct ionic_port_info *port_info; + dma_addr_t port_info_pa; + + struct ionic_devinfo dev_info; +}; + +struct ionic_cq_info { + void *cq_desc; + struct ionic_cq_info *next; + unsigned int index; + bool last; +}; + +struct ionic_queue; +struct ionic_qcq; +struct ionic_desc_info; + +typedef void (*ionic_desc_cb)(struct ionic_queue *q, + struct ionic_desc_info *desc_info, + struct ionic_cq_info *cq_info, void *cb_arg); + +struct ionic_desc_info { + void *desc; + void *sg_desc; + struct ionic_desc_info *next; + unsigned int index; + unsigned int left; + ionic_desc_cb cb; + void *cb_arg; +}; + +#define QUEUE_NAME_MAX_SZ 32 + +struct ionic_queue { + u64 dbell_count; + u64 drop; + u64 stop; + u64 wake; + struct ionic_lif *lif; + struct ionic_desc_info *info; + struct ionic_desc_info *tail; + struct ionic_desc_info *head; + struct ionic_dev *idev; + unsigned int index; + unsigned int type; + unsigned int hw_index; + unsigned int hw_type; + u64 dbval; + void *base; + void *sg_base; + dma_addr_t base_pa; + dma_addr_t sg_base_pa; + unsigned int num_descs; + unsigned int desc_size; + unsigned int sg_desc_size; + unsigned int pid; + char name[QUEUE_NAME_MAX_SZ]; +}; + +#define INTR_INDEX_NOT_ASSIGNED -1 +#define INTR_NAME_MAX_SZ 32 + +struct ionic_intr_info { + char name[INTR_NAME_MAX_SZ]; + unsigned int index; + unsigned int vector; + u64 rearm_count; + unsigned int cpu; + cpumask_t affinity_mask; +}; + +struct ionic_cq { + void *base; + dma_addr_t base_pa; + struct ionic_lif *lif; + struct ionic_cq_info *info; + struct ionic_cq_info *tail; + struct ionic_queue *bound_q; + struct ionic_intr_info *bound_intr; + bool done_color; + unsigned int num_descs; + u64 compl_count; + unsigned int desc_size; +}; + +struct ionic; + +static inline void ionic_intr_init(struct ionic_dev *idev, + struct ionic_intr_info *intr, + unsigned long index) +{ + ionic_intr_clean(idev->intr_ctrl, index); + intr->index = index; +} + +static inline unsigned int ionic_q_space_avail(struct ionic_queue *q) +{ + unsigned int avail = q->tail->index; + + if (q->head->index >= avail) + avail += q->head->left - 1; + else + avail -= q->head->index + 1; + + return avail; +} + +static inline bool ionic_q_has_space(struct ionic_queue *q, unsigned int want) +{ + return ionic_q_space_avail(q) >= want; +} + +void ionic_init_devinfo(struct ionic *ionic); +int ionic_dev_setup(struct ionic *ionic); +void ionic_dev_teardown(struct ionic *ionic); + +void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd); +u8 ionic_dev_cmd_status(struct ionic_dev *idev); +bool ionic_dev_cmd_done(struct ionic_dev *idev); +void ionic_dev_cmd_comp(struct ionic_dev *idev, union ionic_dev_cmd_comp *comp); + +void ionic_dev_cmd_identify(struct ionic_dev *idev, u8 ver); +void ionic_dev_cmd_init(struct ionic_dev *idev); +void ionic_dev_cmd_reset(struct ionic_dev *idev); + +void ionic_dev_cmd_port_identify(struct ionic_dev *idev); +void ionic_dev_cmd_port_init(struct ionic_dev *idev); +void ionic_dev_cmd_port_reset(struct ionic_dev *idev); +void ionic_dev_cmd_port_state(struct ionic_dev *idev, u8 state); +void ionic_dev_cmd_port_speed(struct ionic_dev *idev, u32 speed); +void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable); +void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type); +void ionic_dev_cmd_port_pause(struct 
ionic_dev *idev, u8 pause_type); + +void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver); +void ionic_dev_cmd_lif_init(struct ionic_dev *idev, u16 lif_index, + dma_addr_t addr); +void ionic_dev_cmd_lif_reset(struct ionic_dev *idev, u16 lif_index); +void ionic_dev_cmd_adminq_init(struct ionic_dev *idev, struct ionic_qcq *qcq, + u16 lif_index, u16 intr_index); + +int ionic_db_page_num(struct ionic_lif *lif, int pid); + +int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq, + struct ionic_intr_info *intr, + unsigned int num_descs, size_t desc_size); +void ionic_cq_map(struct ionic_cq *cq, void *base, dma_addr_t base_pa); +void ionic_cq_bind(struct ionic_cq *cq, struct ionic_queue *q); +typedef bool (*ionic_cq_cb)(struct ionic_cq *cq, struct ionic_cq_info *cq_info); +typedef void (*ionic_cq_done_cb)(void *done_arg); +unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do, + ionic_cq_cb cb, ionic_cq_done_cb done_cb, + void *done_arg); + +int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev, + struct ionic_queue *q, unsigned int index, const char *name, + unsigned int num_descs, size_t desc_size, + size_t sg_desc_size, unsigned int pid); +void ionic_q_map(struct ionic_queue *q, void *base, dma_addr_t base_pa); +void ionic_q_sg_map(struct ionic_queue *q, void *base, dma_addr_t base_pa); +void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb, + void *cb_arg); +void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start); +void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info, + unsigned int stop_index); + +#endif /* _IONIC_DEV_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c new file mode 100644 index 000000000000..af1647afa4e8 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#include <linux/module.h> +#include <linux/netdevice.h> + +#include "ionic.h" +#include "ionic_bus.h" +#include "ionic_lif.h" +#include "ionic_devlink.h" + +static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct ionic *ionic = devlink_priv(dl); + struct ionic_dev *idev = &ionic->idev; + char buf[16]; + int err = 0; + + err = devlink_info_driver_name_put(req, IONIC_DRV_NAME); + if (err) + goto info_out; + + err = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + idev->dev_info.fw_version); + if (err) + goto info_out; + + snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_type); + err = devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, + buf); + if (err) + goto info_out; + + snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_rev); + err = devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_ASIC_REV, + buf); + if (err) + goto info_out; + + err = devlink_info_serial_number_put(req, idev->dev_info.serial_num); + +info_out: + return err; +} + +static const struct devlink_ops ionic_dl_ops = { + .info_get = ionic_dl_info_get, +}; + +struct ionic *ionic_devlink_alloc(struct device *dev) +{ + struct devlink *dl; + + dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic)); + + return devlink_priv(dl); +} + +void ionic_devlink_free(struct ionic *ionic) +{ + struct devlink *dl = priv_to_devlink(ionic); + + devlink_free(dl); +} + +int ionic_devlink_register(struct ionic *ionic) 
+{ + struct devlink *dl = priv_to_devlink(ionic); + int err; + + err = devlink_register(dl, ionic->dev); + if (err) { + dev_warn(ionic->dev, "devlink_register failed: %d\n", err); + return err; + } + + devlink_port_attrs_set(&ionic->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, + 0, false, 0, NULL, 0); + err = devlink_port_register(dl, &ionic->dl_port, 0); + if (err) + dev_err(ionic->dev, "devlink_port_register failed: %d\n", err); + else + devlink_port_type_eth_set(&ionic->dl_port, + ionic->master_lif->netdev); + + return err; +} + +void ionic_devlink_unregister(struct ionic *ionic) +{ + struct devlink *dl = priv_to_devlink(ionic); + + devlink_port_unregister(&ionic->dl_port); + devlink_unregister(dl); +} diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.h b/drivers/net/ethernet/pensando/ionic/ionic_devlink.h new file mode 100644 index 000000000000..0690172fc57a --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#ifndef _IONIC_DEVLINK_H_ +#define _IONIC_DEVLINK_H_ + +#include <net/devlink.h> + +struct ionic *ionic_devlink_alloc(struct device *dev); +void ionic_devlink_free(struct ionic *ionic); +int ionic_devlink_register(struct ionic *ionic); +void ionic_devlink_unregister(struct ionic *ionic); + +#endif /* _IONIC_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c new file mode 100644 index 000000000000..7d10265f782a --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -0,0 +1,779 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#include <linux/module.h> +#include <linux/netdevice.h> + +#include "ionic.h" +#include "ionic_bus.h" +#include "ionic_lif.h" +#include "ionic_ethtool.h" +#include "ionic_stats.h" + +static const char ionic_priv_flags_strings[][ETH_GSTRING_LEN] = { +#define PRIV_F_SW_DBG_STATS BIT(0) + "sw-dbg-stats", +}; +#define PRIV_FLAGS_COUNT ARRAY_SIZE(ionic_priv_flags_strings) + +static void ionic_get_stats_strings(struct ionic_lif *lif, u8 *buf) +{ + u32 i; + + for (i = 0; i < ionic_num_stats_grps; i++) + ionic_stats_groups[i].get_strings(lif, &buf); +} + +static void ionic_get_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *buf) +{ + struct ionic_lif *lif; + u32 i; + + lif = netdev_priv(netdev); + + memset(buf, 0, stats->n_stats * sizeof(*buf)); + for (i = 0; i < ionic_num_stats_grps; i++) + ionic_stats_groups[i].get_values(lif, &buf); +} + +static int ionic_get_stats_count(struct ionic_lif *lif) +{ + int i, num_stats = 0; + + for (i = 0; i < ionic_num_stats_grps; i++) + num_stats += ionic_stats_groups[i].get_count(lif); + + return num_stats; +} + +static int ionic_get_sset_count(struct net_device *netdev, int sset) +{ + struct ionic_lif *lif = netdev_priv(netdev); + int count = 0; + + switch (sset) { + case ETH_SS_STATS: + count = ionic_get_stats_count(lif); + break; + case ETH_SS_PRIV_FLAGS: + count = PRIV_FLAGS_COUNT; + break; + } + return count; +} + +static void ionic_get_strings(struct net_device *netdev, + u32 sset, u8 *buf) +{ + struct ionic_lif *lif = netdev_priv(netdev); + + switch (sset) { + case ETH_SS_STATS: + ionic_get_stats_strings(lif, buf); + break; + case ETH_SS_PRIV_FLAGS: + memcpy(buf, ionic_priv_flags_strings, + PRIV_FLAGS_COUNT * ETH_GSTRING_LEN); + break; + } +} + +static void ionic_get_drvinfo(struct net_device *netdev, + struct 
ethtool_drvinfo *drvinfo) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + + strlcpy(drvinfo->driver, IONIC_DRV_NAME, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, IONIC_DRV_VERSION, sizeof(drvinfo->version)); + strlcpy(drvinfo->fw_version, ionic->idev.dev_info.fw_version, + sizeof(drvinfo->fw_version)); + strlcpy(drvinfo->bus_info, ionic_bus_info(ionic), + sizeof(drvinfo->bus_info)); +} + +static int ionic_get_regs_len(struct net_device *netdev) +{ + return (IONIC_DEV_INFO_REG_COUNT + IONIC_DEV_CMD_REG_COUNT) * sizeof(u32); +} + +static void ionic_get_regs(struct net_device *netdev, struct ethtool_regs *regs, + void *p) +{ + struct ionic_lif *lif = netdev_priv(netdev); + unsigned int size; + + regs->version = IONIC_DEV_CMD_REG_VERSION; + + size = IONIC_DEV_INFO_REG_COUNT * sizeof(u32); + memcpy_fromio(p, lif->ionic->idev.dev_info_regs->words, size); + + size = IONIC_DEV_CMD_REG_COUNT * sizeof(u32); + memcpy_fromio(p, lif->ionic->idev.dev_cmd_regs->words, size); +} + +static int ionic_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *ks) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_dev *idev = &lif->ionic->idev; + int copper_seen = 0; + + ethtool_link_ksettings_zero_link_mode(ks, supported); + + /* The port_info data is found in a DMA space that the NIC keeps + * up-to-date, so there's no need to request the data from the + * NIC, we already have it in our memory space. + */ + + switch (le16_to_cpu(idev->port_info->status.xcvr.pid)) { + /* Copper */ + case IONIC_XCVR_PID_QSFP_100G_CR4: + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseCR4_Full); + copper_seen++; + break; + case IONIC_XCVR_PID_QSFP_40GBASE_CR4: + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseCR4_Full); + copper_seen++; + break; + case IONIC_XCVR_PID_SFP_25GBASE_CR_S: + case IONIC_XCVR_PID_SFP_25GBASE_CR_L: + case IONIC_XCVR_PID_SFP_25GBASE_CR_N: + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseCR_Full); + copper_seen++; + break; + case IONIC_XCVR_PID_SFP_10GBASE_AOC: + case IONIC_XCVR_PID_SFP_10GBASE_CU: + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseCR_Full); + copper_seen++; + break; + + /* Fibre */ + case IONIC_XCVR_PID_QSFP_100G_SR4: + case IONIC_XCVR_PID_QSFP_100G_AOC: + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseSR4_Full); + break; + case IONIC_XCVR_PID_QSFP_100G_LR4: + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseLR4_ER4_Full); + break; + case IONIC_XCVR_PID_QSFP_100G_ER4: + ethtool_link_ksettings_add_link_mode(ks, supported, + 100000baseLR4_ER4_Full); + break; + case IONIC_XCVR_PID_QSFP_40GBASE_SR4: + case IONIC_XCVR_PID_QSFP_40GBASE_AOC: + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseSR4_Full); + break; + case IONIC_XCVR_PID_QSFP_40GBASE_LR4: + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseLR4_Full); + break; + case IONIC_XCVR_PID_SFP_25GBASE_SR: + case IONIC_XCVR_PID_SFP_25GBASE_AOC: + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseSR_Full); + break; + case IONIC_XCVR_PID_SFP_10GBASE_SR: + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseSR_Full); + break; + case IONIC_XCVR_PID_SFP_10GBASE_LR: + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseLR_Full); + break; + case IONIC_XCVR_PID_SFP_10GBASE_LRM: + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseLRM_Full); + break; + case IONIC_XCVR_PID_SFP_10GBASE_ER: + 
ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseER_Full); + break; + case IONIC_XCVR_PID_UNKNOWN: + /* This means there's no module plugged in */ + break; + default: + dev_info(lif->ionic->dev, "unknown xcvr type pid=%d / 0x%x\n", + idev->port_info->status.xcvr.pid, + idev->port_info->status.xcvr.pid); + break; + } + + bitmap_copy(ks->link_modes.advertising, ks->link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); + + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); + if (idev->port_info->config.fec_type == IONIC_PORT_FEC_TYPE_FC) + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_BASER); + else if (idev->port_info->config.fec_type == IONIC_PORT_FEC_TYPE_RS) + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); + + ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(ks, supported, Pause); + + if (idev->port_info->status.xcvr.phy == IONIC_PHY_TYPE_COPPER || + copper_seen) + ks->base.port = PORT_DA; + else if (idev->port_info->status.xcvr.phy == IONIC_PHY_TYPE_FIBER) + ks->base.port = PORT_FIBRE; + else + ks->base.port = PORT_NONE; + + if (ks->base.port != PORT_NONE) { + ks->base.speed = le32_to_cpu(lif->info->status.link_speed); + + if (le16_to_cpu(lif->info->status.link_status)) + ks->base.duplex = DUPLEX_FULL; + else + ks->base.duplex = DUPLEX_UNKNOWN; + + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + + if (idev->port_info->config.an_enable) { + ethtool_link_ksettings_add_link_mode(ks, advertising, + Autoneg); + ks->base.autoneg = AUTONEG_ENABLE; + } + } + + return 0; +} + +static int ionic_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *ks) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + struct ionic_dev *idev; + u32 req_rs, req_fc; + u8 fec_type; + int err = 0; + + idev = &lif->ionic->idev; + fec_type = IONIC_PORT_FEC_TYPE_NONE; + + /* set autoneg */ + if (ks->base.autoneg != idev->port_info->config.an_enable) { + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_port_autoneg(idev, ks->base.autoneg); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + if (err) + return err; + } + + /* set speed */ + if (ks->base.speed != le32_to_cpu(idev->port_info->config.speed)) { + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_port_speed(idev, ks->base.speed); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + if (err) + return err; + } + + /* set FEC */ + req_rs = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_RS); + req_fc = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_BASER); + if (req_rs && req_fc) { + netdev_info(netdev, "Only select one FEC mode at a time\n"); + return -EINVAL; + } else if (req_fc) { + fec_type = IONIC_PORT_FEC_TYPE_FC; + } else if (req_rs) { + fec_type = IONIC_PORT_FEC_TYPE_RS; + } else if (!(req_rs | req_fc)) { + fec_type = IONIC_PORT_FEC_TYPE_NONE; + } + + if (fec_type != idev->port_info->config.fec_type) { + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_port_fec(idev, fec_type); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + if (err) + return err; + } + + return 0; +} + +static void ionic_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct ionic_lif *lif = netdev_priv(netdev); + u8 pause_type; + + pause->autoneg = 0; + + 
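+	/* pause autonegotiation is not supported (ionic_set_pauseparam
+	 * rejects it), so only the configured rx/tx pause bits are reported
+	 */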
pause_type = lif->ionic->idev.port_info->config.pause_type; + if (pause_type) { + pause->rx_pause = pause_type & IONIC_PAUSE_F_RX ? 1 : 0; + pause->tx_pause = pause_type & IONIC_PAUSE_F_TX ? 1 : 0; + } +} + +static int ionic_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + u32 requested_pause; + int err; + + if (pause->autoneg) + return -EOPNOTSUPP; + + /* change both at the same time */ + requested_pause = IONIC_PORT_PAUSE_TYPE_LINK; + if (pause->rx_pause) + requested_pause |= IONIC_PAUSE_F_RX; + if (pause->tx_pause) + requested_pause |= IONIC_PAUSE_F_TX; + + if (requested_pause == lif->ionic->idev.port_info->config.pause_type) + return 0; + + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_port_pause(&lif->ionic->idev, requested_pause); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + if (err) + return err; + + return 0; +} + +static int ionic_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coalesce) +{ + struct ionic_lif *lif = netdev_priv(netdev); + + /* Tx uses Rx interrupt */ + coalesce->tx_coalesce_usecs = lif->rx_coalesce_usecs; + coalesce->rx_coalesce_usecs = lif->rx_coalesce_usecs; + + return 0; +} + +static int ionic_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coalesce) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_identity *ident; + struct ionic_qcq *qcq; + unsigned int i; + u32 usecs; + u32 coal; + + if (coalesce->rx_max_coalesced_frames || + coalesce->rx_coalesce_usecs_irq || + coalesce->rx_max_coalesced_frames_irq || + coalesce->tx_max_coalesced_frames || + coalesce->tx_coalesce_usecs_irq || + coalesce->tx_max_coalesced_frames_irq || + coalesce->stats_block_coalesce_usecs || + coalesce->use_adaptive_rx_coalesce || + coalesce->use_adaptive_tx_coalesce || + coalesce->pkt_rate_low || + coalesce->rx_coalesce_usecs_low || + coalesce->rx_max_coalesced_frames_low || + coalesce->tx_coalesce_usecs_low || + coalesce->tx_max_coalesced_frames_low || + coalesce->pkt_rate_high || + coalesce->rx_coalesce_usecs_high || + coalesce->rx_max_coalesced_frames_high || + coalesce->tx_coalesce_usecs_high || + coalesce->tx_max_coalesced_frames_high || + coalesce->rate_sample_interval) + return -EINVAL; + + ident = &lif->ionic->ident; + if (ident->dev.intr_coal_div == 0) { + netdev_warn(netdev, "bad HW value in dev.intr_coal_div = %d\n", + ident->dev.intr_coal_div); + return -EIO; + } + + /* Tx uses Rx interrupt, so only change Rx */ + if (coalesce->tx_coalesce_usecs != lif->rx_coalesce_usecs) { + netdev_warn(netdev, "only the rx-usecs can be changed\n"); + return -EINVAL; + } + + coal = ionic_coal_usec_to_hw(lif->ionic, coalesce->rx_coalesce_usecs); + + if (coal > IONIC_INTR_CTRL_COAL_MAX) + return -ERANGE; + + /* If they asked for non-zero and it resolved to zero, bump it up */ + if (!coal && coalesce->rx_coalesce_usecs) + coal = 1; + + /* Convert it back to get device resolution */ + usecs = ionic_coal_hw_to_usec(lif->ionic, coal); + + if (usecs != lif->rx_coalesce_usecs) { + lif->rx_coalesce_usecs = usecs; + + if (test_bit(IONIC_LIF_UP, lif->state)) { + for (i = 0; i < lif->nxqs; i++) { + qcq = lif->rxqcqs[i].qcq; + ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, + qcq->intr.index, coal); + } + } + } + + return 0; +} + +static void ionic_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ionic_lif *lif = netdev_priv(netdev); + + 
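+	/* report the fixed device maximums alongside the ring lengths
+	 * currently configured on this lif
+	 */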
ring->tx_max_pending = IONIC_MAX_TXRX_DESC; + ring->tx_pending = lif->ntxq_descs; + ring->rx_max_pending = IONIC_MAX_TXRX_DESC; + ring->rx_pending = lif->nrxq_descs; +} + +static int ionic_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ionic_lif *lif = netdev_priv(netdev); + bool running; + + if (ring->rx_mini_pending || ring->rx_jumbo_pending) { + netdev_info(netdev, "Changing jumbo or mini descriptors not supported\n"); + return -EINVAL; + } + + if (!is_power_of_2(ring->tx_pending) || + !is_power_of_2(ring->rx_pending)) { + netdev_info(netdev, "Descriptor count must be a power of 2\n"); + return -EINVAL; + } + + /* if nothing to do return success */ + if (ring->tx_pending == lif->ntxq_descs && + ring->rx_pending == lif->nrxq_descs) + return 0; + + if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET)) + return -EBUSY; + + running = test_bit(IONIC_LIF_UP, lif->state); + if (running) + ionic_stop(netdev); + + lif->ntxq_descs = ring->tx_pending; + lif->nrxq_descs = ring->rx_pending; + + if (running) + ionic_open(netdev); + clear_bit(IONIC_LIF_QUEUE_RESET, lif->state); + + return 0; +} + +static void ionic_get_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct ionic_lif *lif = netdev_priv(netdev); + + /* report maximum channels */ + ch->max_combined = lif->ionic->ntxqs_per_lif; + + /* report current channels */ + ch->combined_count = lif->nxqs; +} + +static int ionic_set_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct ionic_lif *lif = netdev_priv(netdev); + bool running; + + if (!ch->combined_count || ch->other_count || + ch->rx_count || ch->tx_count) + return -EINVAL; + + if (ch->combined_count == lif->nxqs) + return 0; + + if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET)) + return -EBUSY; + + running = test_bit(IONIC_LIF_UP, lif->state); + if (running) + ionic_stop(netdev); + + lif->nxqs = ch->combined_count; + + if (running) + ionic_open(netdev); + clear_bit(IONIC_LIF_QUEUE_RESET, lif->state); + + return 0; +} + +static u32 ionic_get_priv_flags(struct net_device *netdev) +{ + struct ionic_lif *lif = netdev_priv(netdev); + u32 priv_flags = 0; + + if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) + priv_flags |= PRIV_F_SW_DBG_STATS; + + return priv_flags; +} + +static int ionic_set_priv_flags(struct net_device *netdev, u32 priv_flags) +{ + struct ionic_lif *lif = netdev_priv(netdev); + u32 flags = lif->flags; + + clear_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state); + if (priv_flags & PRIV_F_SW_DBG_STATS) + set_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state); + + if (flags != lif->flags) + lif->flags = flags; + + return 0; +} + +static int ionic_get_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *info, u32 *rules) +{ + struct ionic_lif *lif = netdev_priv(netdev); + int err = 0; + + switch (info->cmd) { + case ETHTOOL_GRXRINGS: + info->data = lif->nxqs; + break; + default: + netdev_err(netdev, "Command parameter %d is not supported\n", + info->cmd); + err = -EOPNOTSUPP; + } + + return err; +} + +static u32 ionic_get_rxfh_indir_size(struct net_device *netdev) +{ + struct ionic_lif *lif = netdev_priv(netdev); + + return le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz); +} + +static u32 ionic_get_rxfh_key_size(struct net_device *netdev) +{ + return IONIC_RSS_HASH_KEY_SIZE; +} + +static int ionic_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct ionic_lif *lif = netdev_priv(netdev); + unsigned int i, tbl_sz; + + if (indir) { + tbl_sz = 
le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz); + for (i = 0; i < tbl_sz; i++) + indir[i] = lif->rss_ind_tbl[i]; + } + + if (key) + memcpy(key, lif->rss_hash_key, IONIC_RSS_HASH_KEY_SIZE); + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + return 0; +} + +static int ionic_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct ionic_lif *lif = netdev_priv(netdev); + int err; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + + err = ionic_lif_rss_config(lif, lif->rss_types, key, indir); + if (err) + return err; + + return 0; +} + +static int ionic_set_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct ionic_lif *lif = netdev_priv(dev); + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + lif->rx_copybreak = *(u32 *)data; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int ionic_get_tunable(struct net_device *netdev, + const struct ethtool_tunable *tuna, void *data) +{ + struct ionic_lif *lif = netdev_priv(netdev); + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + *(u32 *)data = lif->rx_copybreak; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int ionic_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) + +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_dev *idev = &lif->ionic->idev; + struct ionic_xcvr_status *xcvr; + + xcvr = &idev->port_info->status.xcvr; + + /* report the module data type and length */ + switch (xcvr->sprom[0]) { + case 0x03: /* SFP */ + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + break; + case 0x0D: /* QSFP */ + case 0x11: /* QSFP28 */ + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + default: + netdev_info(netdev, "unknown xcvr type 0x%02x\n", + xcvr->sprom[0]); + break; + } + + return 0; +} + +static int ionic_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_dev *idev = &lif->ionic->idev; + struct ionic_xcvr_status *xcvr; + char tbuf[sizeof(xcvr->sprom)]; + int count = 10; + u32 len; + + /* The NIC keeps the module prom up-to-date in the DMA space + * so we can simply copy the module bytes into the data buffer. 
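+	 * Because the device may rewrite the prom while we copy it, the
+	 * loop below re-reads into a scratch buffer and compares until two
+	 * consecutive reads match (or the retry count is exhausted).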
+ */ + xcvr = &idev->port_info->status.xcvr; + len = min_t(u32, sizeof(xcvr->sprom), ee->len); + + do { + memcpy(data, xcvr->sprom, len); + memcpy(tbuf, xcvr->sprom, len); + + /* Let's make sure we got a consistent copy */ + if (!memcmp(data, tbuf, len)) + break; + + } while (--count); + + if (!count) + return -ETIMEDOUT; + + return 0; +} + +static int ionic_nway_reset(struct net_device *netdev) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + int err = 0; + + /* flap the link to force auto-negotiation */ + + mutex_lock(&ionic->dev_cmd_lock); + + ionic_dev_cmd_port_state(&ionic->idev, IONIC_PORT_ADMIN_STATE_DOWN); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + + if (!err) { + ionic_dev_cmd_port_state(&ionic->idev, IONIC_PORT_ADMIN_STATE_UP); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + } + + mutex_unlock(&ionic->dev_cmd_lock); + + return err; +} + +static const struct ethtool_ops ionic_ethtool_ops = { + .get_drvinfo = ionic_get_drvinfo, + .get_regs_len = ionic_get_regs_len, + .get_regs = ionic_get_regs, + .get_link = ethtool_op_get_link, + .get_link_ksettings = ionic_get_link_ksettings, + .get_coalesce = ionic_get_coalesce, + .set_coalesce = ionic_set_coalesce, + .get_ringparam = ionic_get_ringparam, + .set_ringparam = ionic_set_ringparam, + .get_channels = ionic_get_channels, + .set_channels = ionic_set_channels, + .get_strings = ionic_get_strings, + .get_ethtool_stats = ionic_get_stats, + .get_sset_count = ionic_get_sset_count, + .get_priv_flags = ionic_get_priv_flags, + .set_priv_flags = ionic_set_priv_flags, + .get_rxnfc = ionic_get_rxnfc, + .get_rxfh_indir_size = ionic_get_rxfh_indir_size, + .get_rxfh_key_size = ionic_get_rxfh_key_size, + .get_rxfh = ionic_get_rxfh, + .set_rxfh = ionic_set_rxfh, + .get_tunable = ionic_get_tunable, + .set_tunable = ionic_set_tunable, + .get_module_info = ionic_get_module_info, + .get_module_eeprom = ionic_get_module_eeprom, + .get_pauseparam = ionic_get_pauseparam, + .set_pauseparam = ionic_set_pauseparam, + .set_link_ksettings = ionic_set_link_ksettings, + .nway_reset = ionic_nway_reset, +}; + +void ionic_ethtool_set_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &ionic_ethtool_ops; +} diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.h b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.h new file mode 100644 index 000000000000..38b91b1d70ae --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#ifndef _IONIC_ETHTOOL_H_ +#define _IONIC_ETHTOOL_H_ + +void ionic_ethtool_set_ops(struct net_device *netdev); + +#endif /* _IONIC_ETHTOOL_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h new file mode 100644 index 000000000000..5bfdda19f64d --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -0,0 +1,2482 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB OR BSD-2-Clause */ +/* Copyright (c) 2017-2019 Pensando Systems, Inc. All rights reserved. 
*/ + +#ifndef _IONIC_IF_H_ +#define _IONIC_IF_H_ + +#pragma pack(push, 1) + +#define IONIC_DEV_INFO_SIGNATURE 0x44455649 /* 'DEVI' */ +#define IONIC_DEV_INFO_VERSION 1 +#define IONIC_IFNAMSIZ 16 + +/** + * Commands + */ +enum ionic_cmd_opcode { + IONIC_CMD_NOP = 0, + + /* Device commands */ + IONIC_CMD_IDENTIFY = 1, + IONIC_CMD_INIT = 2, + IONIC_CMD_RESET = 3, + IONIC_CMD_GETATTR = 4, + IONIC_CMD_SETATTR = 5, + + /* Port commands */ + IONIC_CMD_PORT_IDENTIFY = 10, + IONIC_CMD_PORT_INIT = 11, + IONIC_CMD_PORT_RESET = 12, + IONIC_CMD_PORT_GETATTR = 13, + IONIC_CMD_PORT_SETATTR = 14, + + /* LIF commands */ + IONIC_CMD_LIF_IDENTIFY = 20, + IONIC_CMD_LIF_INIT = 21, + IONIC_CMD_LIF_RESET = 22, + IONIC_CMD_LIF_GETATTR = 23, + IONIC_CMD_LIF_SETATTR = 24, + + IONIC_CMD_RX_MODE_SET = 30, + IONIC_CMD_RX_FILTER_ADD = 31, + IONIC_CMD_RX_FILTER_DEL = 32, + + /* Queue commands */ + IONIC_CMD_Q_INIT = 40, + IONIC_CMD_Q_CONTROL = 41, + + /* RDMA commands */ + IONIC_CMD_RDMA_RESET_LIF = 50, + IONIC_CMD_RDMA_CREATE_EQ = 51, + IONIC_CMD_RDMA_CREATE_CQ = 52, + IONIC_CMD_RDMA_CREATE_ADMINQ = 53, + + /* QoS commands */ + IONIC_CMD_QOS_CLASS_IDENTIFY = 240, + IONIC_CMD_QOS_CLASS_INIT = 241, + IONIC_CMD_QOS_CLASS_RESET = 242, + + /* Firmware commands */ + IONIC_CMD_FW_DOWNLOAD = 254, + IONIC_CMD_FW_CONTROL = 255, +}; + +/** + * Command Return codes + */ +enum ionic_status_code { + IONIC_RC_SUCCESS = 0, /* Success */ + IONIC_RC_EVERSION = 1, /* Incorrect version for request */ + IONIC_RC_EOPCODE = 2, /* Invalid cmd opcode */ + IONIC_RC_EIO = 3, /* I/O error */ + IONIC_RC_EPERM = 4, /* Permission denied */ + IONIC_RC_EQID = 5, /* Bad qid */ + IONIC_RC_EQTYPE = 6, /* Bad qtype */ + IONIC_RC_ENOENT = 7, /* No such element */ + IONIC_RC_EINTR = 8, /* operation interrupted */ + IONIC_RC_EAGAIN = 9, /* Try again */ + IONIC_RC_ENOMEM = 10, /* Out of memory */ + IONIC_RC_EFAULT = 11, /* Bad address */ + IONIC_RC_EBUSY = 12, /* Device or resource busy */ + IONIC_RC_EEXIST = 13, /* object already exists */ + IONIC_RC_EINVAL = 14, /* Invalid argument */ + IONIC_RC_ENOSPC = 15, /* No space left or alloc failure */ + IONIC_RC_ERANGE = 16, /* Parameter out of range */ + IONIC_RC_BAD_ADDR = 17, /* Descriptor contains a bad ptr */ + IONIC_RC_DEV_CMD = 18, /* Device cmd attempted on AdminQ */ + IONIC_RC_ENOSUPP = 19, /* Operation not supported */ + IONIC_RC_ERROR = 29, /* Generic error */ + + IONIC_RC_ERDMA = 30, /* Generic RDMA error */ +}; + +enum ionic_notifyq_opcode { + IONIC_EVENT_LINK_CHANGE = 1, + IONIC_EVENT_RESET = 2, + IONIC_EVENT_HEARTBEAT = 3, + IONIC_EVENT_LOG = 4, +}; + +/** + * struct cmd - General admin command format + * @opcode: Opcode for the command + * @lif_index: LIF index + * @cmd_data: Opcode-specific command bytes + */ +struct ionic_admin_cmd { + u8 opcode; + u8 rsvd; + __le16 lif_index; + u8 cmd_data[60]; +}; + +/** + * struct admin_comp - General admin command completion format + * @status: The status of the command (enum status_code) + * @comp_index: The index in the descriptor ring for which this + * is the completion. + * @cmd_data: Command-specific bytes. + * @color: Color bit. (Always 0 for commands issued to the + * Device Cmd Registers.) 
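+ *              Completion consumers compare this bit against the color
+ *              they currently expect in order to detect newly written
+ *              entries; see color_match() just below. ionic_cq_service()
+ *              flips its expected done_color each time the completion
+ *              ring wraps.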
+ */ +struct ionic_admin_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 cmd_data[11]; + u8 color; +#define IONIC_COMP_COLOR_MASK 0x80 +}; + +static inline u8 color_match(u8 color, u8 done_color) +{ + return (!!(color & IONIC_COMP_COLOR_MASK)) == done_color; +} + +/** + * struct nop_cmd - NOP command + * @opcode: opcode + */ +struct ionic_nop_cmd { + u8 opcode; + u8 rsvd[63]; +}; + +/** + * struct nop_comp - NOP command completion + * @status: The status of the command (enum status_code) + */ +struct ionic_nop_comp { + u8 status; + u8 rsvd[15]; +}; + +/** + * struct dev_init_cmd - Device init command + * @opcode: opcode + * @type: device type + */ +struct ionic_dev_init_cmd { + u8 opcode; + u8 type; + u8 rsvd[62]; +}; + +/** + * struct init_comp - Device init command completion + * @status: The status of the command (enum status_code) + */ +struct ionic_dev_init_comp { + u8 status; + u8 rsvd[15]; +}; + +/** + * struct dev_reset_cmd - Device reset command + * @opcode: opcode + */ +struct ionic_dev_reset_cmd { + u8 opcode; + u8 rsvd[63]; +}; + +/** + * struct reset_comp - Reset command completion + * @status: The status of the command (enum status_code) + */ +struct ionic_dev_reset_comp { + u8 status; + u8 rsvd[15]; +}; + +#define IONIC_IDENTITY_VERSION_1 1 + +/** + * struct dev_identify_cmd - Driver/device identify command + * @opcode: opcode + * @ver: Highest version of identify supported by driver + */ +struct ionic_dev_identify_cmd { + u8 opcode; + u8 ver; + u8 rsvd[62]; +}; + +/** + * struct dev_identify_comp - Driver/device identify command completion + * @status: The status of the command (enum status_code) + * @ver: Version of identify returned by device + */ +struct ionic_dev_identify_comp { + u8 status; + u8 ver; + u8 rsvd[14]; +}; + +enum ionic_os_type { + IONIC_OS_TYPE_LINUX = 1, + IONIC_OS_TYPE_WIN = 2, + IONIC_OS_TYPE_DPDK = 3, + IONIC_OS_TYPE_FREEBSD = 4, + IONIC_OS_TYPE_IPXE = 5, + IONIC_OS_TYPE_ESXI = 6, +}; + +/** + * union drv_identity - driver identity information + * @os_type: OS type (see enum os_type) + * @os_dist: OS distribution, numeric format + * @os_dist_str: OS distribution, string format + * @kernel_ver: Kernel version, numeric format + * @kernel_ver_str: Kernel version, string format + * @driver_ver_str: Driver version, string format + */ +union ionic_drv_identity { + struct { + __le32 os_type; + __le32 os_dist; + char os_dist_str[128]; + __le32 kernel_ver; + char kernel_ver_str[32]; + char driver_ver_str[32]; + }; + __le32 words[512]; +}; + +/** + * union dev_identity - device identity information + * @version: Version of device identify + * @type: Identify type (0 for now) + * @nports: Number of ports provisioned + * @nlifs: Number of LIFs provisioned + * @nintrs: Number of interrupts provisioned + * @ndbpgs_per_lif: Number of doorbell pages per LIF + * @intr_coal_mult: Interrupt coalescing multiplication factor. + * Scale user-supplied interrupt coalescing + * value in usecs to device units using: + * device units = usecs * mult / div + * @intr_coal_div: Interrupt coalescing division factor. 
+ * Scale user-supplied interrupt coalescing + * value in usecs to device units using: + * device units = usecs * mult / div + * + */ +union ionic_dev_identity { + struct { + u8 version; + u8 type; + u8 rsvd[2]; + u8 nports; + u8 rsvd2[3]; + __le32 nlifs; + __le32 nintrs; + __le32 ndbpgs_per_lif; + __le32 intr_coal_mult; + __le32 intr_coal_div; + }; + __le32 words[512]; +}; + +enum ionic_lif_type { + IONIC_LIF_TYPE_CLASSIC = 0, + IONIC_LIF_TYPE_MACVLAN = 1, + IONIC_LIF_TYPE_NETQUEUE = 2, +}; + +/** + * struct lif_identify_cmd - lif identify command + * @opcode: opcode + * @type: lif type (enum lif_type) + * @ver: version of identify returned by device + */ +struct ionic_lif_identify_cmd { + u8 opcode; + u8 type; + u8 ver; + u8 rsvd[61]; +}; + +/** + * struct lif_identify_comp - lif identify command completion + * @status: status of the command (enum status_code) + * @ver: version of identify returned by device + */ +struct ionic_lif_identify_comp { + u8 status; + u8 ver; + u8 rsvd2[14]; +}; + +enum ionic_lif_capability { + IONIC_LIF_CAP_ETH = BIT(0), + IONIC_LIF_CAP_RDMA = BIT(1), +}; + +/** + * Logical Queue Types + */ +enum ionic_logical_qtype { + IONIC_QTYPE_ADMINQ = 0, + IONIC_QTYPE_NOTIFYQ = 1, + IONIC_QTYPE_RXQ = 2, + IONIC_QTYPE_TXQ = 3, + IONIC_QTYPE_EQ = 4, + IONIC_QTYPE_MAX = 16, +}; + +/** + * struct lif_logical_qtype - Descriptor of logical to hardware queue type. + * @qtype: Hardware Queue Type. + * @qid_count: Number of Queue IDs of the logical type. + * @qid_base: Minimum Queue ID of the logical type. + */ +struct ionic_lif_logical_qtype { + u8 qtype; + u8 rsvd[3]; + __le32 qid_count; + __le32 qid_base; +}; + +enum ionic_lif_state { + IONIC_LIF_DISABLE = 0, + IONIC_LIF_ENABLE = 1, + IONIC_LIF_HANG_RESET = 2, +}; + +/** + * LIF configuration + * @state: lif state (enum lif_state) + * @name: lif name + * @mtu: mtu + * @mac: station mac address + * @features: features (enum eth_hw_features) + * @queue_count: queue counts per queue-type + */ +union ionic_lif_config { + struct { + u8 state; + u8 rsvd[3]; + char name[IONIC_IFNAMSIZ]; + __le32 mtu; + u8 mac[6]; + u8 rsvd2[2]; + __le64 features; + __le32 queue_count[IONIC_QTYPE_MAX]; + }; + __le32 words[64]; +}; + +/** + * struct lif_identity - lif identity information (type-specific) + * + * @capabilities LIF capabilities + * + * Ethernet: + * @version: Ethernet identify structure version. + * @features: Ethernet features supported on this lif type. + * @max_ucast_filters: Number of perfect unicast addresses supported. + * @max_mcast_filters: Number of perfect multicast addresses supported. + * @min_frame_size: Minimum size of frames to be sent + * @max_frame_size: Maximim size of frames to be sent + * @config: LIF config struct with features, mtu, mac, q counts + * + * RDMA: + * @version: RDMA version of opcodes and queue descriptors. + * @qp_opcodes: Number of rdma queue pair opcodes supported. + * @admin_opcodes: Number of rdma admin opcodes supported. + * @npts_per_lif: Page table size per lif + * @nmrs_per_lif: Number of memory regions per lif + * @nahs_per_lif: Number of address handles per lif + * @max_stride: Max work request stride. + * @cl_stride: Cache line stride. + * @pte_stride: Page table entry stride. + * @rrq_stride: Remote RQ work request stride. + * @rsq_stride: Remote SQ work request stride. + * @dcqcn_profiles: Number of DCQCN profiles + * @aq_qtype: RDMA Admin Qtype. + * @sq_qtype: RDMA Send Qtype. + * @rq_qtype: RDMA Receive Qtype. + * @cq_qtype: RDMA Completion Qtype. + * @eq_qtype: RDMA Event Qtype. 
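The intr_coal_mult/intr_coal_div pair in union ionic_dev_identity above lets the driver translate user-visible microseconds into device ticks with device units = usecs * mult / div. A small standalone sketch of that conversion and its inverse (the function names and the example mult/div values are illustrative, not from this patch):

#include <stdint.h>
#include <stdio.h>

/* device units = usecs * mult / div, per the dev_identity fields above */
static uint32_t coal_usecs_to_hw(uint32_t usecs, uint32_t mult, uint32_t div)
{
    if (!div)                       /* guard an unprovisioned divisor */
        return 0;
    return (uint32_t)(((uint64_t)usecs * mult) / div);
}

static uint32_t coal_hw_to_usecs(uint32_t hw, uint32_t mult, uint32_t div)
{
    if (!mult)
        return 0;
    return (uint32_t)(((uint64_t)hw * div) / mult);
}

int main(void)
{
    uint32_t mult = 10, div = 1;    /* hypothetical: 1 usec == 10 device ticks */

    printf("64 usecs  -> %u device units\n", coal_usecs_to_hw(64, mult, div));
    printf("640 units -> %u usecs\n", coal_hw_to_usecs(640, mult, div));
    return 0;
}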
+ */ +union ionic_lif_identity { + struct { + __le64 capabilities; + + struct { + u8 version; + u8 rsvd[3]; + __le32 max_ucast_filters; + __le32 max_mcast_filters; + __le16 rss_ind_tbl_sz; + __le32 min_frame_size; + __le32 max_frame_size; + u8 rsvd2[106]; + union ionic_lif_config config; + } eth; + + struct { + u8 version; + u8 qp_opcodes; + u8 admin_opcodes; + u8 rsvd; + __le32 npts_per_lif; + __le32 nmrs_per_lif; + __le32 nahs_per_lif; + u8 max_stride; + u8 cl_stride; + u8 pte_stride; + u8 rrq_stride; + u8 rsq_stride; + u8 dcqcn_profiles; + u8 rsvd_dimensions[10]; + struct ionic_lif_logical_qtype aq_qtype; + struct ionic_lif_logical_qtype sq_qtype; + struct ionic_lif_logical_qtype rq_qtype; + struct ionic_lif_logical_qtype cq_qtype; + struct ionic_lif_logical_qtype eq_qtype; + } rdma; + }; + __le32 words[512]; +}; + +/** + * struct lif_init_cmd - LIF init command + * @opcode: opcode + * @type: LIF type (enum lif_type) + * @index: LIF index + * @info_pa: destination address for lif info (struct lif_info) + */ +struct ionic_lif_init_cmd { + u8 opcode; + u8 type; + __le16 index; + __le32 rsvd; + __le64 info_pa; + u8 rsvd2[48]; +}; + +/** + * struct lif_init_comp - LIF init command completion + * @status: The status of the command (enum status_code) + */ +struct ionic_lif_init_comp { + u8 status; + u8 rsvd; + __le16 hw_index; + u8 rsvd2[12]; +}; + +/** + * struct q_init_cmd - Queue init command + * @opcode: opcode + * @type: Logical queue type + * @ver: Queue version (defines opcode/descriptor scope) + * @lif_index: LIF index + * @index: (lif, qtype) relative admin queue index + * @intr_index: Interrupt control register index + * @pid: Process ID + * @flags: + * IRQ: Interrupt requested on completion + * ENA: Enable the queue. If ENA=0 the queue is initialized + * but remains disabled, to be later enabled with the + * Queue Enable command. If ENA=1, then queue is + * initialized and then enabled. + * SG: Enable Scatter-Gather on the queue. + * in number of descs. The actual ring size is + * (1 << ring_size). For example, to + * select a ring size of 64 descriptors write + * ring_size = 6. The minimum ring_size value is 2 + * for a ring size of 4 descriptors. The maximum + * ring_size value is 16 for a ring size of 64k + * descriptors. Values of ring_size <2 and >16 are + * reserved. + * EQ: Enable the Event Queue + * @cos: Class of service for this queue. + * @ring_size: Queue ring size, encoded as a log2(size) + * @ring_base: Queue ring base address + * @cq_ring_base: Completion queue ring base address + * @sg_ring_base: Scatter/Gather ring base address + * @eq_index: Event queue index + */ +struct ionic_q_init_cmd { + u8 opcode; + u8 rsvd; + __le16 lif_index; + u8 type; + u8 ver; + u8 rsvd1[2]; + __le32 index; + __le16 pid; + __le16 intr_index; + __le16 flags; +#define IONIC_QINIT_F_IRQ 0x01 /* Request interrupt on completion */ +#define IONIC_QINIT_F_ENA 0x02 /* Enable the queue */ +#define IONIC_QINIT_F_SG 0x04 /* Enable scatter/gather on the queue */ +#define IONIC_QINIT_F_EQ 0x08 /* Enable event queue */ +#define IONIC_QINIT_F_DEBUG 0x80 /* Enable queue debugging */ + u8 cos; + u8 ring_size; + __le64 ring_base; + __le64 cq_ring_base; + __le64 sg_ring_base; + __le32 eq_index; + u8 rsvd2[16]; +}; + +/** + * struct q_init_comp - Queue init command completion + * @status: The status of the command (enum status_code) + * @ver: Queue version (defines opcode/descriptor scope) + * @comp_index: The index in the descriptor ring for which this + * is the completion. 
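The ring_size field of the queue init command above is a log2 encoding: writing 6 selects a 64-entry ring, and legal values run from 2 (4 descriptors) to 16 (64k descriptors). A hedged helper that rounds a requested descriptor count up to the nearest legal encoding might look like this (the function name and clamping policy are mine, not the driver's):

#include <stdint.h>
#include <stdio.h>

/*
 * Encode a requested descriptor count as the q_init ring_size field:
 * log2 of the actual ring size, clamped to the 2..16 range described
 * above (4 to 65536 descriptors), rounding the request up.
 */
static uint8_t ring_size_encode(uint32_t want_descs)
{
    uint8_t log2_sz = 2;            /* minimum legal encoding */

    while ((1u << log2_sz) < want_descs && log2_sz < 16)
        log2_sz++;
    return log2_sz;
}

int main(void)
{
    printf("1000 descs -> ring_size %u (%u entries)\n",
           ring_size_encode(1000), 1u << ring_size_encode(1000));
    printf("   3 descs -> ring_size %u (%u entries)\n",
           ring_size_encode(3), 1u << ring_size_encode(3));
    return 0;
}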
+ * @hw_index: Hardware Queue ID + * @hw_type: Hardware Queue type + * @color: Color + */ +struct ionic_q_init_comp { + u8 status; + u8 ver; + __le16 comp_index; + __le32 hw_index; + u8 hw_type; + u8 rsvd2[6]; + u8 color; +}; + +/* the device's internal addressing uses up to 52 bits */ +#define IONIC_ADDR_LEN 52 +#define IONIC_ADDR_MASK (BIT_ULL(IONIC_ADDR_LEN) - 1) + +enum ionic_txq_desc_opcode { + IONIC_TXQ_DESC_OPCODE_CSUM_NONE = 0, + IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL = 1, + IONIC_TXQ_DESC_OPCODE_CSUM_HW = 2, + IONIC_TXQ_DESC_OPCODE_TSO = 3, +}; + +/** + * struct txq_desc - Ethernet Tx queue descriptor format + * @opcode: Tx operation, see TXQ_DESC_OPCODE_*: + * + * IONIC_TXQ_DESC_OPCODE_CSUM_NONE: + * + * Non-offload send. No segmentation, + * fragmentation or checksum calc/insertion is + * performed by device; packet is prepared + * to send by software stack and requires + * no further manipulation from device. + * + * IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL: + * + * Offload 16-bit L4 checksum + * calculation/insertion. The device will + * calculate the L4 checksum value and + * insert the result in the packet's L4 + * header checksum field. The L4 checksum + * is calculated starting at @csum_start bytes + * into the packet to the end of the packet. + * The checksum insertion position is given + * in @csum_offset. This feature is only + * applicable to protocols such as TCP, UDP + * and ICMP where a standard (i.e. the + * 'IP-style' checksum) one's complement + * 16-bit checksum is used, using an IP + * pseudo-header to seed the calculation. + * Software will preload the L4 checksum + * field with the IP pseudo-header checksum. + * + * For tunnel encapsulation, @csum_start and + * @csum_offset refer to the inner L4 + * header. Supported tunnels encapsulations + * are: IPIP, GRE, and UDP. If the @encap + * is clear, no further processing by the + * device is required; software will + * calculate the outer header checksums. If + * the @encap is set, the device will + * offload the outer header checksums using + * LCO (local checksum offload) (see + * Documentation/networking/checksum- + * offloads.txt for more info). + * + * IONIC_TXQ_DESC_OPCODE_CSUM_HW: + * + * Offload 16-bit checksum computation to hardware. + * If @csum_l3 is set then the packet's L3 checksum is + * updated. Similarly, if @csum_l4 is set the the L4 + * checksum is updated. If @encap is set then encap header + * checksums are also updated. + * + * IONIC_TXQ_DESC_OPCODE_TSO: + * + * Device preforms TCP segmentation offload + * (TSO). @hdr_len is the number of bytes + * to the end of TCP header (the offset to + * the TCP payload). @mss is the desired + * MSS, the TCP payload length for each + * segment. The device will calculate/ + * insert IP (IPv4 only) and TCP checksums + * for each segment. In the first data + * buffer containing the header template, + * the driver will set IPv4 checksum to 0 + * and preload TCP checksum with the IP + * pseudo header calculated with IP length = 0. + * + * Supported tunnel encapsulations are IPIP, + * layer-3 GRE, and UDP. @hdr_len includes + * both outer and inner headers. The driver + * will set IPv4 checksum to zero and + * preload TCP checksum with IP pseudo + * header on the inner header. + * + * TCP ECN offload is supported. The device + * will set CWR flag in the first segment if + * CWR is set in the template header, and + * clear CWR in remaining segments. + * @flags: + * vlan: + * Insert an L2 VLAN header using @vlan_tci. + * encap: + * Calculate encap header checksum. 
+ * csum_l3: + * Compute L3 header checksum. + * csum_l4: + * Compute L4 header checksum. + * tso_sot: + * TSO start + * tso_eot: + * TSO end + * @num_sg_elems: Number of scatter-gather elements in SG + * descriptor + * @addr: First data buffer's DMA address. + * (Subsequent data buffers are on txq_sg_desc). + * @len: First data buffer's length, in bytes + * @vlan_tci: VLAN tag to insert in the packet (if requested + * by @V-bit). Includes .1p and .1q tags + * @hdr_len: Length of packet headers, including + * encapsulating outer header, if applicable. + * Valid for opcodes TXQ_DESC_OPCODE_CALC_CSUM and + * TXQ_DESC_OPCODE_TSO. Should be set to zero for + * all other modes. For + * TXQ_DESC_OPCODE_CALC_CSUM, @hdr_len is length + * of headers up to inner-most L4 header. For + * TXQ_DESC_OPCODE_TSO, @hdr_len is up to + * inner-most L4 payload, so inclusive of + * inner-most L4 header. + * @mss: Desired MSS value for TSO. Only applicable for + * TXQ_DESC_OPCODE_TSO. + * @csum_start: Offset into inner-most L3 header of checksum + * @csum_offset: Offset into inner-most L4 header of checksum + */ + +#define IONIC_TXQ_DESC_OPCODE_MASK 0xf +#define IONIC_TXQ_DESC_OPCODE_SHIFT 4 +#define IONIC_TXQ_DESC_FLAGS_MASK 0xf +#define IONIC_TXQ_DESC_FLAGS_SHIFT 0 +#define IONIC_TXQ_DESC_NSGE_MASK 0xf +#define IONIC_TXQ_DESC_NSGE_SHIFT 8 +#define IONIC_TXQ_DESC_ADDR_MASK (BIT_ULL(IONIC_ADDR_LEN) - 1) +#define IONIC_TXQ_DESC_ADDR_SHIFT 12 + +/* common flags */ +#define IONIC_TXQ_DESC_FLAG_VLAN 0x1 +#define IONIC_TXQ_DESC_FLAG_ENCAP 0x2 + +/* flags for csum_hw opcode */ +#define IONIC_TXQ_DESC_FLAG_CSUM_L3 0x4 +#define IONIC_TXQ_DESC_FLAG_CSUM_L4 0x8 + +/* flags for tso opcode */ +#define IONIC_TXQ_DESC_FLAG_TSO_SOT 0x4 +#define IONIC_TXQ_DESC_FLAG_TSO_EOT 0x8 + +struct ionic_txq_desc { + __le64 cmd; + __le16 len; + union { + __le16 vlan_tci; + __le16 hword0; + }; + union { + __le16 csum_start; + __le16 hdr_len; + __le16 hword1; + }; + union { + __le16 csum_offset; + __le16 mss; + __le16 hword2; + }; +}; + +static inline u64 encode_txq_desc_cmd(u8 opcode, u8 flags, + u8 nsge, u64 addr) +{ + u64 cmd; + + cmd = (opcode & IONIC_TXQ_DESC_OPCODE_MASK) << IONIC_TXQ_DESC_OPCODE_SHIFT; + cmd |= (flags & IONIC_TXQ_DESC_FLAGS_MASK) << IONIC_TXQ_DESC_FLAGS_SHIFT; + cmd |= (nsge & IONIC_TXQ_DESC_NSGE_MASK) << IONIC_TXQ_DESC_NSGE_SHIFT; + cmd |= (addr & IONIC_TXQ_DESC_ADDR_MASK) << IONIC_TXQ_DESC_ADDR_SHIFT; + + return cmd; +}; + +static inline void decode_txq_desc_cmd(u64 cmd, u8 *opcode, u8 *flags, + u8 *nsge, u64 *addr) +{ + *opcode = (cmd >> IONIC_TXQ_DESC_OPCODE_SHIFT) & IONIC_TXQ_DESC_OPCODE_MASK; + *flags = (cmd >> IONIC_TXQ_DESC_FLAGS_SHIFT) & IONIC_TXQ_DESC_FLAGS_MASK; + *nsge = (cmd >> IONIC_TXQ_DESC_NSGE_SHIFT) & IONIC_TXQ_DESC_NSGE_MASK; + *addr = (cmd >> IONIC_TXQ_DESC_ADDR_SHIFT) & IONIC_TXQ_DESC_ADDR_MASK; +}; + +#define IONIC_TX_MAX_SG_ELEMS 8 +#define IONIC_RX_MAX_SG_ELEMS 8 + +/** + * struct txq_sg_desc - Transmit scatter-gather (SG) list + * @addr: DMA address of SG element data buffer + * @len: Length of SG element data buffer, in bytes + */ +struct ionic_txq_sg_desc { + struct ionic_txq_sg_elem { + __le64 addr; + __le16 len; + __le16 rsvd[3]; + } elems[IONIC_TX_MAX_SG_ELEMS]; +}; + +/** + * struct txq_comp - Ethernet transmit queue completion descriptor + * @status: The status of the command (enum status_code) + * @comp_index: The index in the descriptor ring for which this + * is the completion. + * @color: Color bit. 
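The 64-bit cmd word of struct ionic_txq_desc packs flags, opcode, SG element count, and a 52-bit buffer address exactly as encode_txq_desc_cmd() above does. The standalone re-derivation below shows the same bit layout for a single-buffer TSO descriptor; it is an illustration, not the kernel helper itself:

#include <stdint.h>
#include <stdio.h>

/* Same layout as encode_txq_desc_cmd() above:
 * bits 0..3 flags, 4..7 opcode, 8..11 nsge, 12..63 buffer address. */
#define OPCODE_TSO    3             /* IONIC_TXQ_DESC_OPCODE_TSO */
#define FLAG_TSO_SOT  0x4
#define FLAG_TSO_EOT  0x8

static uint64_t encode_cmd(uint8_t opcode, uint8_t flags, uint8_t nsge,
                           uint64_t addr)
{
    uint64_t cmd;

    cmd  = (uint64_t)(flags  & 0xf) << 0;
    cmd |= (uint64_t)(opcode & 0xf) << 4;
    cmd |= (uint64_t)(nsge   & 0xf) << 8;
    cmd |= (addr & ((1ull << 52) - 1)) << 12;   /* 52-bit DMA address */
    return cmd;
}

int main(void)
{
    /* one-buffer TSO descriptor that both starts and ends the TSO burst */
    uint64_t cmd = encode_cmd(OPCODE_TSO, FLAG_TSO_SOT | FLAG_TSO_EOT,
                              0, 0x123456789abcull);

    printf("txq cmd word = 0x%016llx\n", (unsigned long long)cmd);
    return 0;
}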
+ */ +struct ionic_txq_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 rsvd2[11]; + u8 color; +}; + +enum ionic_rxq_desc_opcode { + IONIC_RXQ_DESC_OPCODE_SIMPLE = 0, + IONIC_RXQ_DESC_OPCODE_SG = 1, +}; + +/** + * struct rxq_desc - Ethernet Rx queue descriptor format + * @opcode: Rx operation, see RXQ_DESC_OPCODE_*: + * + * RXQ_DESC_OPCODE_SIMPLE: + * + * Receive full packet into data buffer + * starting at @addr. Results of + * receive, including actual bytes received, + * are recorded in Rx completion descriptor. + * + * @len: Data buffer's length, in bytes. + * @addr: Data buffer's DMA address + */ +struct ionic_rxq_desc { + u8 opcode; + u8 rsvd[5]; + __le16 len; + __le64 addr; +}; + +/** + * struct rxq_sg_desc - Receive scatter-gather (SG) list + * @addr: DMA address of SG element data buffer + * @len: Length of SG element data buffer, in bytes + */ +struct ionic_rxq_sg_desc { + struct ionic_rxq_sg_elem { + __le64 addr; + __le16 len; + __le16 rsvd[3]; + } elems[IONIC_RX_MAX_SG_ELEMS]; +}; + +/** + * struct rxq_comp - Ethernet receive queue completion descriptor + * @status: The status of the command (enum status_code) + * @num_sg_elems: Number of SG elements used by this descriptor + * @comp_index: The index in the descriptor ring for which this + * is the completion. + * @rss_hash: 32-bit RSS hash + * @csum: 16-bit sum of the packet's L2 payload. + * If the packet's L2 payload is odd length, an extra + * zero-value byte is included in the @csum calculation but + * not included in @len. + * @vlan_tci: VLAN tag stripped from the packet. Valid if @VLAN is + * set. Includes .1p and .1q tags. + * @len: Received packet length, in bytes. Excludes FCS. + * @csum_calc L2 payload checksum is computed or not + * @csum_tcp_ok: The TCP checksum calculated by the device + * matched the checksum in the receive packet's + * TCP header + * @csum_tcp_bad: The TCP checksum calculated by the device did + * not match the checksum in the receive packet's + * TCP header. + * @csum_udp_ok: The UDP checksum calculated by the device + * matched the checksum in the receive packet's + * UDP header + * @csum_udp_bad: The UDP checksum calculated by the device did + * not match the checksum in the receive packet's + * UDP header. + * @csum_ip_ok: The IPv4 checksum calculated by the device + * matched the checksum in the receive packet's + * first IPv4 header. If the receive packet + * contains both a tunnel IPv4 header and a + * transport IPv4 header, the device validates the + * checksum for the both IPv4 headers. + * @csum_ip_bad: The IPv4 checksum calculated by the device did + * not match the checksum in the receive packet's + * first IPv4 header. If the receive packet + * contains both a tunnel IPv4 header and a + * transport IPv4 header, the device validates the + * checksum for both IP headers. + * @VLAN: VLAN header was stripped and placed in @vlan_tci. + * @pkt_type: Packet type + * @color: Color bit. 
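On the receive side, a driver consumes the completion described above by checking the checksum flag bits and splitting pkt_type_color into its packet-type and color halves. A hedged sketch of that decode, mirroring the IONIC_RXQ_COMP_CSUM_F_* defines that follow and assuming the color occupies the same 0x80 bit as IONIC_COMP_COLOR_MASK:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the IONIC_RXQ_COMP_CSUM_F_* flags and the pkt_type/color
 * packing defined just below; color assumed to be the 0x80 bit. */
#define CSUM_F_TCP_OK  0x01
#define CSUM_F_UDP_OK  0x04
#define CSUM_F_IP_OK   0x10
#define CSUM_F_CALC    0x80
#define PKT_TYPE_MASK  0x0f
#define COLOR_BIT      0x80

int main(void)
{
    uint8_t csum_flags     = CSUM_F_CALC | CSUM_F_IP_OK | CSUM_F_TCP_OK;
    uint8_t pkt_type_color = COLOR_BIT | 0x03;     /* color=1, IPv4 TCP */

    if ((csum_flags & CSUM_F_CALC) &&
        (csum_flags & (CSUM_F_TCP_OK | CSUM_F_UDP_OK)))
        printf("L4 checksum verified by hardware\n");

    printf("pkt_type=0x%02x color=%u\n",
           pkt_type_color & PKT_TYPE_MASK, !!(pkt_type_color & COLOR_BIT));
    return 0;
}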
+ */ +struct ionic_rxq_comp { + u8 status; + u8 num_sg_elems; + __le16 comp_index; + __le32 rss_hash; + __le16 csum; + __le16 vlan_tci; + __le16 len; + u8 csum_flags; +#define IONIC_RXQ_COMP_CSUM_F_TCP_OK 0x01 +#define IONIC_RXQ_COMP_CSUM_F_TCP_BAD 0x02 +#define IONIC_RXQ_COMP_CSUM_F_UDP_OK 0x04 +#define IONIC_RXQ_COMP_CSUM_F_UDP_BAD 0x08 +#define IONIC_RXQ_COMP_CSUM_F_IP_OK 0x10 +#define IONIC_RXQ_COMP_CSUM_F_IP_BAD 0x20 +#define IONIC_RXQ_COMP_CSUM_F_VLAN 0x40 +#define IONIC_RXQ_COMP_CSUM_F_CALC 0x80 + u8 pkt_type_color; +#define IONIC_RXQ_COMP_PKT_TYPE_MASK 0x0f +}; + +enum ionic_pkt_type { + IONIC_PKT_TYPE_NON_IP = 0x000, + IONIC_PKT_TYPE_IPV4 = 0x001, + IONIC_PKT_TYPE_IPV4_TCP = 0x003, + IONIC_PKT_TYPE_IPV4_UDP = 0x005, + IONIC_PKT_TYPE_IPV6 = 0x008, + IONIC_PKT_TYPE_IPV6_TCP = 0x018, + IONIC_PKT_TYPE_IPV6_UDP = 0x028, +}; + +enum ionic_eth_hw_features { + IONIC_ETH_HW_VLAN_TX_TAG = BIT(0), + IONIC_ETH_HW_VLAN_RX_STRIP = BIT(1), + IONIC_ETH_HW_VLAN_RX_FILTER = BIT(2), + IONIC_ETH_HW_RX_HASH = BIT(3), + IONIC_ETH_HW_RX_CSUM = BIT(4), + IONIC_ETH_HW_TX_SG = BIT(5), + IONIC_ETH_HW_RX_SG = BIT(6), + IONIC_ETH_HW_TX_CSUM = BIT(7), + IONIC_ETH_HW_TSO = BIT(8), + IONIC_ETH_HW_TSO_IPV6 = BIT(9), + IONIC_ETH_HW_TSO_ECN = BIT(10), + IONIC_ETH_HW_TSO_GRE = BIT(11), + IONIC_ETH_HW_TSO_GRE_CSUM = BIT(12), + IONIC_ETH_HW_TSO_IPXIP4 = BIT(13), + IONIC_ETH_HW_TSO_IPXIP6 = BIT(14), + IONIC_ETH_HW_TSO_UDP = BIT(15), + IONIC_ETH_HW_TSO_UDP_CSUM = BIT(16), +}; + +/** + * struct q_control_cmd - Queue control command + * @opcode: opcode + * @type: Queue type + * @lif_index: LIF index + * @index: Queue index + * @oper: Operation (enum q_control_oper) + */ +struct ionic_q_control_cmd { + u8 opcode; + u8 type; + __le16 lif_index; + __le32 index; + u8 oper; + u8 rsvd[55]; +}; + +typedef struct ionic_admin_comp ionic_q_control_comp; + +enum q_control_oper { + IONIC_Q_DISABLE = 0, + IONIC_Q_ENABLE = 1, + IONIC_Q_HANG_RESET = 2, +}; + +/** + * Physical connection type + */ +enum ionic_phy_type { + IONIC_PHY_TYPE_NONE = 0, + IONIC_PHY_TYPE_COPPER = 1, + IONIC_PHY_TYPE_FIBER = 2, +}; + +/** + * Transceiver status + */ +enum ionic_xcvr_state { + IONIC_XCVR_STATE_REMOVED = 0, + IONIC_XCVR_STATE_INSERTED = 1, + IONIC_XCVR_STATE_PENDING = 2, + IONIC_XCVR_STATE_SPROM_READ = 3, + IONIC_XCVR_STATE_SPROM_READ_ERR = 4, +}; + +/** + * Supported link modes + */ +enum ionic_xcvr_pid { + IONIC_XCVR_PID_UNKNOWN = 0, + + /* CU */ + IONIC_XCVR_PID_QSFP_100G_CR4 = 1, + IONIC_XCVR_PID_QSFP_40GBASE_CR4 = 2, + IONIC_XCVR_PID_SFP_25GBASE_CR_S = 3, + IONIC_XCVR_PID_SFP_25GBASE_CR_L = 4, + IONIC_XCVR_PID_SFP_25GBASE_CR_N = 5, + + /* Fiber */ + IONIC_XCVR_PID_QSFP_100G_AOC = 50, + IONIC_XCVR_PID_QSFP_100G_ACC = 51, + IONIC_XCVR_PID_QSFP_100G_SR4 = 52, + IONIC_XCVR_PID_QSFP_100G_LR4 = 53, + IONIC_XCVR_PID_QSFP_100G_ER4 = 54, + IONIC_XCVR_PID_QSFP_40GBASE_ER4 = 55, + IONIC_XCVR_PID_QSFP_40GBASE_SR4 = 56, + IONIC_XCVR_PID_QSFP_40GBASE_LR4 = 57, + IONIC_XCVR_PID_QSFP_40GBASE_AOC = 58, + IONIC_XCVR_PID_SFP_25GBASE_SR = 59, + IONIC_XCVR_PID_SFP_25GBASE_LR = 60, + IONIC_XCVR_PID_SFP_25GBASE_ER = 61, + IONIC_XCVR_PID_SFP_25GBASE_AOC = 62, + IONIC_XCVR_PID_SFP_10GBASE_SR = 63, + IONIC_XCVR_PID_SFP_10GBASE_LR = 64, + IONIC_XCVR_PID_SFP_10GBASE_LRM = 65, + IONIC_XCVR_PID_SFP_10GBASE_ER = 66, + IONIC_XCVR_PID_SFP_10GBASE_AOC = 67, + IONIC_XCVR_PID_SFP_10GBASE_CU = 68, + IONIC_XCVR_PID_QSFP_100G_CWDM4 = 69, + IONIC_XCVR_PID_QSFP_100G_PSM4 = 70, +}; + +/** + * Port types + */ +enum ionic_port_type { + IONIC_PORT_TYPE_NONE = 0, /* port type not 
configured */ + IONIC_PORT_TYPE_ETH = 1, /* port carries ethernet traffic (inband) */ + IONIC_PORT_TYPE_MGMT = 2, /* port carries mgmt traffic (out-of-band) */ +}; + +/** + * Port config state + */ +enum ionic_port_admin_state { + IONIC_PORT_ADMIN_STATE_NONE = 0, /* port admin state not configured */ + IONIC_PORT_ADMIN_STATE_DOWN = 1, /* port is admin disabled */ + IONIC_PORT_ADMIN_STATE_UP = 2, /* port is admin enabled */ +}; + +/** + * Port operational status + */ +enum ionic_port_oper_status { + IONIC_PORT_OPER_STATUS_NONE = 0, /* port is disabled */ + IONIC_PORT_OPER_STATUS_UP = 1, /* port is linked up */ + IONIC_PORT_OPER_STATUS_DOWN = 2, /* port link status is down */ +}; + +/** + * Ethernet Forward error correction (fec) modes + */ +enum ionic_port_fec_type { + IONIC_PORT_FEC_TYPE_NONE = 0, /* Disabled */ + IONIC_PORT_FEC_TYPE_FC = 1, /* FireCode */ + IONIC_PORT_FEC_TYPE_RS = 2, /* ReedSolomon */ +}; + +/** + * Ethernet pause (flow control) modes + */ +enum ionic_port_pause_type { + IONIC_PORT_PAUSE_TYPE_NONE = 0, /* Disable Pause */ + IONIC_PORT_PAUSE_TYPE_LINK = 1, /* Link level pause */ + IONIC_PORT_PAUSE_TYPE_PFC = 2, /* Priority-Flow control */ +}; + +/** + * Loopback modes + */ +enum ionic_port_loopback_mode { + IONIC_PORT_LOOPBACK_MODE_NONE = 0, /* Disable loopback */ + IONIC_PORT_LOOPBACK_MODE_MAC = 1, /* MAC loopback */ + IONIC_PORT_LOOPBACK_MODE_PHY = 2, /* PHY/Serdes loopback */ +}; + +/** + * Transceiver Status information + * @state: Transceiver status (enum xcvr_state) + * @phy: Physical connection type (enum phy_type) + * @pid: Transceiver link mode (enum pid) + * @sprom: Transceiver sprom contents + */ +struct ionic_xcvr_status { + u8 state; + u8 phy; + __le16 pid; + u8 sprom[256]; +}; + +/** + * Port configuration + * @speed: port speed (in Mbps) + * @mtu: mtu + * @state: port admin state (enum port_admin_state) + * @an_enable: autoneg enable + * @fec_type: fec type (enum port_fec_type) + * @pause_type: pause type (enum port_pause_type) + * @loopback_mode: loopback mode (enum port_loopback_mode) + */ +union ionic_port_config { + struct { +#define IONIC_SPEED_100G 100000 /* 100G in Mbps */ +#define IONIC_SPEED_50G 50000 /* 50G in Mbps */ +#define IONIC_SPEED_40G 40000 /* 40G in Mbps */ +#define IONIC_SPEED_25G 25000 /* 25G in Mbps */ +#define IONIC_SPEED_10G 10000 /* 10G in Mbps */ +#define IONIC_SPEED_1G 1000 /* 1G in Mbps */ + __le32 speed; + __le32 mtu; + u8 state; + u8 an_enable; + u8 fec_type; +#define IONIC_PAUSE_TYPE_MASK 0x0f +#define IONIC_PAUSE_FLAGS_MASK 0xf0 +#define IONIC_PAUSE_F_TX 0x10 +#define IONIC_PAUSE_F_RX 0x20 + u8 pause_type; + u8 loopback_mode; + }; + __le32 words[64]; +}; + +/** + * Port Status information + * @status: link status (enum port_oper_status) + * @id: port id + * @speed: link speed (in Mbps) + * @xcvr: tranceiver status + */ +struct ionic_port_status { + __le32 id; + __le32 speed; + u8 status; + u8 rsvd[51]; + struct ionic_xcvr_status xcvr; +}; + +/** + * struct port_identify_cmd - Port identify command + * @opcode: opcode + * @index: port index + * @ver: Highest version of identify supported by driver + */ +struct ionic_port_identify_cmd { + u8 opcode; + u8 index; + u8 ver; + u8 rsvd[61]; +}; + +/** + * struct port_identify_comp - Port identify command completion + * @status: The status of the command (enum status_code) + * @ver: Version of identify returned by device + */ +struct ionic_port_identify_comp { + u8 status; + u8 ver; + u8 rsvd[14]; +}; + +/** + * struct port_init_cmd - Port initialization command + * @opcode: opcode + 
* @index: port index + * @info_pa: destination address for port info (struct port_info) + */ +struct ionic_port_init_cmd { + u8 opcode; + u8 index; + u8 rsvd[6]; + __le64 info_pa; + u8 rsvd2[48]; +}; + +/** + * struct port_init_comp - Port initialization command completion + * @status: The status of the command (enum status_code) + */ +struct ionic_port_init_comp { + u8 status; + u8 rsvd[15]; +}; + +/** + * struct port_reset_cmd - Port reset command + * @opcode: opcode + * @index: port index + */ +struct ionic_port_reset_cmd { + u8 opcode; + u8 index; + u8 rsvd[62]; +}; + +/** + * struct port_reset_comp - Port reset command completion + * @status: The status of the command (enum status_code) + */ +struct ionic_port_reset_comp { + u8 status; + u8 rsvd[15]; +}; + +/** + * enum stats_ctl_cmd - List of commands for stats control + */ +enum ionic_stats_ctl_cmd { + IONIC_STATS_CTL_RESET = 0, +}; + + +/** + * enum ionic_port_attr - List of device attributes + */ +enum ionic_port_attr { + IONIC_PORT_ATTR_STATE = 0, + IONIC_PORT_ATTR_SPEED = 1, + IONIC_PORT_ATTR_MTU = 2, + IONIC_PORT_ATTR_AUTONEG = 3, + IONIC_PORT_ATTR_FEC = 4, + IONIC_PORT_ATTR_PAUSE = 5, + IONIC_PORT_ATTR_LOOPBACK = 6, + IONIC_PORT_ATTR_STATS_CTRL = 7, +}; + +/** + * struct port_setattr_cmd - Set port attributes on the NIC + * @opcode: Opcode + * @index: port index + * @attr: Attribute type (enum ionic_port_attr) + */ +struct ionic_port_setattr_cmd { + u8 opcode; + u8 index; + u8 attr; + u8 rsvd; + union { + u8 state; + __le32 speed; + __le32 mtu; + u8 an_enable; + u8 fec_type; + u8 pause_type; + u8 loopback_mode; + u8 stats_ctl; + u8 rsvd2[60]; + }; +}; + +/** + * struct port_setattr_comp - Port set attr command completion + * @status: The status of the command (enum status_code) + * @color: Color bit + */ +struct ionic_port_setattr_comp { + u8 status; + u8 rsvd[14]; + u8 color; +}; + +/** + * struct port_getattr_cmd - Get port attributes from the NIC + * @opcode: Opcode + * @index: port index + * @attr: Attribute type (enum ionic_port_attr) + */ +struct ionic_port_getattr_cmd { + u8 opcode; + u8 index; + u8 attr; + u8 rsvd[61]; +}; + +/** + * struct port_getattr_comp - Port get attr command completion + * @status: The status of the command (enum status_code) + * @color: Color bit + */ +struct ionic_port_getattr_comp { + u8 status; + u8 rsvd[3]; + union { + u8 state; + __le32 speed; + __le32 mtu; + u8 an_enable; + u8 fec_type; + u8 pause_type; + u8 loopback_mode; + u8 rsvd2[11]; + }; + u8 color; +}; + +/** + * struct lif_status - Lif status register + * @eid: most recent NotifyQ event id + * @port_num: port the lif is connected to + * @link_status: port status (enum port_oper_status) + * @link_speed: speed of link in Mbps + * @link_down_count: number of times link status changes + */ +struct ionic_lif_status { + __le64 eid; + u8 port_num; + u8 rsvd; + __le16 link_status; + __le32 link_speed; /* units of 1Mbps: eg 10000 = 10Gbps */ + __le16 link_down_count; + u8 rsvd2[46]; +}; + +/** + * struct lif_reset_cmd - LIF reset command + * @opcode: opcode + * @index: LIF index + */ +struct ionic_lif_reset_cmd { + u8 opcode; + u8 rsvd; + __le16 index; + __le32 rsvd2[15]; +}; + +typedef struct ionic_admin_comp ionic_lif_reset_comp; + +enum ionic_dev_state { + IONIC_DEV_DISABLE = 0, + IONIC_DEV_ENABLE = 1, + IONIC_DEV_HANG_RESET = 2, +}; + +/** + * enum dev_attr - List of device attributes + */ +enum ionic_dev_attr { + IONIC_DEV_ATTR_STATE = 0, + IONIC_DEV_ATTR_NAME = 1, + IONIC_DEV_ATTR_FEATURES = 2, +}; + +/** + * struct dev_setattr_cmd - 
Set Device attributes on the NIC + * @opcode: Opcode + * @attr: Attribute type (enum dev_attr) + * @state: Device state (enum dev_state) + * @name: The bus info, e.g. PCI slot-device-function, 0 terminated + * @features: Device features + */ +struct ionic_dev_setattr_cmd { + u8 opcode; + u8 attr; + __le16 rsvd; + union { + u8 state; + char name[IONIC_IFNAMSIZ]; + __le64 features; + u8 rsvd2[60]; + }; +}; + +/** + * struct dev_setattr_comp - Device set attr command completion + * @status: The status of the command (enum status_code) + * @features: Device features + * @color: Color bit + */ +struct ionic_dev_setattr_comp { + u8 status; + u8 rsvd[3]; + union { + __le64 features; + u8 rsvd2[11]; + }; + u8 color; +}; + +/** + * struct dev_getattr_cmd - Get Device attributes from the NIC + * @opcode: opcode + * @attr: Attribute type (enum dev_attr) + */ +struct ionic_dev_getattr_cmd { + u8 opcode; + u8 attr; + u8 rsvd[62]; +}; + +/** + * struct dev_setattr_comp - Device set attr command completion + * @status: The status of the command (enum status_code) + * @features: Device features + * @color: Color bit + */ +struct ionic_dev_getattr_comp { + u8 status; + u8 rsvd[3]; + union { + __le64 features; + u8 rsvd2[11]; + }; + u8 color; +}; + +/** + * RSS parameters + */ +#define IONIC_RSS_HASH_KEY_SIZE 40 + +enum ionic_rss_hash_types { + IONIC_RSS_TYPE_IPV4 = BIT(0), + IONIC_RSS_TYPE_IPV4_TCP = BIT(1), + IONIC_RSS_TYPE_IPV4_UDP = BIT(2), + IONIC_RSS_TYPE_IPV6 = BIT(3), + IONIC_RSS_TYPE_IPV6_TCP = BIT(4), + IONIC_RSS_TYPE_IPV6_UDP = BIT(5), +}; + +/** + * enum lif_attr - List of LIF attributes + */ +enum ionic_lif_attr { + IONIC_LIF_ATTR_STATE = 0, + IONIC_LIF_ATTR_NAME = 1, + IONIC_LIF_ATTR_MTU = 2, + IONIC_LIF_ATTR_MAC = 3, + IONIC_LIF_ATTR_FEATURES = 4, + IONIC_LIF_ATTR_RSS = 5, + IONIC_LIF_ATTR_STATS_CTRL = 6, +}; + +/** + * struct lif_setattr_cmd - Set LIF attributes on the NIC + * @opcode: Opcode + * @type: Attribute type (enum lif_attr) + * @index: LIF index + * @state: lif state (enum lif_state) + * @name: The netdev name string, 0 terminated + * @mtu: Mtu + * @mac: Station mac + * @features: Features (enum eth_hw_features) + * @rss: RSS properties + * @types: The hash types to enable (see rss_hash_types). + * @key: The hash secret key. + * @addr: Address for the indirection table shared memory. + * @stats_ctl: stats control commands (enum stats_ctl_cmd) + */ +struct ionic_lif_setattr_cmd { + u8 opcode; + u8 attr; + __le16 index; + union { + u8 state; + char name[IONIC_IFNAMSIZ]; + __le32 mtu; + u8 mac[6]; + __le64 features; + struct { + __le16 types; + u8 key[IONIC_RSS_HASH_KEY_SIZE]; + u8 rsvd[6]; + __le64 addr; + } rss; + u8 stats_ctl; + u8 rsvd[60]; + }; +}; + +/** + * struct lif_setattr_comp - LIF set attr command completion + * @status: The status of the command (enum status_code) + * @comp_index: The index in the descriptor ring for which this + * is the completion. 
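The RSS branch of the LIF setattr command above carries a hash-type bitmask, a 40-byte secret key, and the DMA address of the indirection table. A minimal sketch of assembling the first two pieces (a real driver would generate a random key and program the indirection table separately):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Mirrors enum ionic_rss_hash_types and IONIC_RSS_HASH_KEY_SIZE above */
#define RSS_TYPE_IPV4      (1u << 0)
#define RSS_TYPE_IPV4_TCP  (1u << 1)
#define RSS_TYPE_IPV6      (1u << 3)
#define RSS_TYPE_IPV6_TCP  (1u << 4)
#define RSS_HASH_KEY_SIZE  40

int main(void)
{
    uint16_t types = RSS_TYPE_IPV4 | RSS_TYPE_IPV4_TCP |
                     RSS_TYPE_IPV6 | RSS_TYPE_IPV6_TCP;
    uint8_t key[RSS_HASH_KEY_SIZE];

    /* a real driver would use random bytes; a fixed fill keeps this runnable */
    memset(key, 0x6d, sizeof(key));

    printf("rss types=0x%04x key=%zu bytes\n", types, sizeof(key));
    return 0;
}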
+ * @features: features (enum eth_hw_features) + * @color: Color bit + */ +struct ionic_lif_setattr_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + union { + __le64 features; + u8 rsvd2[11]; + }; + u8 color; +}; + +/** + * struct lif_getattr_cmd - Get LIF attributes from the NIC + * @opcode: Opcode + * @attr: Attribute type (enum lif_attr) + * @index: LIF index + */ +struct ionic_lif_getattr_cmd { + u8 opcode; + u8 attr; + __le16 index; + u8 rsvd[60]; +}; + +/** + * struct lif_getattr_comp - LIF get attr command completion + * @status: The status of the command (enum status_code) + * @comp_index: The index in the descriptor ring for which this + * is the completion. + * @state: lif state (enum lif_state) + * @name: The netdev name string, 0 terminated + * @mtu: Mtu + * @mac: Station mac + * @features: Features (enum eth_hw_features) + * @color: Color bit + */ +struct ionic_lif_getattr_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + union { + u8 state; + __le32 mtu; + u8 mac[6]; + __le64 features; + u8 rsvd2[11]; + }; + u8 color; +}; + +enum ionic_rx_mode { + IONIC_RX_MODE_F_UNICAST = BIT(0), + IONIC_RX_MODE_F_MULTICAST = BIT(1), + IONIC_RX_MODE_F_BROADCAST = BIT(2), + IONIC_RX_MODE_F_PROMISC = BIT(3), + IONIC_RX_MODE_F_ALLMULTI = BIT(4), +}; + +/** + * struct rx_mode_set_cmd - Set LIF's Rx mode command + * @opcode: opcode + * @lif_index: LIF index + * @rx_mode: Rx mode flags: + * IONIC_RX_MODE_F_UNICAST: Accept known unicast packets. + * IONIC_RX_MODE_F_MULTICAST: Accept known multicast packets. + * IONIC_RX_MODE_F_BROADCAST: Accept broadcast packets. + * IONIC_RX_MODE_F_PROMISC: Accept any packets. + * IONIC_RX_MODE_F_ALLMULTI: Accept any multicast packets. + */ +struct ionic_rx_mode_set_cmd { + u8 opcode; + u8 rsvd; + __le16 lif_index; + __le16 rx_mode; + __le16 rsvd2[29]; +}; + +typedef struct ionic_admin_comp ionic_rx_mode_set_comp; + +enum ionic_rx_filter_match_type { + IONIC_RX_FILTER_MATCH_VLAN = 0, + IONIC_RX_FILTER_MATCH_MAC, + IONIC_RX_FILTER_MATCH_MAC_VLAN, +}; + +/** + * struct rx_filter_add_cmd - Add LIF Rx filter command + * @opcode: opcode + * @qtype: Queue type + * @lif_index: LIF index + * @qid: Queue ID + * @match: Rx filter match type. (See IONIC_RX_FILTER_MATCH_xxx) + * @vlan: VLAN ID + * @addr: MAC address (network-byte order) + */ +struct ionic_rx_filter_add_cmd { + u8 opcode; + u8 qtype; + __le16 lif_index; + __le32 qid; + __le16 match; + union { + struct { + __le16 vlan; + } vlan; + struct { + u8 addr[6]; + } mac; + struct { + __le16 vlan; + u8 addr[6]; + } mac_vlan; + u8 rsvd[54]; + }; +}; + +/** + * struct rx_filter_add_comp - Add LIF Rx filter command completion + * @status: The status of the command (enum status_code) + * @comp_index: The index in the descriptor ring for which this + * is the completion. + * @filter_id: Filter ID + * @color: Color bit. 
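The rx_mode word above is a plain OR of the IONIC_RX_MODE_F_* bits. A small sketch of how a driver might derive it from promiscuous/all-multicast intent (the helper name and the always-on unicast/multicast/broadcast baseline are assumptions, not taken from this patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors enum ionic_rx_mode above */
#define RX_MODE_F_UNICAST    (1u << 0)
#define RX_MODE_F_MULTICAST  (1u << 1)
#define RX_MODE_F_BROADCAST  (1u << 2)
#define RX_MODE_F_PROMISC    (1u << 3)
#define RX_MODE_F_ALLMULTI   (1u << 4)

static uint16_t build_rx_mode(bool promisc, bool allmulti)
{
    /* assume unicast/multicast/broadcast reception stays on */
    uint16_t mode = RX_MODE_F_UNICAST | RX_MODE_F_MULTICAST |
                    RX_MODE_F_BROADCAST;

    if (promisc)
        mode |= RX_MODE_F_PROMISC;
    if (allmulti)
        mode |= RX_MODE_F_ALLMULTI;
    return mode;
}

int main(void)
{
    printf("default rx_mode = 0x%02x\n", build_rx_mode(false, false));
    printf("promisc rx_mode = 0x%02x\n", build_rx_mode(true, false));
    return 0;
}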
+ */ +struct ionic_rx_filter_add_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + __le32 filter_id; + u8 rsvd2[7]; + u8 color; +}; + +/** + * struct rx_filter_del_cmd - Delete LIF Rx filter command + * @opcode: opcode + * @lif_index: LIF index + * @filter_id: Filter ID + */ +struct ionic_rx_filter_del_cmd { + u8 opcode; + u8 rsvd; + __le16 lif_index; + __le32 filter_id; + u8 rsvd2[56]; +}; + +typedef struct ionic_admin_comp ionic_rx_filter_del_comp; + +/** + * struct qos_identify_cmd - QoS identify command + * @opcode: opcode + * @ver: Highest version of identify supported by driver + * + */ +struct ionic_qos_identify_cmd { + u8 opcode; + u8 ver; + u8 rsvd[62]; +}; + +/** + * struct qos_identify_comp - QoS identify command completion + * @status: The status of the command (enum status_code) + * @ver: Version of identify returned by device + */ +struct ionic_qos_identify_comp { + u8 status; + u8 ver; + u8 rsvd[14]; +}; + +#define IONIC_QOS_CLASS_MAX 7 +#define IONIC_QOS_CLASS_NAME_SZ 32 +#define IONIC_QOS_DSCP_MAX_VALUES 64 + +/** + * enum qos_class + */ +enum ionic_qos_class { + IONIC_QOS_CLASS_DEFAULT = 0, + IONIC_QOS_CLASS_USER_DEFINED_1 = 1, + IONIC_QOS_CLASS_USER_DEFINED_2 = 2, + IONIC_QOS_CLASS_USER_DEFINED_3 = 3, + IONIC_QOS_CLASS_USER_DEFINED_4 = 4, + IONIC_QOS_CLASS_USER_DEFINED_5 = 5, + IONIC_QOS_CLASS_USER_DEFINED_6 = 6, +}; + +/** + * enum qos_class_type - Traffic classification criteria + */ +enum ionic_qos_class_type { + IONIC_QOS_CLASS_TYPE_NONE = 0, + IONIC_QOS_CLASS_TYPE_PCP = 1, /* Dot1Q pcp */ + IONIC_QOS_CLASS_TYPE_DSCP = 2, /* IP dscp */ +}; + +/** + * enum qos_sched_type - Qos class scheduling type + */ +enum ionic_qos_sched_type { + IONIC_QOS_SCHED_TYPE_STRICT = 0, /* Strict priority */ + IONIC_QOS_SCHED_TYPE_DWRR = 1, /* Deficit weighted round-robin */ +}; + +/** + * union qos_config - Qos configuration structure + * @flags: Configuration flags + * IONIC_QOS_CONFIG_F_ENABLE enable + * IONIC_QOS_CONFIG_F_DROP drop/nodrop + * IONIC_QOS_CONFIG_F_RW_DOT1Q_PCP enable dot1q pcp rewrite + * IONIC_QOS_CONFIG_F_RW_IP_DSCP enable ip dscp rewrite + * @sched_type: Qos class scheduling type (enum qos_sched_type) + * @class_type: Qos class type (enum qos_class_type) + * @pause_type: Qos pause type (enum qos_pause_type) + * @name: Qos class name + * @mtu: MTU of the class + * @pfc_dot1q_pcp: Pcp value for pause frames (valid iff F_NODROP) + * @dwrr_weight: Qos class scheduling weight + * @strict_rlmt: Rate limit for strict priority scheduling + * @rw_dot1q_pcp: Rewrite dot1q pcp to this value (valid iff F_RW_DOT1Q_PCP) + * @rw_ip_dscp: Rewrite ip dscp to this value (valid iff F_RW_IP_DSCP) + * @dot1q_pcp: Dot1q pcp value + * @ndscp: Number of valid dscp values in the ip_dscp field + * @ip_dscp: IP dscp values + */ +union ionic_qos_config { + struct { +#define IONIC_QOS_CONFIG_F_ENABLE BIT(0) +#define IONIC_QOS_CONFIG_F_DROP BIT(1) +#define IONIC_QOS_CONFIG_F_RW_DOT1Q_PCP BIT(2) +#define IONIC_QOS_CONFIG_F_RW_IP_DSCP BIT(3) + u8 flags; + u8 sched_type; + u8 class_type; + u8 pause_type; + char name[IONIC_QOS_CLASS_NAME_SZ]; + __le32 mtu; + /* flow control */ + u8 pfc_cos; + /* scheduler */ + union { + u8 dwrr_weight; + __le64 strict_rlmt; + }; + /* marking */ + union { + u8 rw_dot1q_pcp; + u8 rw_ip_dscp; + }; + /* classification */ + union { + u8 dot1q_pcp; + struct { + u8 ndscp; + u8 ip_dscp[IONIC_QOS_DSCP_MAX_VALUES]; + }; + }; + }; + __le32 words[64]; +}; + +/** + * union qos_identity - QoS identity structure + * @version: Version of the identify structure + * @type: QoS 
system type + * @nclasses: Number of usable QoS classes + * @config: Current configuration of classes + */ +union ionic_qos_identity { + struct { + u8 version; + u8 type; + u8 rsvd[62]; + union ionic_qos_config config[IONIC_QOS_CLASS_MAX]; + }; + __le32 words[512]; +}; + +/** + * struct qos_init_cmd - QoS config init command + * @opcode: Opcode + * @group: Qos class id + * @info_pa: destination address for qos info + */ +struct ionic_qos_init_cmd { + u8 opcode; + u8 group; + u8 rsvd[6]; + __le64 info_pa; + u8 rsvd1[48]; +}; + +typedef struct ionic_admin_comp ionic_qos_init_comp; + +/** + * struct qos_reset_cmd - Qos config reset command + * @opcode: Opcode + */ +struct ionic_qos_reset_cmd { + u8 opcode; + u8 group; + u8 rsvd[62]; +}; + +typedef struct ionic_admin_comp ionic_qos_reset_comp; + +/** + * struct fw_download_cmd - Firmware download command + * @opcode: opcode + * @addr: dma address of the firmware buffer + * @offset: offset of the firmware buffer within the full image + * @length: number of valid bytes in the firmware buffer + */ +struct ionic_fw_download_cmd { + u8 opcode; + u8 rsvd[3]; + __le32 offset; + __le64 addr; + __le32 length; +}; + +typedef struct ionic_admin_comp ionic_fw_download_comp; + +enum ionic_fw_control_oper { + IONIC_FW_RESET = 0, /* Reset firmware */ + IONIC_FW_INSTALL = 1, /* Install firmware */ + IONIC_FW_ACTIVATE = 2, /* Activate firmware */ +}; + +/** + * struct fw_control_cmd - Firmware control command + * @opcode: opcode + * @oper: firmware control operation (enum fw_control_oper) + * @slot: slot to activate + */ +struct ionic_fw_control_cmd { + u8 opcode; + u8 rsvd[3]; + u8 oper; + u8 slot; + u8 rsvd1[58]; +}; + +/** + * struct fw_control_comp - Firmware control copletion + * @opcode: opcode + * @slot: slot where the firmware was installed + */ +struct ionic_fw_control_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 slot; + u8 rsvd1[10]; + u8 color; +}; + +/****************************************************************** + ******************* RDMA Commands ******************************** + ******************************************************************/ + +/** + * struct rdma_reset_cmd - Reset RDMA LIF cmd + * @opcode: opcode + * @lif_index: lif index + * + * There is no rdma specific dev command completion struct. Completion uses + * the common struct admin_comp. Only the status is indicated. Nonzero status + * means the LIF does not support rdma. + **/ +struct ionic_rdma_reset_cmd { + u8 opcode; + u8 rsvd; + __le16 lif_index; + u8 rsvd2[60]; +}; + +/** + * struct rdma_queue_cmd - Create RDMA Queue command + * @opcode: opcode, 52, 53 + * @lif_index lif index + * @qid_ver: (qid | (rdma version << 24)) + * @cid: intr, eq_id, or cq_id + * @dbid: doorbell page id + * @depth_log2: log base two of queue depth + * @stride_log2: log base two of queue stride + * @dma_addr: address of the queue memory + * @xxx_table_index: temporary, but should not need pgtbl for contig. queues. + * + * The same command struct is used to create an rdma event queue, completion + * queue, or rdma admin queue. The cid is an interrupt number for an event + * queue, an event queue id for a completion queue, or a completion queue id + * for an rdma admin queue. + * + * The queue created via a dev command must be contiguous in dma space. + * + * The dev commands are intended only to be used during driver initialization, + * to create queues supporting the rdma admin queue. 
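The firmware download command earlier in this hunk carries a staging-buffer DMA address, the chunk's offset within the full image, and its length, so a large image is pushed as a sequence of such commands before a FW_CONTROL install/activate. A hedged sketch of that chunking loop (buffer size, addresses, and image length are made up for illustration):

#include <stdint.h>
#include <stdio.h>

#define FW_BUF_SZ 4096u             /* assumed staging buffer size */

int main(void)
{
    uint32_t image_len = 10000;     /* pretend firmware image length */
    uint64_t buf_dma   = 0x1000;    /* hypothetical staging buffer address */

    for (uint32_t off = 0; off < image_len; off += FW_BUF_SZ) {
        uint32_t len = image_len - off;

        if (len > FW_BUF_SZ)
            len = FW_BUF_SZ;
        /* a driver would copy this chunk into the buffer and then post
         * IONIC_CMD_FW_DOWNLOAD with exactly these three fields */
        printf("fw_download: addr=0x%llx offset=%u length=%u\n",
               (unsigned long long)buf_dma, off, len);
    }
    /* then IONIC_CMD_FW_CONTROL with oper = IONIC_FW_INSTALL, and
     * IONIC_FW_ACTIVATE on the slot returned in the completion */
    return 0;
}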
Other queues, and other + * types of rdma resources like memory regions, will be created and registered + * via the rdma admin queue, and will support a more complete interface + * providing scatter gather lists for larger, scattered queue buffers and + * memory registration. + * + * There is no rdma specific dev command completion struct. Completion uses + * the common struct admin_comp. Only the status is indicated. + **/ +struct ionic_rdma_queue_cmd { + u8 opcode; + u8 rsvd; + __le16 lif_index; + __le32 qid_ver; + __le32 cid; + __le16 dbid; + u8 depth_log2; + u8 stride_log2; + __le64 dma_addr; + u8 rsvd2[36]; + __le32 xxx_table_index; +}; + +/****************************************************************** + ******************* Notify Events ******************************** + ******************************************************************/ + +/** + * struct notifyq_event + * @eid: event number + * @ecode: event code + * @data: unspecified data about the event + * + * This is the generic event report struct from which the other + * actual events will be formed. + */ +struct ionic_notifyq_event { + __le64 eid; + __le16 ecode; + u8 data[54]; +}; + +/** + * struct link_change_event + * @eid: event number + * @ecode: event code = EVENT_OPCODE_LINK_CHANGE + * @link_status: link up or down, with error bits (enum port_status) + * @link_speed: speed of the network link + * + * Sent when the network link state changes between UP and DOWN + */ +struct ionic_link_change_event { + __le64 eid; + __le16 ecode; + __le16 link_status; + __le32 link_speed; /* units of 1Mbps: e.g. 10000 = 10Gbps */ + u8 rsvd[48]; +}; + +/** + * struct reset_event + * @eid: event number + * @ecode: event code = EVENT_OPCODE_RESET + * @reset_code: reset type + * @state: 0=pending, 1=complete, 2=error + * + * Sent when the NIC or some subsystem is going to be or + * has been reset. + */ +struct ionic_reset_event { + __le64 eid; + __le16 ecode; + u8 reset_code; + u8 state; + u8 rsvd[52]; +}; + +/** + * struct heartbeat_event + * @eid: event number + * @ecode: event code = EVENT_OPCODE_HEARTBEAT + * + * Sent periodically by the NIC to indicate continued health + */ +struct ionic_heartbeat_event { + __le64 eid; + __le16 ecode; + u8 rsvd[54]; +}; + +/** + * struct log_event + * @eid: event number + * @ecode: event code = EVENT_OPCODE_LOG + * @data: log data + * + * Sent to notify the driver of an internal error. 
+ */ +struct ionic_log_event { + __le64 eid; + __le16 ecode; + u8 data[54]; +}; + +/** + * struct port_stats + */ +struct ionic_port_stats { + __le64 frames_rx_ok; + __le64 frames_rx_all; + __le64 frames_rx_bad_fcs; + __le64 frames_rx_bad_all; + __le64 octets_rx_ok; + __le64 octets_rx_all; + __le64 frames_rx_unicast; + __le64 frames_rx_multicast; + __le64 frames_rx_broadcast; + __le64 frames_rx_pause; + __le64 frames_rx_bad_length; + __le64 frames_rx_undersized; + __le64 frames_rx_oversized; + __le64 frames_rx_fragments; + __le64 frames_rx_jabber; + __le64 frames_rx_pripause; + __le64 frames_rx_stomped_crc; + __le64 frames_rx_too_long; + __le64 frames_rx_vlan_good; + __le64 frames_rx_dropped; + __le64 frames_rx_less_than_64b; + __le64 frames_rx_64b; + __le64 frames_rx_65b_127b; + __le64 frames_rx_128b_255b; + __le64 frames_rx_256b_511b; + __le64 frames_rx_512b_1023b; + __le64 frames_rx_1024b_1518b; + __le64 frames_rx_1519b_2047b; + __le64 frames_rx_2048b_4095b; + __le64 frames_rx_4096b_8191b; + __le64 frames_rx_8192b_9215b; + __le64 frames_rx_other; + __le64 frames_tx_ok; + __le64 frames_tx_all; + __le64 frames_tx_bad; + __le64 octets_tx_ok; + __le64 octets_tx_total; + __le64 frames_tx_unicast; + __le64 frames_tx_multicast; + __le64 frames_tx_broadcast; + __le64 frames_tx_pause; + __le64 frames_tx_pripause; + __le64 frames_tx_vlan; + __le64 frames_tx_less_than_64b; + __le64 frames_tx_64b; + __le64 frames_tx_65b_127b; + __le64 frames_tx_128b_255b; + __le64 frames_tx_256b_511b; + __le64 frames_tx_512b_1023b; + __le64 frames_tx_1024b_1518b; + __le64 frames_tx_1519b_2047b; + __le64 frames_tx_2048b_4095b; + __le64 frames_tx_4096b_8191b; + __le64 frames_tx_8192b_9215b; + __le64 frames_tx_other; + __le64 frames_tx_pri_0; + __le64 frames_tx_pri_1; + __le64 frames_tx_pri_2; + __le64 frames_tx_pri_3; + __le64 frames_tx_pri_4; + __le64 frames_tx_pri_5; + __le64 frames_tx_pri_6; + __le64 frames_tx_pri_7; + __le64 frames_rx_pri_0; + __le64 frames_rx_pri_1; + __le64 frames_rx_pri_2; + __le64 frames_rx_pri_3; + __le64 frames_rx_pri_4; + __le64 frames_rx_pri_5; + __le64 frames_rx_pri_6; + __le64 frames_rx_pri_7; + __le64 tx_pripause_0_1us_count; + __le64 tx_pripause_1_1us_count; + __le64 tx_pripause_2_1us_count; + __le64 tx_pripause_3_1us_count; + __le64 tx_pripause_4_1us_count; + __le64 tx_pripause_5_1us_count; + __le64 tx_pripause_6_1us_count; + __le64 tx_pripause_7_1us_count; + __le64 rx_pripause_0_1us_count; + __le64 rx_pripause_1_1us_count; + __le64 rx_pripause_2_1us_count; + __le64 rx_pripause_3_1us_count; + __le64 rx_pripause_4_1us_count; + __le64 rx_pripause_5_1us_count; + __le64 rx_pripause_6_1us_count; + __le64 rx_pripause_7_1us_count; + __le64 rx_pause_1us_count; + __le64 frames_tx_truncated; +}; + +struct ionic_mgmt_port_stats { + __le64 frames_rx_ok; + __le64 frames_rx_all; + __le64 frames_rx_bad_fcs; + __le64 frames_rx_bad_all; + __le64 octets_rx_ok; + __le64 octets_rx_all; + __le64 frames_rx_unicast; + __le64 frames_rx_multicast; + __le64 frames_rx_broadcast; + __le64 frames_rx_pause; + __le64 frames_rx_bad_length0; + __le64 frames_rx_undersized1; + __le64 frames_rx_oversized2; + __le64 frames_rx_fragments3; + __le64 frames_rx_jabber4; + __le64 frames_rx_64b5; + __le64 frames_rx_65b_127b6; + __le64 frames_rx_128b_255b7; + __le64 frames_rx_256b_511b8; + __le64 frames_rx_512b_1023b9; + __le64 frames_rx_1024b_1518b0; + __le64 frames_rx_gt_1518b1; + __le64 frames_rx_fifo_full2; + __le64 frames_tx_ok3; + __le64 frames_tx_all4; + __le64 frames_tx_bad5; + __le64 octets_tx_ok6; + __le64 
octets_tx_total7; + __le64 frames_tx_unicast8; + __le64 frames_tx_multicast9; + __le64 frames_tx_broadcast0; + __le64 frames_tx_pause1; +}; + +/** + * struct port_identity - port identity structure + * @version: identity structure version + * @type: type of port (enum port_type) + * @num_lanes: number of lanes for the port + * @autoneg: autoneg supported + * @min_frame_size: minimum frame size supported + * @max_frame_size: maximum frame size supported + * @fec_type: supported fec types + * @pause_type: supported pause types + * @loopback_mode: supported loopback mode + * @speeds: supported speeds + * @config: current port configuration + */ +union ionic_port_identity { + struct { + u8 version; + u8 type; + u8 num_lanes; + u8 autoneg; + __le32 min_frame_size; + __le32 max_frame_size; + u8 fec_type[4]; + u8 pause_type[2]; + u8 loopback_mode[2]; + __le32 speeds[16]; + u8 rsvd2[44]; + union ionic_port_config config; + }; + __le32 words[512]; +}; + +/** + * struct port_info - port info structure + * @port_status: port status + * @port_stats: port stats + */ +struct ionic_port_info { + union ionic_port_config config; + struct ionic_port_status status; + struct ionic_port_stats stats; +}; + +/** + * struct lif_stats + */ +struct ionic_lif_stats { + /* RX */ + __le64 rx_ucast_bytes; + __le64 rx_ucast_packets; + __le64 rx_mcast_bytes; + __le64 rx_mcast_packets; + __le64 rx_bcast_bytes; + __le64 rx_bcast_packets; + __le64 rsvd0; + __le64 rsvd1; + /* RX drops */ + __le64 rx_ucast_drop_bytes; + __le64 rx_ucast_drop_packets; + __le64 rx_mcast_drop_bytes; + __le64 rx_mcast_drop_packets; + __le64 rx_bcast_drop_bytes; + __le64 rx_bcast_drop_packets; + __le64 rx_dma_error; + __le64 rsvd2; + /* TX */ + __le64 tx_ucast_bytes; + __le64 tx_ucast_packets; + __le64 tx_mcast_bytes; + __le64 tx_mcast_packets; + __le64 tx_bcast_bytes; + __le64 tx_bcast_packets; + __le64 rsvd3; + __le64 rsvd4; + /* TX drops */ + __le64 tx_ucast_drop_bytes; + __le64 tx_ucast_drop_packets; + __le64 tx_mcast_drop_bytes; + __le64 tx_mcast_drop_packets; + __le64 tx_bcast_drop_bytes; + __le64 tx_bcast_drop_packets; + __le64 tx_dma_error; + __le64 rsvd5; + /* Rx Queue/Ring drops */ + __le64 rx_queue_disabled; + __le64 rx_queue_empty; + __le64 rx_queue_error; + __le64 rx_desc_fetch_error; + __le64 rx_desc_data_error; + __le64 rsvd6; + __le64 rsvd7; + __le64 rsvd8; + /* Tx Queue/Ring drops */ + __le64 tx_queue_disabled; + __le64 tx_queue_error; + __le64 tx_desc_fetch_error; + __le64 tx_desc_data_error; + __le64 rsvd9; + __le64 rsvd10; + __le64 rsvd11; + __le64 rsvd12; + + /* RDMA/ROCE TX */ + __le64 tx_rdma_ucast_bytes; + __le64 tx_rdma_ucast_packets; + __le64 tx_rdma_mcast_bytes; + __le64 tx_rdma_mcast_packets; + __le64 tx_rdma_cnp_packets; + __le64 rsvd13; + __le64 rsvd14; + __le64 rsvd15; + + /* RDMA/ROCE RX */ + __le64 rx_rdma_ucast_bytes; + __le64 rx_rdma_ucast_packets; + __le64 rx_rdma_mcast_bytes; + __le64 rx_rdma_mcast_packets; + __le64 rx_rdma_cnp_packets; + __le64 rx_rdma_ecn_packets; + __le64 rsvd16; + __le64 rsvd17; + + __le64 rsvd18; + __le64 rsvd19; + __le64 rsvd20; + __le64 rsvd21; + __le64 rsvd22; + __le64 rsvd23; + __le64 rsvd24; + __le64 rsvd25; + + __le64 rsvd26; + __le64 rsvd27; + __le64 rsvd28; + __le64 rsvd29; + __le64 rsvd30; + __le64 rsvd31; + __le64 rsvd32; + __le64 rsvd33; + + __le64 rsvd34; + __le64 rsvd35; + __le64 rsvd36; + __le64 rsvd37; + __le64 rsvd38; + __le64 rsvd39; + __le64 rsvd40; + __le64 rsvd41; + + __le64 rsvd42; + __le64 rsvd43; + __le64 rsvd44; + __le64 rsvd45; + __le64 rsvd46; + __le64 rsvd47; + 
__le64 rsvd48; + __le64 rsvd49; + + /* RDMA/ROCE REQ Error/Debugs (768 - 895) */ + __le64 rdma_req_rx_pkt_seq_err; + __le64 rdma_req_rx_rnr_retry_err; + __le64 rdma_req_rx_remote_access_err; + __le64 rdma_req_rx_remote_inv_req_err; + __le64 rdma_req_rx_remote_oper_err; + __le64 rdma_req_rx_implied_nak_seq_err; + __le64 rdma_req_rx_cqe_err; + __le64 rdma_req_rx_cqe_flush_err; + + __le64 rdma_req_rx_dup_responses; + __le64 rdma_req_rx_invalid_packets; + __le64 rdma_req_tx_local_access_err; + __le64 rdma_req_tx_local_oper_err; + __le64 rdma_req_tx_memory_mgmt_err; + __le64 rsvd52; + __le64 rsvd53; + __le64 rsvd54; + + /* RDMA/ROCE RESP Error/Debugs (896 - 1023) */ + __le64 rdma_resp_rx_dup_requests; + __le64 rdma_resp_rx_out_of_buffer; + __le64 rdma_resp_rx_out_of_seq_pkts; + __le64 rdma_resp_rx_cqe_err; + __le64 rdma_resp_rx_cqe_flush_err; + __le64 rdma_resp_rx_local_len_err; + __le64 rdma_resp_rx_inv_request_err; + __le64 rdma_resp_rx_local_qp_oper_err; + + __le64 rdma_resp_rx_out_of_atomic_resource; + __le64 rdma_resp_tx_pkt_seq_err; + __le64 rdma_resp_tx_remote_inv_req_err; + __le64 rdma_resp_tx_remote_access_err; + __le64 rdma_resp_tx_remote_oper_err; + __le64 rdma_resp_tx_rnr_retry_err; + __le64 rsvd57; + __le64 rsvd58; +}; + +/** + * struct lif_info - lif info structure + */ +struct ionic_lif_info { + union ionic_lif_config config; + struct ionic_lif_status status; + struct ionic_lif_stats stats; +}; + +union ionic_dev_cmd { + u32 words[16]; + struct ionic_admin_cmd cmd; + struct ionic_nop_cmd nop; + + struct ionic_dev_identify_cmd identify; + struct ionic_dev_init_cmd init; + struct ionic_dev_reset_cmd reset; + struct ionic_dev_getattr_cmd getattr; + struct ionic_dev_setattr_cmd setattr; + + struct ionic_port_identify_cmd port_identify; + struct ionic_port_init_cmd port_init; + struct ionic_port_reset_cmd port_reset; + struct ionic_port_getattr_cmd port_getattr; + struct ionic_port_setattr_cmd port_setattr; + + struct ionic_lif_identify_cmd lif_identify; + struct ionic_lif_init_cmd lif_init; + struct ionic_lif_reset_cmd lif_reset; + + struct ionic_qos_identify_cmd qos_identify; + struct ionic_qos_init_cmd qos_init; + struct ionic_qos_reset_cmd qos_reset; + + struct ionic_q_init_cmd q_init; +}; + +union ionic_dev_cmd_comp { + u32 words[4]; + u8 status; + struct ionic_admin_comp comp; + struct ionic_nop_comp nop; + + struct ionic_dev_identify_comp identify; + struct ionic_dev_init_comp init; + struct ionic_dev_reset_comp reset; + struct ionic_dev_getattr_comp getattr; + struct ionic_dev_setattr_comp setattr; + + struct ionic_port_identify_comp port_identify; + struct ionic_port_init_comp port_init; + struct ionic_port_reset_comp port_reset; + struct ionic_port_getattr_comp port_getattr; + struct ionic_port_setattr_comp port_setattr; + + struct ionic_lif_identify_comp lif_identify; + struct ionic_lif_init_comp lif_init; + ionic_lif_reset_comp lif_reset; + + struct ionic_qos_identify_comp qos_identify; + ionic_qos_init_comp qos_init; + ionic_qos_reset_comp qos_reset; + + struct ionic_q_init_comp q_init; +}; + +/** + * union dev_info - Device info register format (read-only) + * @signature: Signature value of 0x44455649 ('DEVI'). + * @version: Current version of info. + * @asic_type: Asic type. + * @asic_rev: Asic revision. + * @fw_status: Firmware status. + * @fw_heartbeat: Firmware heartbeat counter. + * @serial_num: Serial number. + * @fw_version: Firmware version. 
+ */ +union ionic_dev_info_regs { +#define IONIC_DEVINFO_FWVERS_BUFLEN 32 +#define IONIC_DEVINFO_SERIAL_BUFLEN 32 + struct { + u32 signature; + u8 version; + u8 asic_type; + u8 asic_rev; + u8 fw_status; + u32 fw_heartbeat; + char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN]; + char serial_num[IONIC_DEVINFO_SERIAL_BUFLEN]; + }; + u32 words[512]; +}; + +/** + * union dev_cmd_regs - Device command register format (read-write) + * @doorbell: Device Cmd Doorbell, write-only. + * Write a 1 to signal device to process cmd, + * poll done for completion. + * @done: Done indicator, bit 0 == 1 when command is complete. + * @cmd: Opcode-specific command bytes + * @comp: Opcode-specific response bytes + * @data: Opcode-specific side-data + */ +union ionic_dev_cmd_regs { + struct { + u32 doorbell; + u32 done; + union ionic_dev_cmd cmd; + union ionic_dev_cmd_comp comp; + u8 rsvd[48]; + u32 data[478]; + }; + u32 words[512]; +}; + +/** + * union dev_regs - Device register format in for bar 0 page 0 + * @info: Device info registers + * @devcmd: Device command registers + */ +union ionic_dev_regs { + struct { + union ionic_dev_info_regs info; + union ionic_dev_cmd_regs devcmd; + }; + __le32 words[1024]; +}; + +union ionic_adminq_cmd { + struct ionic_admin_cmd cmd; + struct ionic_nop_cmd nop; + struct ionic_q_init_cmd q_init; + struct ionic_q_control_cmd q_control; + struct ionic_lif_setattr_cmd lif_setattr; + struct ionic_lif_getattr_cmd lif_getattr; + struct ionic_rx_mode_set_cmd rx_mode_set; + struct ionic_rx_filter_add_cmd rx_filter_add; + struct ionic_rx_filter_del_cmd rx_filter_del; + struct ionic_rdma_reset_cmd rdma_reset; + struct ionic_rdma_queue_cmd rdma_queue; + struct ionic_fw_download_cmd fw_download; + struct ionic_fw_control_cmd fw_control; +}; + +union ionic_adminq_comp { + struct ionic_admin_comp comp; + struct ionic_nop_comp nop; + struct ionic_q_init_comp q_init; + struct ionic_lif_setattr_comp lif_setattr; + struct ionic_lif_getattr_comp lif_getattr; + struct ionic_rx_filter_add_comp rx_filter_add; + struct ionic_fw_control_comp fw_control; +}; + +#define IONIC_BARS_MAX 6 +#define IONIC_PCI_BAR_DBELL 1 + +/* BAR0 */ +#define IONIC_BAR0_SIZE 0x8000 + +#define IONIC_BAR0_DEV_INFO_REGS_OFFSET 0x0000 +#define IONIC_BAR0_DEV_CMD_REGS_OFFSET 0x0800 +#define IONIC_BAR0_DEV_CMD_DATA_REGS_OFFSET 0x0c00 +#define IONIC_BAR0_INTR_STATUS_OFFSET 0x1000 +#define IONIC_BAR0_INTR_CTRL_OFFSET 0x2000 +#define IONIC_DEV_CMD_DONE 0x00000001 + +#define IONIC_ASIC_TYPE_CAPRI 0 + +/** + * struct doorbell - Doorbell register layout + * @p_index: Producer index + * @ring: Selects the specific ring of the queue to update. + * Type-specific meaning: + * ring=0: Default producer/consumer queue. + * ring=1: (CQ, EQ) Re-Arm queue. RDMA CQs + * send events to EQs when armed. EQs send + * interrupts when armed. + * @qid: The queue id selects the queue destination for the + * producer index and flags. 
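The dev_cmd register block described above (distinct from the per-queue doorbell that follows) uses a simple handshake: write the command words, write 1 to the doorbell, then poll done for IONIC_DEV_CMD_DONE. Below is a userspace-flavored sketch of that loop; the struct layout, spin limit, and fake register block are illustrative stand-ins for an ioremap'd BAR0 region, not the driver's implementation:

#include <stdint.h>
#include <stdio.h>

#define DEV_CMD_DONE 0x00000001u    /* bit 0 of the done register */

/* Stand-in for the start of union ionic_dev_cmd_regs: doorbell, done,
 * 16 command words, 4 completion words.  A real driver would ioremap
 * BAR0 + IONIC_BAR0_DEV_CMD_REGS_OFFSET and use ioread32/iowrite32. */
struct dev_cmd_regs {
    volatile uint32_t doorbell;
    volatile uint32_t done;
    volatile uint32_t cmd[16];
    volatile uint32_t comp[4];
};

static int dev_cmd_go(struct dev_cmd_regs *regs, const uint32_t cmd[16])
{
    for (int i = 0; i < 16; i++)
        regs->cmd[i] = cmd[i];
    regs->doorbell = 1;             /* signal the device */

    for (long spin = 0; spin < 1000000; spin++)
        if (regs->done & DEV_CMD_DONE)
            return (int)(regs->comp[0] & 0xff);   /* status byte */
    return -1;                      /* timed out */
}

int main(void)
{
    static struct dev_cmd_regs fake = { .done = DEV_CMD_DONE };
    uint32_t nop[16] = { 0 };       /* IONIC_CMD_NOP */

    printf("dev_cmd status = %d\n", dev_cmd_go(&fake, nop));
    return 0;
}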
+ */ +struct ionic_doorbell { + __le16 p_index; + u8 ring; + u8 qid_lo; + __le16 qid_hi; + u16 rsvd2; +}; + +struct ionic_intr_status { + u32 status[2]; +}; + +struct ionic_notifyq_cmd { + __le32 data; /* Not used but needed for qcq structure */ +}; + +union ionic_notifyq_comp { + struct ionic_notifyq_event event; + struct ionic_link_change_event link_change; + struct ionic_reset_event reset; + struct ionic_heartbeat_event heartbeat; + struct ionic_log_event log; +}; + +/* Deprecate */ +struct ionic_identity { + union ionic_drv_identity drv; + union ionic_dev_identity dev; + union ionic_lif_identity lif; + union ionic_port_identity port; + union ionic_qos_identity qos; +}; + +#pragma pack(pop) + +#endif /* _IONIC_IF_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c new file mode 100644 index 000000000000..db7c82742828 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -0,0 +1,2274 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/cpumask.h> + +#include "ionic.h" +#include "ionic_bus.h" +#include "ionic_lif.h" +#include "ionic_txrx.h" +#include "ionic_ethtool.h" +#include "ionic_debugfs.h" + +static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode); +static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr); +static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr); +static void ionic_link_status_check(struct ionic_lif *lif); + +static void ionic_lif_deferred_work(struct work_struct *work) +{ + struct ionic_lif *lif = container_of(work, struct ionic_lif, deferred.work); + struct ionic_deferred *def = &lif->deferred; + struct ionic_deferred_work *w = NULL; + + spin_lock_bh(&def->lock); + if (!list_empty(&def->list)) { + w = list_first_entry(&def->list, + struct ionic_deferred_work, list); + list_del(&w->list); + } + spin_unlock_bh(&def->lock); + + if (w) { + switch (w->type) { + case IONIC_DW_TYPE_RX_MODE: + ionic_lif_rx_mode(lif, w->rx_mode); + break; + case IONIC_DW_TYPE_RX_ADDR_ADD: + ionic_lif_addr_add(lif, w->addr); + break; + case IONIC_DW_TYPE_RX_ADDR_DEL: + ionic_lif_addr_del(lif, w->addr); + break; + case IONIC_DW_TYPE_LINK_STATUS: + ionic_link_status_check(lif); + break; + default: + break; + } + kfree(w); + schedule_work(&def->work); + } +} + +static void ionic_lif_deferred_enqueue(struct ionic_deferred *def, + struct ionic_deferred_work *work) +{ + spin_lock_bh(&def->lock); + list_add_tail(&work->list, &def->list); + spin_unlock_bh(&def->lock); + schedule_work(&def->work); +} + +static void ionic_link_status_check(struct ionic_lif *lif) +{ + struct net_device *netdev = lif->netdev; + u16 link_status; + bool link_up; + + link_status = le16_to_cpu(lif->info->status.link_status); + link_up = link_status == IONIC_PORT_OPER_STATUS_UP; + + /* filter out the no-change cases */ + if (link_up == netif_carrier_ok(netdev)) + goto link_out; + + if (link_up) { + netdev_info(netdev, "Link up - %d Gbps\n", + le32_to_cpu(lif->info->status.link_speed) / 1000); + + if (test_bit(IONIC_LIF_UP, lif->state)) { + netif_tx_wake_all_queues(lif->netdev); + netif_carrier_on(netdev); + } + } else { + netdev_info(netdev, "Link down\n"); + + /* carrier off first to avoid watchdog timeout */ + netif_carrier_off(netdev); + if (test_bit(IONIC_LIF_UP, lif->state)) + 
netif_tx_stop_all_queues(netdev); + } + +link_out: + clear_bit(IONIC_LIF_LINK_CHECK_REQUESTED, lif->state); +} + +static void ionic_link_status_check_request(struct ionic_lif *lif) +{ + struct ionic_deferred_work *work; + + /* we only need one request outstanding at a time */ + if (test_and_set_bit(IONIC_LIF_LINK_CHECK_REQUESTED, lif->state)) + return; + + if (in_interrupt()) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + work->type = IONIC_DW_TYPE_LINK_STATUS; + ionic_lif_deferred_enqueue(&lif->deferred, work); + } else { + ionic_link_status_check(lif); + } +} + +static irqreturn_t ionic_isr(int irq, void *data) +{ + struct napi_struct *napi = data; + + napi_schedule_irqoff(napi); + + return IRQ_HANDLED; +} + +static int ionic_request_irq(struct ionic_lif *lif, struct ionic_qcq *qcq) +{ + struct ionic_intr_info *intr = &qcq->intr; + struct device *dev = lif->ionic->dev; + struct ionic_queue *q = &qcq->q; + const char *name; + + if (lif->registered) + name = lif->netdev->name; + else + name = dev_name(dev); + + snprintf(intr->name, sizeof(intr->name), + "%s-%s-%s", IONIC_DRV_NAME, name, q->name); + + return devm_request_irq(dev, intr->vector, ionic_isr, + 0, intr->name, &qcq->napi); +} + +static int ionic_intr_alloc(struct ionic_lif *lif, struct ionic_intr_info *intr) +{ + struct ionic *ionic = lif->ionic; + int index; + + index = find_first_zero_bit(ionic->intrs, ionic->nintrs); + if (index == ionic->nintrs) { + netdev_warn(lif->netdev, "%s: no intr, index=%d nintrs=%d\n", + __func__, index, ionic->nintrs); + return -ENOSPC; + } + + set_bit(index, ionic->intrs); + ionic_intr_init(&ionic->idev, intr, index); + + return 0; +} + +static void ionic_intr_free(struct ionic_lif *lif, int index) +{ + if (index != INTR_INDEX_NOT_ASSIGNED && index < lif->ionic->nintrs) + clear_bit(index, lif->ionic->intrs); +} + +static int ionic_qcq_enable(struct ionic_qcq *qcq) +{ + struct ionic_queue *q = &qcq->q; + struct ionic_lif *lif = q->lif; + struct ionic_dev *idev; + struct device *dev; + + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.q_control = { + .opcode = IONIC_CMD_Q_CONTROL, + .lif_index = cpu_to_le16(lif->index), + .type = q->type, + .index = cpu_to_le32(q->index), + .oper = IONIC_Q_ENABLE, + }, + }; + + idev = &lif->ionic->idev; + dev = lif->ionic->dev; + + dev_dbg(dev, "q_enable.index %d q_enable.qtype %d\n", + ctx.cmd.q_control.index, ctx.cmd.q_control.type); + + if (qcq->flags & IONIC_QCQ_F_INTR) { + irq_set_affinity_hint(qcq->intr.vector, + &qcq->intr.affinity_mask); + napi_enable(&qcq->napi); + ionic_intr_clean(idev->intr_ctrl, qcq->intr.index); + ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, + IONIC_INTR_MASK_CLEAR); + } + + return ionic_adminq_post_wait(lif, &ctx); +} + +static int ionic_qcq_disable(struct ionic_qcq *qcq) +{ + struct ionic_queue *q = &qcq->q; + struct ionic_lif *lif = q->lif; + struct ionic_dev *idev; + struct device *dev; + + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.q_control = { + .opcode = IONIC_CMD_Q_CONTROL, + .lif_index = cpu_to_le16(lif->index), + .type = q->type, + .index = cpu_to_le32(q->index), + .oper = IONIC_Q_DISABLE, + }, + }; + + idev = &lif->ionic->idev; + dev = lif->ionic->dev; + + dev_dbg(dev, "q_disable.index %d q_disable.qtype %d\n", + ctx.cmd.q_control.index, ctx.cmd.q_control.type); + + if (qcq->flags & IONIC_QCQ_F_INTR) { + ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, + IONIC_INTR_MASK_SET); + 
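/* The interrupt is masked at the device above; wait out any handler
+ * still in flight, drop the affinity hint, and quiesce NAPI before
+ * posting the IONIC_Q_DISABLE command below.
+ */ +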
synchronize_irq(qcq->intr.vector); + irq_set_affinity_hint(qcq->intr.vector, NULL); + napi_disable(&qcq->napi); + } + + return ionic_adminq_post_wait(lif, &ctx); +} + +static void ionic_lif_qcq_deinit(struct ionic_lif *lif, struct ionic_qcq *qcq) +{ + struct ionic_dev *idev = &lif->ionic->idev; + struct device *dev = lif->ionic->dev; + + if (!qcq) + return; + + ionic_debugfs_del_qcq(qcq); + + if (!(qcq->flags & IONIC_QCQ_F_INITED)) + return; + + if (qcq->flags & IONIC_QCQ_F_INTR) { + ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, + IONIC_INTR_MASK_SET); + devm_free_irq(dev, qcq->intr.vector, &qcq->napi); + netif_napi_del(&qcq->napi); + } + + qcq->flags &= ~IONIC_QCQ_F_INITED; +} + +static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq) +{ + struct device *dev = lif->ionic->dev; + + if (!qcq) + return; + + dma_free_coherent(dev, qcq->total_size, qcq->base, qcq->base_pa); + qcq->base = NULL; + qcq->base_pa = 0; + + if (qcq->flags & IONIC_QCQ_F_INTR) + ionic_intr_free(lif, qcq->intr.index); + + devm_kfree(dev, qcq->cq.info); + qcq->cq.info = NULL; + devm_kfree(dev, qcq->q.info); + qcq->q.info = NULL; + devm_kfree(dev, qcq); +} + +static void ionic_qcqs_free(struct ionic_lif *lif) +{ + struct device *dev = lif->ionic->dev; + unsigned int i; + + if (lif->notifyqcq) { + ionic_qcq_free(lif, lif->notifyqcq); + lif->notifyqcq = NULL; + } + + if (lif->adminqcq) { + ionic_qcq_free(lif, lif->adminqcq); + lif->adminqcq = NULL; + } + + for (i = 0; i < lif->nxqs; i++) + if (lif->rxqcqs[i].stats) + devm_kfree(dev, lif->rxqcqs[i].stats); + + devm_kfree(dev, lif->rxqcqs); + lif->rxqcqs = NULL; + + for (i = 0; i < lif->nxqs; i++) + if (lif->txqcqs[i].stats) + devm_kfree(dev, lif->txqcqs[i].stats); + + devm_kfree(dev, lif->txqcqs); + lif->txqcqs = NULL; +} + +static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq, + struct ionic_qcq *n_qcq) +{ + if (WARN_ON(n_qcq->flags & IONIC_QCQ_F_INTR)) { + ionic_intr_free(n_qcq->cq.lif, n_qcq->intr.index); + n_qcq->flags &= ~IONIC_QCQ_F_INTR; + } + + n_qcq->intr.vector = src_qcq->intr.vector; + n_qcq->intr.index = src_qcq->intr.index; +} + +static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, + unsigned int index, + const char *name, unsigned int flags, + unsigned int num_descs, unsigned int desc_size, + unsigned int cq_desc_size, + unsigned int sg_desc_size, + unsigned int pid, struct ionic_qcq **qcq) +{ + struct ionic_dev *idev = &lif->ionic->idev; + u32 q_size, cq_size, sg_size, total_size; + struct device *dev = lif->ionic->dev; + void *q_base, *cq_base, *sg_base; + dma_addr_t cq_base_pa = 0; + dma_addr_t sg_base_pa = 0; + dma_addr_t q_base_pa = 0; + struct ionic_qcq *new; + int err; + + *qcq = NULL; + + q_size = num_descs * desc_size; + cq_size = num_descs * cq_desc_size; + sg_size = num_descs * sg_desc_size; + + total_size = ALIGN(q_size, PAGE_SIZE) + ALIGN(cq_size, PAGE_SIZE); + /* Note: aligning q_size/cq_size is not enough due to cq_base + * address aligning as q_base could be not aligned to the page. + * Adding PAGE_SIZE. 
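+ * Worked example (hypothetical numbers, 4 KB pages): if q_size is exactly
+ * one page but q_base starts 64 bytes past a page boundary, the
+ * page-aligned cq_base lands nearly two pages past q_base, so up to one
+ * extra page can be needed beyond ALIGN(q_size) + ALIGN(cq_size).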
+ */ + total_size += PAGE_SIZE; + if (flags & IONIC_QCQ_F_SG) { + total_size += ALIGN(sg_size, PAGE_SIZE); + total_size += PAGE_SIZE; + } + + new = devm_kzalloc(dev, sizeof(*new), GFP_KERNEL); + if (!new) { + netdev_err(lif->netdev, "Cannot allocate queue structure\n"); + err = -ENOMEM; + goto err_out; + } + + new->flags = flags; + + new->q.info = devm_kzalloc(dev, sizeof(*new->q.info) * num_descs, + GFP_KERNEL); + if (!new->q.info) { + netdev_err(lif->netdev, "Cannot allocate queue info\n"); + err = -ENOMEM; + goto err_out; + } + + new->q.type = type; + + err = ionic_q_init(lif, idev, &new->q, index, name, num_descs, + desc_size, sg_desc_size, pid); + if (err) { + netdev_err(lif->netdev, "Cannot initialize queue\n"); + goto err_out; + } + + if (flags & IONIC_QCQ_F_INTR) { + err = ionic_intr_alloc(lif, &new->intr); + if (err) { + netdev_warn(lif->netdev, "no intr for %s: %d\n", + name, err); + goto err_out; + } + + err = ionic_bus_get_irq(lif->ionic, new->intr.index); + if (err < 0) { + netdev_warn(lif->netdev, "no vector for %s: %d\n", + name, err); + goto err_out_free_intr; + } + new->intr.vector = err; + ionic_intr_mask_assert(idev->intr_ctrl, new->intr.index, + IONIC_INTR_MASK_SET); + + new->intr.cpu = new->intr.index % num_online_cpus(); + if (cpu_online(new->intr.cpu)) + cpumask_set_cpu(new->intr.cpu, + &new->intr.affinity_mask); + } else { + new->intr.index = INTR_INDEX_NOT_ASSIGNED; + } + + new->cq.info = devm_kzalloc(dev, sizeof(*new->cq.info) * num_descs, + GFP_KERNEL); + if (!new->cq.info) { + netdev_err(lif->netdev, "Cannot allocate completion queue info\n"); + err = -ENOMEM; + goto err_out_free_intr; + } + + err = ionic_cq_init(lif, &new->cq, &new->intr, num_descs, cq_desc_size); + if (err) { + netdev_err(lif->netdev, "Cannot initialize completion queue\n"); + goto err_out_free_intr; + } + + new->base = dma_alloc_coherent(dev, total_size, &new->base_pa, + GFP_KERNEL); + if (!new->base) { + netdev_err(lif->netdev, "Cannot allocate queue DMA memory\n"); + err = -ENOMEM; + goto err_out_free_intr; + } + + new->total_size = total_size; + + q_base = new->base; + q_base_pa = new->base_pa; + + cq_base = (void *)ALIGN((uintptr_t)q_base + q_size, PAGE_SIZE); + cq_base_pa = ALIGN(q_base_pa + q_size, PAGE_SIZE); + + if (flags & IONIC_QCQ_F_SG) { + sg_base = (void *)ALIGN((uintptr_t)cq_base + cq_size, + PAGE_SIZE); + sg_base_pa = ALIGN(cq_base_pa + cq_size, PAGE_SIZE); + ionic_q_sg_map(&new->q, sg_base, sg_base_pa); + } + + ionic_q_map(&new->q, q_base, q_base_pa); + ionic_cq_map(&new->cq, cq_base, cq_base_pa); + ionic_cq_bind(&new->cq, &new->q); + + *qcq = new; + + return 0; + +err_out_free_intr: + ionic_intr_free(lif, new->intr.index); +err_out: + dev_err(dev, "qcq alloc of %s%d failed %d\n", name, index, err); + return err; +} + +static int ionic_qcqs_alloc(struct ionic_lif *lif) +{ + struct device *dev = lif->ionic->dev; + unsigned int q_list_size; + unsigned int flags; + int err; + int i; + + flags = IONIC_QCQ_F_INTR; + err = ionic_qcq_alloc(lif, IONIC_QTYPE_ADMINQ, 0, "admin", flags, + IONIC_ADMINQ_LENGTH, + sizeof(struct ionic_admin_cmd), + sizeof(struct ionic_admin_comp), + 0, lif->kern_pid, &lif->adminqcq); + if (err) + return err; + + if (lif->ionic->nnqs_per_lif) { + flags = IONIC_QCQ_F_NOTIFYQ; + err = ionic_qcq_alloc(lif, IONIC_QTYPE_NOTIFYQ, 0, "notifyq", + flags, IONIC_NOTIFYQ_LENGTH, + sizeof(struct ionic_notifyq_cmd), + sizeof(union ionic_notifyq_comp), + 0, lif->kern_pid, &lif->notifyqcq); + if (err) + goto err_out_free_adminqcq; + + /* Let the notifyq ride on the adminq 
interrupt */ + ionic_link_qcq_interrupts(lif->adminqcq, lif->notifyqcq); + } + + q_list_size = sizeof(*lif->txqcqs) * lif->nxqs; + err = -ENOMEM; + lif->txqcqs = devm_kzalloc(dev, q_list_size, GFP_KERNEL); + if (!lif->txqcqs) + goto err_out_free_notifyqcq; + for (i = 0; i < lif->nxqs; i++) { + lif->txqcqs[i].stats = devm_kzalloc(dev, + sizeof(struct ionic_q_stats), + GFP_KERNEL); + if (!lif->txqcqs[i].stats) + goto err_out_free_tx_stats; + } + + lif->rxqcqs = devm_kzalloc(dev, q_list_size, GFP_KERNEL); + if (!lif->rxqcqs) + goto err_out_free_tx_stats; + for (i = 0; i < lif->nxqs; i++) { + lif->rxqcqs[i].stats = devm_kzalloc(dev, + sizeof(struct ionic_q_stats), + GFP_KERNEL); + if (!lif->rxqcqs[i].stats) + goto err_out_free_rx_stats; + } + + return 0; + +err_out_free_rx_stats: + for (i = 0; i < lif->nxqs; i++) + if (lif->rxqcqs[i].stats) + devm_kfree(dev, lif->rxqcqs[i].stats); + devm_kfree(dev, lif->rxqcqs); + lif->rxqcqs = NULL; +err_out_free_tx_stats: + for (i = 0; i < lif->nxqs; i++) + if (lif->txqcqs[i].stats) + devm_kfree(dev, lif->txqcqs[i].stats); + devm_kfree(dev, lif->txqcqs); + lif->txqcqs = NULL; +err_out_free_notifyqcq: + if (lif->notifyqcq) { + ionic_qcq_free(lif, lif->notifyqcq); + lif->notifyqcq = NULL; + } +err_out_free_adminqcq: + ionic_qcq_free(lif, lif->adminqcq); + lif->adminqcq = NULL; + + return err; +} + +static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq) +{ + struct device *dev = lif->ionic->dev; + struct ionic_queue *q = &qcq->q; + struct ionic_cq *cq = &qcq->cq; + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.q_init = { + .opcode = IONIC_CMD_Q_INIT, + .lif_index = cpu_to_le16(lif->index), + .type = q->type, + .index = cpu_to_le32(q->index), + .flags = cpu_to_le16(IONIC_QINIT_F_IRQ | + IONIC_QINIT_F_SG), + .intr_index = cpu_to_le16(lif->rxqcqs[q->index].qcq->intr.index), + .pid = cpu_to_le16(q->pid), + .ring_size = ilog2(q->num_descs), + .ring_base = cpu_to_le64(q->base_pa), + .cq_ring_base = cpu_to_le64(cq->base_pa), + .sg_ring_base = cpu_to_le64(q->sg_base_pa), + }, + }; + int err; + + dev_dbg(dev, "txq_init.pid %d\n", ctx.cmd.q_init.pid); + dev_dbg(dev, "txq_init.index %d\n", ctx.cmd.q_init.index); + dev_dbg(dev, "txq_init.ring_base 0x%llx\n", ctx.cmd.q_init.ring_base); + dev_dbg(dev, "txq_init.ring_size %d\n", ctx.cmd.q_init.ring_size); + + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + return err; + + q->hw_type = ctx.comp.q_init.hw_type; + q->hw_index = le32_to_cpu(ctx.comp.q_init.hw_index); + q->dbval = IONIC_DBELL_QID(q->hw_index); + + dev_dbg(dev, "txq->hw_type %d\n", q->hw_type); + dev_dbg(dev, "txq->hw_index %d\n", q->hw_index); + + qcq->flags |= IONIC_QCQ_F_INITED; + + ionic_debugfs_add_qcq(lif, qcq); + + return 0; +} + +static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq) +{ + struct device *dev = lif->ionic->dev; + struct ionic_queue *q = &qcq->q; + struct ionic_cq *cq = &qcq->cq; + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.q_init = { + .opcode = IONIC_CMD_Q_INIT, + .lif_index = cpu_to_le16(lif->index), + .type = q->type, + .index = cpu_to_le32(q->index), + .flags = cpu_to_le16(IONIC_QINIT_F_IRQ), + .intr_index = cpu_to_le16(cq->bound_intr->index), + .pid = cpu_to_le16(q->pid), + .ring_size = ilog2(q->num_descs), + .ring_base = cpu_to_le64(q->base_pa), + .cq_ring_base = cpu_to_le64(cq->base_pa), + }, + }; + int err; + + dev_dbg(dev, "rxq_init.pid %d\n", ctx.cmd.q_init.pid); + dev_dbg(dev, "rxq_init.index 
%d\n", ctx.cmd.q_init.index); + dev_dbg(dev, "rxq_init.ring_base 0x%llx\n", ctx.cmd.q_init.ring_base); + dev_dbg(dev, "rxq_init.ring_size %d\n", ctx.cmd.q_init.ring_size); + + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + return err; + + q->hw_type = ctx.comp.q_init.hw_type; + q->hw_index = le32_to_cpu(ctx.comp.q_init.hw_index); + q->dbval = IONIC_DBELL_QID(q->hw_index); + + dev_dbg(dev, "rxq->hw_type %d\n", q->hw_type); + dev_dbg(dev, "rxq->hw_index %d\n", q->hw_index); + + netif_napi_add(lif->netdev, &qcq->napi, ionic_rx_napi, + NAPI_POLL_WEIGHT); + + err = ionic_request_irq(lif, qcq); + if (err) { + netif_napi_del(&qcq->napi); + return err; + } + + qcq->flags |= IONIC_QCQ_F_INITED; + + ionic_debugfs_add_qcq(lif, qcq); + + return 0; +} + +static bool ionic_notifyq_service(struct ionic_cq *cq, + struct ionic_cq_info *cq_info) +{ + union ionic_notifyq_comp *comp = cq_info->cq_desc; + struct net_device *netdev; + struct ionic_queue *q; + struct ionic_lif *lif; + u64 eid; + + q = cq->bound_q; + lif = q->info[0].cb_arg; + netdev = lif->netdev; + eid = le64_to_cpu(comp->event.eid); + + /* Have we run out of new completions to process? */ + if (eid <= lif->last_eid) + return false; + + lif->last_eid = eid; + + dev_dbg(lif->ionic->dev, "notifyq event:\n"); + dynamic_hex_dump("event ", DUMP_PREFIX_OFFSET, 16, 1, + comp, sizeof(*comp), true); + + switch (le16_to_cpu(comp->event.ecode)) { + case IONIC_EVENT_LINK_CHANGE: + ionic_link_status_check_request(lif); + break; + case IONIC_EVENT_RESET: + netdev_info(netdev, "Notifyq IONIC_EVENT_RESET eid=%lld\n", + eid); + netdev_info(netdev, " reset_code=%d state=%d\n", + comp->reset.reset_code, + comp->reset.state); + break; + default: + netdev_warn(netdev, "Notifyq unknown event ecode=%d eid=%lld\n", + comp->event.ecode, eid); + break; + } + + return true; +} + +static int ionic_notifyq_clean(struct ionic_lif *lif, int budget) +{ + struct ionic_dev *idev = &lif->ionic->idev; + struct ionic_cq *cq = &lif->notifyqcq->cq; + u32 work_done; + + work_done = ionic_cq_service(cq, budget, ionic_notifyq_service, + NULL, NULL); + if (work_done) + ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index, + work_done, IONIC_INTR_CRED_RESET_COALESCE); + + return work_done; +} + +static bool ionic_adminq_service(struct ionic_cq *cq, + struct ionic_cq_info *cq_info) +{ + struct ionic_admin_comp *comp = cq_info->cq_desc; + + if (!color_match(comp->color, cq->done_color)) + return false; + + ionic_q_service(cq->bound_q, cq_info, le16_to_cpu(comp->comp_index)); + + return true; +} + +static int ionic_adminq_napi(struct napi_struct *napi, int budget) +{ + struct ionic_lif *lif = napi_to_cq(napi)->lif; + int n_work = 0; + int a_work = 0; + + if (likely(lif->notifyqcq && lif->notifyqcq->flags & IONIC_QCQ_F_INITED)) + n_work = ionic_notifyq_clean(lif, budget); + a_work = ionic_napi(napi, budget, ionic_adminq_service, NULL, NULL); + + return max(n_work, a_work); +} + +static void ionic_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *ns) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_lif_stats *ls; + + memset(ns, 0, sizeof(*ns)); + ls = &lif->info->stats; + + ns->rx_packets = le64_to_cpu(ls->rx_ucast_packets) + + le64_to_cpu(ls->rx_mcast_packets) + + le64_to_cpu(ls->rx_bcast_packets); + + ns->tx_packets = le64_to_cpu(ls->tx_ucast_packets) + + le64_to_cpu(ls->tx_mcast_packets) + + le64_to_cpu(ls->tx_bcast_packets); + + ns->rx_bytes = le64_to_cpu(ls->rx_ucast_bytes) + + le64_to_cpu(ls->rx_mcast_bytes) + + 
le64_to_cpu(ls->rx_bcast_bytes); + + ns->tx_bytes = le64_to_cpu(ls->tx_ucast_bytes) + + le64_to_cpu(ls->tx_mcast_bytes) + + le64_to_cpu(ls->tx_bcast_bytes); + + ns->rx_dropped = le64_to_cpu(ls->rx_ucast_drop_packets) + + le64_to_cpu(ls->rx_mcast_drop_packets) + + le64_to_cpu(ls->rx_bcast_drop_packets); + + ns->tx_dropped = le64_to_cpu(ls->tx_ucast_drop_packets) + + le64_to_cpu(ls->tx_mcast_drop_packets) + + le64_to_cpu(ls->tx_bcast_drop_packets); + + ns->multicast = le64_to_cpu(ls->rx_mcast_packets); + + ns->rx_over_errors = le64_to_cpu(ls->rx_queue_empty); + + ns->rx_missed_errors = le64_to_cpu(ls->rx_dma_error) + + le64_to_cpu(ls->rx_queue_disabled) + + le64_to_cpu(ls->rx_desc_fetch_error) + + le64_to_cpu(ls->rx_desc_data_error); + + ns->tx_aborted_errors = le64_to_cpu(ls->tx_dma_error) + + le64_to_cpu(ls->tx_queue_disabled) + + le64_to_cpu(ls->tx_desc_fetch_error) + + le64_to_cpu(ls->tx_desc_data_error); + + ns->rx_errors = ns->rx_over_errors + + ns->rx_missed_errors; + + ns->tx_errors = ns->tx_aborted_errors; +} + +static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_filter_add = { + .opcode = IONIC_CMD_RX_FILTER_ADD, + .lif_index = cpu_to_le16(lif->index), + .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_MAC), + }, + }; + struct ionic_rx_filter *f; + int err; + + /* don't bother if we already have it */ + spin_lock_bh(&lif->rx_filters.lock); + f = ionic_rx_filter_by_addr(lif, addr); + spin_unlock_bh(&lif->rx_filters.lock); + if (f) + return 0; + + netdev_dbg(lif->netdev, "rx_filter add ADDR %pM (id %d)\n", addr, + ctx.comp.rx_filter_add.filter_id); + + memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN); + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + return err; + + return ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx); +} + +static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_filter_del = { + .opcode = IONIC_CMD_RX_FILTER_DEL, + .lif_index = cpu_to_le16(lif->index), + }, + }; + struct ionic_rx_filter *f; + int err; + + spin_lock_bh(&lif->rx_filters.lock); + f = ionic_rx_filter_by_addr(lif, addr); + if (!f) { + spin_unlock_bh(&lif->rx_filters.lock); + return -ENOENT; + } + + ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id); + ionic_rx_filter_free(lif, f); + spin_unlock_bh(&lif->rx_filters.lock); + + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + return err; + + netdev_dbg(lif->netdev, "rx_filter del ADDR %pM (id %d)\n", addr, + ctx.cmd.rx_filter_del.filter_id); + + return 0; +} + +static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add) +{ + struct ionic *ionic = lif->ionic; + struct ionic_deferred_work *work; + unsigned int nmfilters; + unsigned int nufilters; + + if (add) { + /* Do we have space for this filter? We test the counters + * here before checking the need for deferral so that we + * can return an overflow error to the stack. 
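+ * For example (hypothetical limit): with max_ucast_filters == 32, the
+ * 33rd unicast address is refused here with -ENOSPC, and the rx_mode
+ * path in ionic_set_rx_mode() below compensates by switching the LIF to
+ * PROMISC for the overflow.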
+ */ + nmfilters = le32_to_cpu(ionic->ident.lif.eth.max_mcast_filters); + nufilters = le32_to_cpu(ionic->ident.lif.eth.max_ucast_filters); + + if ((is_multicast_ether_addr(addr) && lif->nmcast < nmfilters)) + lif->nmcast++; + else if (!is_multicast_ether_addr(addr) && + lif->nucast < nufilters) + lif->nucast++; + else + return -ENOSPC; + } else { + if (is_multicast_ether_addr(addr) && lif->nmcast) + lif->nmcast--; + else if (!is_multicast_ether_addr(addr) && lif->nucast) + lif->nucast--; + } + + if (in_interrupt()) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + netdev_err(lif->netdev, "%s OOM\n", __func__); + return -ENOMEM; + } + work->type = add ? IONIC_DW_TYPE_RX_ADDR_ADD : + IONIC_DW_TYPE_RX_ADDR_DEL; + memcpy(work->addr, addr, ETH_ALEN); + netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n", + add ? "add" : "del", addr); + ionic_lif_deferred_enqueue(&lif->deferred, work); + } else { + netdev_dbg(lif->netdev, "rx_filter %s %pM\n", + add ? "add" : "del", addr); + if (add) + return ionic_lif_addr_add(lif, addr); + else + return ionic_lif_addr_del(lif, addr); + } + + return 0; +} + +static int ionic_addr_add(struct net_device *netdev, const u8 *addr) +{ + return ionic_lif_addr(netdev_priv(netdev), addr, true); +} + +static int ionic_addr_del(struct net_device *netdev, const u8 *addr) +{ + return ionic_lif_addr(netdev_priv(netdev), addr, false); +} + +static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_mode_set = { + .opcode = IONIC_CMD_RX_MODE_SET, + .lif_index = cpu_to_le16(lif->index), + .rx_mode = cpu_to_le16(rx_mode), + }, + }; + char buf[128]; + int err; + int i; +#define REMAIN(__x) (sizeof(buf) - (__x)) + + i = snprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:", + lif->rx_mode, rx_mode); + if (rx_mode & IONIC_RX_MODE_F_UNICAST) + i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST"); + if (rx_mode & IONIC_RX_MODE_F_MULTICAST) + i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST"); + if (rx_mode & IONIC_RX_MODE_F_BROADCAST) + i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST"); + if (rx_mode & IONIC_RX_MODE_F_PROMISC) + i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC"); + if (rx_mode & IONIC_RX_MODE_F_ALLMULTI) + i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI"); + netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf); + + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + netdev_warn(lif->netdev, "set rx_mode 0x%04x failed: %d\n", + rx_mode, err); + else + lif->rx_mode = rx_mode; +} + +static void _ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) +{ + struct ionic_deferred_work *work; + + if (in_interrupt()) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + netdev_err(lif->netdev, "%s OOM\n", __func__); + return; + } + work->type = IONIC_DW_TYPE_RX_MODE; + work->rx_mode = rx_mode; + netdev_dbg(lif->netdev, "deferred: rx_mode\n"); + ionic_lif_deferred_enqueue(&lif->deferred, work); + } else { + ionic_lif_rx_mode(lif, rx_mode); + } +} + +static void ionic_set_rx_mode(struct net_device *netdev) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_identity *ident; + unsigned int nfilters; + unsigned int rx_mode; + + ident = &lif->ionic->ident; + + rx_mode = IONIC_RX_MODE_F_UNICAST; + rx_mode |= (netdev->flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0; + rx_mode |= (netdev->flags & IFF_BROADCAST) ? 
IONIC_RX_MODE_F_BROADCAST : 0; + rx_mode |= (netdev->flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0; + rx_mode |= (netdev->flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0; + + /* sync unicast addresses + * next check to see if we're in an overflow state + * if so, we track that we overflowed and enable NIC PROMISC + * else if the overflow is set and not needed + * we remove our overflow flag and check the netdev flags + * to see if we can disable NIC PROMISC + */ + __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); + nfilters = le32_to_cpu(ident->lif.eth.max_ucast_filters); + if (netdev_uc_count(netdev) + 1 > nfilters) { + rx_mode |= IONIC_RX_MODE_F_PROMISC; + lif->uc_overflow = true; + } else if (lif->uc_overflow) { + lif->uc_overflow = false; + if (!(netdev->flags & IFF_PROMISC)) + rx_mode &= ~IONIC_RX_MODE_F_PROMISC; + } + + /* same for multicast */ + __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del); + nfilters = le32_to_cpu(ident->lif.eth.max_mcast_filters); + if (netdev_mc_count(netdev) > nfilters) { + rx_mode |= IONIC_RX_MODE_F_ALLMULTI; + lif->mc_overflow = true; + } else if (lif->mc_overflow) { + lif->mc_overflow = false; + if (!(netdev->flags & IFF_ALLMULTI)) + rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI; + } + + if (lif->rx_mode != rx_mode) + _ionic_lif_rx_mode(lif, rx_mode); +} + +static __le64 ionic_netdev_features_to_nic(netdev_features_t features) +{ + u64 wanted = 0; + + if (features & NETIF_F_HW_VLAN_CTAG_TX) + wanted |= IONIC_ETH_HW_VLAN_TX_TAG; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + wanted |= IONIC_ETH_HW_VLAN_RX_STRIP; + if (features & NETIF_F_HW_VLAN_CTAG_FILTER) + wanted |= IONIC_ETH_HW_VLAN_RX_FILTER; + if (features & NETIF_F_RXHASH) + wanted |= IONIC_ETH_HW_RX_HASH; + if (features & NETIF_F_RXCSUM) + wanted |= IONIC_ETH_HW_RX_CSUM; + if (features & NETIF_F_SG) + wanted |= IONIC_ETH_HW_TX_SG; + if (features & NETIF_F_HW_CSUM) + wanted |= IONIC_ETH_HW_TX_CSUM; + if (features & NETIF_F_TSO) + wanted |= IONIC_ETH_HW_TSO; + if (features & NETIF_F_TSO6) + wanted |= IONIC_ETH_HW_TSO_IPV6; + if (features & NETIF_F_TSO_ECN) + wanted |= IONIC_ETH_HW_TSO_ECN; + if (features & NETIF_F_GSO_GRE) + wanted |= IONIC_ETH_HW_TSO_GRE; + if (features & NETIF_F_GSO_GRE_CSUM) + wanted |= IONIC_ETH_HW_TSO_GRE_CSUM; + if (features & NETIF_F_GSO_IPXIP4) + wanted |= IONIC_ETH_HW_TSO_IPXIP4; + if (features & NETIF_F_GSO_IPXIP6) + wanted |= IONIC_ETH_HW_TSO_IPXIP6; + if (features & NETIF_F_GSO_UDP_TUNNEL) + wanted |= IONIC_ETH_HW_TSO_UDP; + if (features & NETIF_F_GSO_UDP_TUNNEL_CSUM) + wanted |= IONIC_ETH_HW_TSO_UDP_CSUM; + + return cpu_to_le64(wanted); +} + +static int ionic_set_nic_features(struct ionic_lif *lif, + netdev_features_t features) +{ + struct device *dev = lif->ionic->dev; + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.lif_setattr = { + .opcode = IONIC_CMD_LIF_SETATTR, + .index = cpu_to_le16(lif->index), + .attr = IONIC_LIF_ATTR_FEATURES, + }, + }; + u64 vlan_flags = IONIC_ETH_HW_VLAN_TX_TAG | + IONIC_ETH_HW_VLAN_RX_STRIP | + IONIC_ETH_HW_VLAN_RX_FILTER; + int err; + + ctx.cmd.lif_setattr.features = ionic_netdev_features_to_nic(features); + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + return err; + + lif->hw_features = le64_to_cpu(ctx.cmd.lif_setattr.features & + ctx.comp.lif_setattr.features); + + if ((vlan_flags & features) && + !(vlan_flags & le64_to_cpu(ctx.comp.lif_setattr.features))) + dev_info_once(lif->ionic->dev, "NIC is not supporting vlan offload, likely in SmartNIC mode\n"); + + if (lif->hw_features & 
IONIC_ETH_HW_VLAN_TX_TAG) + dev_dbg(dev, "feature ETH_HW_VLAN_TX_TAG\n"); + if (lif->hw_features & IONIC_ETH_HW_VLAN_RX_STRIP) + dev_dbg(dev, "feature ETH_HW_VLAN_RX_STRIP\n"); + if (lif->hw_features & IONIC_ETH_HW_VLAN_RX_FILTER) + dev_dbg(dev, "feature ETH_HW_VLAN_RX_FILTER\n"); + if (lif->hw_features & IONIC_ETH_HW_RX_HASH) + dev_dbg(dev, "feature ETH_HW_RX_HASH\n"); + if (lif->hw_features & IONIC_ETH_HW_TX_SG) + dev_dbg(dev, "feature ETH_HW_TX_SG\n"); + if (lif->hw_features & IONIC_ETH_HW_TX_CSUM) + dev_dbg(dev, "feature ETH_HW_TX_CSUM\n"); + if (lif->hw_features & IONIC_ETH_HW_RX_CSUM) + dev_dbg(dev, "feature ETH_HW_RX_CSUM\n"); + if (lif->hw_features & IONIC_ETH_HW_TSO) + dev_dbg(dev, "feature ETH_HW_TSO\n"); + if (lif->hw_features & IONIC_ETH_HW_TSO_IPV6) + dev_dbg(dev, "feature ETH_HW_TSO_IPV6\n"); + if (lif->hw_features & IONIC_ETH_HW_TSO_ECN) + dev_dbg(dev, "feature ETH_HW_TSO_ECN\n"); + if (lif->hw_features & IONIC_ETH_HW_TSO_GRE) + dev_dbg(dev, "feature ETH_HW_TSO_GRE\n"); + if (lif->hw_features & IONIC_ETH_HW_TSO_GRE_CSUM) + dev_dbg(dev, "feature ETH_HW_TSO_GRE_CSUM\n"); + if (lif->hw_features & IONIC_ETH_HW_TSO_IPXIP4) + dev_dbg(dev, "feature ETH_HW_TSO_IPXIP4\n"); + if (lif->hw_features & IONIC_ETH_HW_TSO_IPXIP6) + dev_dbg(dev, "feature ETH_HW_TSO_IPXIP6\n"); + if (lif->hw_features & IONIC_ETH_HW_TSO_UDP) + dev_dbg(dev, "feature ETH_HW_TSO_UDP\n"); + if (lif->hw_features & IONIC_ETH_HW_TSO_UDP_CSUM) + dev_dbg(dev, "feature ETH_HW_TSO_UDP_CSUM\n"); + + return 0; +} + +static int ionic_init_nic_features(struct ionic_lif *lif) +{ + struct net_device *netdev = lif->netdev; + netdev_features_t features; + int err; + + /* set up what we expect to support by default */ + features = NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_RXHASH | + NETIF_F_SG | + NETIF_F_HW_CSUM | + NETIF_F_RXCSUM | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_TSO_ECN; + + err = ionic_set_nic_features(lif, features); + if (err) + return err; + + /* tell the netdev what we actually can support */ + netdev->features |= NETIF_F_HIGHDMA; + + if (lif->hw_features & IONIC_ETH_HW_VLAN_TX_TAG) + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; + if (lif->hw_features & IONIC_ETH_HW_VLAN_RX_STRIP) + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + if (lif->hw_features & IONIC_ETH_HW_VLAN_RX_FILTER) + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + if (lif->hw_features & IONIC_ETH_HW_RX_HASH) + netdev->hw_features |= NETIF_F_RXHASH; + if (lif->hw_features & IONIC_ETH_HW_TX_SG) + netdev->hw_features |= NETIF_F_SG; + + if (lif->hw_features & IONIC_ETH_HW_TX_CSUM) + netdev->hw_enc_features |= NETIF_F_HW_CSUM; + if (lif->hw_features & IONIC_ETH_HW_RX_CSUM) + netdev->hw_enc_features |= NETIF_F_RXCSUM; + if (lif->hw_features & IONIC_ETH_HW_TSO) + netdev->hw_enc_features |= NETIF_F_TSO; + if (lif->hw_features & IONIC_ETH_HW_TSO_IPV6) + netdev->hw_enc_features |= NETIF_F_TSO6; + if (lif->hw_features & IONIC_ETH_HW_TSO_ECN) + netdev->hw_enc_features |= NETIF_F_TSO_ECN; + if (lif->hw_features & IONIC_ETH_HW_TSO_GRE) + netdev->hw_enc_features |= NETIF_F_GSO_GRE; + if (lif->hw_features & IONIC_ETH_HW_TSO_GRE_CSUM) + netdev->hw_enc_features |= NETIF_F_GSO_GRE_CSUM; + if (lif->hw_features & IONIC_ETH_HW_TSO_IPXIP4) + netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4; + if (lif->hw_features & IONIC_ETH_HW_TSO_IPXIP6) + netdev->hw_enc_features |= NETIF_F_GSO_IPXIP6; + if (lif->hw_features & IONIC_ETH_HW_TSO_UDP) + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; + if (lif->hw_features 
& IONIC_ETH_HW_TSO_UDP_CSUM) + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + + netdev->hw_features |= netdev->hw_enc_features; + netdev->features |= netdev->hw_features; + + netdev->priv_flags |= IFF_UNICAST_FLT; + + return 0; +} + +static int ionic_set_features(struct net_device *netdev, + netdev_features_t features) +{ + struct ionic_lif *lif = netdev_priv(netdev); + int err; + + netdev_dbg(netdev, "%s: lif->features=0x%08llx new_features=0x%08llx\n", + __func__, (u64)lif->netdev->features, (u64)features); + + err = ionic_set_nic_features(lif, features); + + return err; +} + +static int ionic_set_mac_address(struct net_device *netdev, void *sa) +{ + struct sockaddr *addr = sa; + u8 *mac; + int err; + + mac = (u8 *)addr->sa_data; + if (ether_addr_equal(netdev->dev_addr, mac)) + return 0; + + err = eth_prepare_mac_addr_change(netdev, addr); + if (err) + return err; + + if (!is_zero_ether_addr(netdev->dev_addr)) { + netdev_info(netdev, "deleting mac addr %pM\n", + netdev->dev_addr); + ionic_addr_del(netdev, netdev->dev_addr); + } + + eth_commit_mac_addr_change(netdev, addr); + netdev_info(netdev, "updating mac addr %pM\n", mac); + + return ionic_addr_add(netdev, mac); +} + +static int ionic_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.lif_setattr = { + .opcode = IONIC_CMD_LIF_SETATTR, + .index = cpu_to_le16(lif->index), + .attr = IONIC_LIF_ATTR_MTU, + .mtu = cpu_to_le32(new_mtu), + }, + }; + int err; + + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + return err; + + netdev->mtu = new_mtu; + err = ionic_reset_queues(lif); + + return err; +} + +static void ionic_tx_timeout_work(struct work_struct *ws) +{ + struct ionic_lif *lif = container_of(ws, struct ionic_lif, tx_timeout_work); + + netdev_info(lif->netdev, "Tx Timeout recovery\n"); + + rtnl_lock(); + ionic_reset_queues(lif); + rtnl_unlock(); +} + +static void ionic_tx_timeout(struct net_device *netdev) +{ + struct ionic_lif *lif = netdev_priv(netdev); + + schedule_work(&lif->tx_timeout_work); +} + +static int ionic_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, + u16 vid) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_filter_add = { + .opcode = IONIC_CMD_RX_FILTER_ADD, + .lif_index = cpu_to_le16(lif->index), + .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_VLAN), + .vlan.vlan = cpu_to_le16(vid), + }, + }; + int err; + + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + return err; + + netdev_dbg(netdev, "rx_filter add VLAN %d (id %d)\n", vid, + ctx.comp.rx_filter_add.filter_id); + + return ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx); +} + +static int ionic_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, + u16 vid) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_filter_del = { + .opcode = IONIC_CMD_RX_FILTER_DEL, + .lif_index = cpu_to_le16(lif->index), + }, + }; + struct ionic_rx_filter *f; + + spin_lock_bh(&lif->rx_filters.lock); + + f = ionic_rx_filter_by_vlan(lif, vid); + if (!f) { + spin_unlock_bh(&lif->rx_filters.lock); + return -ENOENT; + } + + netdev_dbg(netdev, "rx_filter del VLAN %d (id %d)\n", vid, + le32_to_cpu(ctx.cmd.rx_filter_del.filter_id)); + + ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id); + ionic_rx_filter_free(lif, 
f); + spin_unlock_bh(&lif->rx_filters.lock); + + return ionic_adminq_post_wait(lif, &ctx); +} + +int ionic_lif_rss_config(struct ionic_lif *lif, const u16 types, + const u8 *key, const u32 *indir) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.lif_setattr = { + .opcode = IONIC_CMD_LIF_SETATTR, + .attr = IONIC_LIF_ATTR_RSS, + .rss.types = cpu_to_le16(types), + .rss.addr = cpu_to_le64(lif->rss_ind_tbl_pa), + }, + }; + unsigned int i, tbl_sz; + + lif->rss_types = types; + + if (key) + memcpy(lif->rss_hash_key, key, IONIC_RSS_HASH_KEY_SIZE); + + if (indir) { + tbl_sz = le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz); + for (i = 0; i < tbl_sz; i++) + lif->rss_ind_tbl[i] = indir[i]; + } + + memcpy(ctx.cmd.lif_setattr.rss.key, lif->rss_hash_key, + IONIC_RSS_HASH_KEY_SIZE); + + return ionic_adminq_post_wait(lif, &ctx); +} + +static int ionic_lif_rss_init(struct ionic_lif *lif) +{ + u8 rss_key[IONIC_RSS_HASH_KEY_SIZE]; + unsigned int tbl_sz; + unsigned int i; + + netdev_rss_key_fill(rss_key, IONIC_RSS_HASH_KEY_SIZE); + + lif->rss_types = IONIC_RSS_TYPE_IPV4 | + IONIC_RSS_TYPE_IPV4_TCP | + IONIC_RSS_TYPE_IPV4_UDP | + IONIC_RSS_TYPE_IPV6 | + IONIC_RSS_TYPE_IPV6_TCP | + IONIC_RSS_TYPE_IPV6_UDP; + + /* Fill indirection table with 'default' values */ + tbl_sz = le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz); + for (i = 0; i < tbl_sz; i++) + lif->rss_ind_tbl[i] = ethtool_rxfh_indir_default(i, lif->nxqs); + + return ionic_lif_rss_config(lif, lif->rss_types, rss_key, NULL); +} + +static int ionic_lif_rss_deinit(struct ionic_lif *lif) +{ + return ionic_lif_rss_config(lif, 0x0, NULL, NULL); +} + +static void ionic_txrx_disable(struct ionic_lif *lif) +{ + unsigned int i; + + for (i = 0; i < lif->nxqs; i++) { + ionic_qcq_disable(lif->txqcqs[i].qcq); + ionic_qcq_disable(lif->rxqcqs[i].qcq); + } +} + +static void ionic_txrx_deinit(struct ionic_lif *lif) +{ + unsigned int i; + + for (i = 0; i < lif->nxqs; i++) { + ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq); + ionic_tx_flush(&lif->txqcqs[i].qcq->cq); + + ionic_lif_qcq_deinit(lif, lif->rxqcqs[i].qcq); + ionic_rx_flush(&lif->rxqcqs[i].qcq->cq); + ionic_rx_empty(&lif->rxqcqs[i].qcq->q); + } +} + +static void ionic_txrx_free(struct ionic_lif *lif) +{ + unsigned int i; + + for (i = 0; i < lif->nxqs; i++) { + ionic_qcq_free(lif, lif->txqcqs[i].qcq); + lif->txqcqs[i].qcq = NULL; + + ionic_qcq_free(lif, lif->rxqcqs[i].qcq); + lif->rxqcqs[i].qcq = NULL; + } +} + +static int ionic_txrx_alloc(struct ionic_lif *lif) +{ + unsigned int flags; + unsigned int i; + int err = 0; + u32 coal; + + flags = IONIC_QCQ_F_TX_STATS | IONIC_QCQ_F_SG; + for (i = 0; i < lif->nxqs; i++) { + err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags, + lif->ntxq_descs, + sizeof(struct ionic_txq_desc), + sizeof(struct ionic_txq_comp), + sizeof(struct ionic_txq_sg_desc), + lif->kern_pid, &lif->txqcqs[i].qcq); + if (err) + goto err_out; + + lif->txqcqs[i].qcq->stats = lif->txqcqs[i].stats; + } + + flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_INTR; + coal = ionic_coal_usec_to_hw(lif->ionic, lif->rx_coalesce_usecs); + for (i = 0; i < lif->nxqs; i++) { + err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags, + lif->nrxq_descs, + sizeof(struct ionic_rxq_desc), + sizeof(struct ionic_rxq_comp), + 0, lif->kern_pid, &lif->rxqcqs[i].qcq); + if (err) + goto err_out; + + lif->rxqcqs[i].qcq->stats = lif->rxqcqs[i].stats; + + ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, + lif->rxqcqs[i].qcq->intr.index, coal); + 
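/* Tx queues are allocated without IONIC_QCQ_F_INTR; share the paired
+ * Rx queue's vector, which ionic_lif_txq_init() above also points the
+ * Tx intr_index at.
+ */ +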
ionic_link_qcq_interrupts(lif->rxqcqs[i].qcq, + lif->txqcqs[i].qcq); + } + + return 0; + +err_out: + ionic_txrx_free(lif); + + return err; +} + +static int ionic_txrx_init(struct ionic_lif *lif) +{ + unsigned int i; + int err; + + for (i = 0; i < lif->nxqs; i++) { + err = ionic_lif_txq_init(lif, lif->txqcqs[i].qcq); + if (err) + goto err_out; + + err = ionic_lif_rxq_init(lif, lif->rxqcqs[i].qcq); + if (err) { + ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq); + goto err_out; + } + } + + if (lif->netdev->features & NETIF_F_RXHASH) + ionic_lif_rss_init(lif); + + ionic_set_rx_mode(lif->netdev); + + return 0; + +err_out: + while (i--) { + ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq); + ionic_lif_qcq_deinit(lif, lif->rxqcqs[i].qcq); + } + + return err; +} + +static int ionic_txrx_enable(struct ionic_lif *lif) +{ + int i, err; + + for (i = 0; i < lif->nxqs; i++) { + err = ionic_qcq_enable(lif->txqcqs[i].qcq); + if (err) + goto err_out; + + ionic_rx_fill(&lif->rxqcqs[i].qcq->q); + err = ionic_qcq_enable(lif->rxqcqs[i].qcq); + if (err) { + ionic_qcq_disable(lif->txqcqs[i].qcq); + goto err_out; + } + } + + return 0; + +err_out: + while (i--) { + ionic_qcq_disable(lif->rxqcqs[i].qcq); + ionic_qcq_disable(lif->txqcqs[i].qcq); + } + + return err; +} + +int ionic_open(struct net_device *netdev) +{ + struct ionic_lif *lif = netdev_priv(netdev); + int err; + + netif_carrier_off(netdev); + + err = ionic_txrx_alloc(lif); + if (err) + return err; + + err = ionic_txrx_init(lif); + if (err) + goto err_txrx_free; + + err = ionic_txrx_enable(lif); + if (err) + goto err_txrx_deinit; + + netif_set_real_num_tx_queues(netdev, lif->nxqs); + netif_set_real_num_rx_queues(netdev, lif->nxqs); + + set_bit(IONIC_LIF_UP, lif->state); + + ionic_link_status_check_request(lif); + if (netif_carrier_ok(netdev)) + netif_tx_wake_all_queues(netdev); + + return 0; + +err_txrx_deinit: + ionic_txrx_deinit(lif); +err_txrx_free: + ionic_txrx_free(lif); + return err; +} + +int ionic_stop(struct net_device *netdev) +{ + struct ionic_lif *lif = netdev_priv(netdev); + int err = 0; + + if (!test_bit(IONIC_LIF_UP, lif->state)) { + dev_dbg(lif->ionic->dev, "%s: %s state=DOWN\n", + __func__, lif->name); + return 0; + } + dev_dbg(lif->ionic->dev, "%s: %s state=UP\n", __func__, lif->name); + clear_bit(IONIC_LIF_UP, lif->state); + + /* carrier off before disabling queues to avoid watchdog timeout */ + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + netif_tx_disable(netdev); + + ionic_txrx_disable(lif); + ionic_txrx_deinit(lif); + ionic_txrx_free(lif); + + return err; +} + +static const struct net_device_ops ionic_netdev_ops = { + .ndo_open = ionic_open, + .ndo_stop = ionic_stop, + .ndo_start_xmit = ionic_start_xmit, + .ndo_get_stats64 = ionic_get_stats64, + .ndo_set_rx_mode = ionic_set_rx_mode, + .ndo_set_features = ionic_set_features, + .ndo_set_mac_address = ionic_set_mac_address, + .ndo_validate_addr = eth_validate_addr, + .ndo_tx_timeout = ionic_tx_timeout, + .ndo_change_mtu = ionic_change_mtu, + .ndo_vlan_rx_add_vid = ionic_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = ionic_vlan_rx_kill_vid, +}; + +int ionic_reset_queues(struct ionic_lif *lif) +{ + bool running; + int err = 0; + + /* Put off the next watchdog timeout */ + netif_trans_update(lif->netdev); + + if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET)) + return -EBUSY; + + running = netif_running(lif->netdev); + if (running) + err = ionic_stop(lif->netdev); + if (!err && running) + ionic_open(lif->netdev); + + clear_bit(IONIC_LIF_QUEUE_RESET, lif->state); + + return 
err; +} + +static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index) +{ + struct device *dev = ionic->dev; + struct net_device *netdev; + struct ionic_lif *lif; + int tbl_sz; + u32 coal; + int err; + + netdev = alloc_etherdev_mqs(sizeof(*lif), + ionic->ntxqs_per_lif, ionic->ntxqs_per_lif); + if (!netdev) { + dev_err(dev, "Cannot allocate netdev, aborting\n"); + return ERR_PTR(-ENOMEM); + } + + SET_NETDEV_DEV(netdev, dev); + + lif = netdev_priv(netdev); + lif->netdev = netdev; + ionic->master_lif = lif; + netdev->netdev_ops = &ionic_netdev_ops; + ionic_ethtool_set_ops(netdev); + + netdev->watchdog_timeo = 2 * HZ; + netdev->min_mtu = IONIC_MIN_MTU; + netdev->max_mtu = IONIC_MAX_MTU; + + lif->neqs = ionic->neqs_per_lif; + lif->nxqs = ionic->ntxqs_per_lif; + + lif->ionic = ionic; + lif->index = index; + lif->ntxq_descs = IONIC_DEF_TXRX_DESC; + lif->nrxq_descs = IONIC_DEF_TXRX_DESC; + + /* Convert the default coalesce value to actual hw resolution */ + coal = ionic_coal_usec_to_hw(lif->ionic, IONIC_ITR_COAL_USEC_DEFAULT); + lif->rx_coalesce_usecs = ionic_coal_hw_to_usec(lif->ionic, coal); + + snprintf(lif->name, sizeof(lif->name), "lif%u", index); + + spin_lock_init(&lif->adminq_lock); + + spin_lock_init(&lif->deferred.lock); + INIT_LIST_HEAD(&lif->deferred.list); + INIT_WORK(&lif->deferred.work, ionic_lif_deferred_work); + + /* allocate lif info */ + lif->info_sz = ALIGN(sizeof(*lif->info), PAGE_SIZE); + lif->info = dma_alloc_coherent(dev, lif->info_sz, + &lif->info_pa, GFP_KERNEL); + if (!lif->info) { + dev_err(dev, "Failed to allocate lif info, aborting\n"); + err = -ENOMEM; + goto err_out_free_netdev; + } + + /* allocate queues */ + err = ionic_qcqs_alloc(lif); + if (err) + goto err_out_free_lif_info; + + /* allocate rss indirection table */ + tbl_sz = le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz); + lif->rss_ind_tbl_sz = sizeof(*lif->rss_ind_tbl) * tbl_sz; + lif->rss_ind_tbl = dma_alloc_coherent(dev, lif->rss_ind_tbl_sz, + &lif->rss_ind_tbl_pa, + GFP_KERNEL); + + if (!lif->rss_ind_tbl) { + dev_err(dev, "Failed to allocate rss indirection table, aborting\n"); + err = -ENOMEM; + goto err_out_free_qcqs; + } + + list_add_tail(&lif->list, &ionic->lifs); + + return lif; + +err_out_free_qcqs: + ionic_qcqs_free(lif); +err_out_free_lif_info: + dma_free_coherent(dev, lif->info_sz, lif->info, lif->info_pa); + lif->info = NULL; + lif->info_pa = 0; +err_out_free_netdev: + free_netdev(lif->netdev); + lif = NULL; + + return ERR_PTR(err); +} + +int ionic_lifs_alloc(struct ionic *ionic) +{ + struct ionic_lif *lif; + + INIT_LIST_HEAD(&ionic->lifs); + + /* only build the first lif, others are for later features */ + set_bit(0, ionic->lifbits); + lif = ionic_lif_alloc(ionic, 0); + + return PTR_ERR_OR_ZERO(lif); +} + +static void ionic_lif_reset(struct ionic_lif *lif) +{ + struct ionic_dev *idev = &lif->ionic->idev; + + mutex_lock(&lif->ionic->dev_cmd_lock); + ionic_dev_cmd_lif_reset(idev, lif->index); + ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT); + mutex_unlock(&lif->ionic->dev_cmd_lock); +} + +static void ionic_lif_free(struct ionic_lif *lif) +{ + struct device *dev = lif->ionic->dev; + + /* free rss indirection table */ + dma_free_coherent(dev, lif->rss_ind_tbl_sz, lif->rss_ind_tbl, + lif->rss_ind_tbl_pa); + lif->rss_ind_tbl = NULL; + lif->rss_ind_tbl_pa = 0; + + /* free queues */ + ionic_qcqs_free(lif); + ionic_lif_reset(lif); + + /* free lif info */ + dma_free_coherent(dev, lif->info_sz, lif->info, lif->info_pa); + lif->info = NULL; + lif->info_pa = 0; + + /* unmap doorbell page */ +
ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage); + lif->kern_dbpage = NULL; + kfree(lif->dbid_inuse); + lif->dbid_inuse = NULL; + + /* free netdev & lif */ + ionic_debugfs_del_lif(lif); + list_del(&lif->list); + free_netdev(lif->netdev); +} + +void ionic_lifs_free(struct ionic *ionic) +{ + struct list_head *cur, *tmp; + struct ionic_lif *lif; + + list_for_each_safe(cur, tmp, &ionic->lifs) { + lif = list_entry(cur, struct ionic_lif, list); + + ionic_lif_free(lif); + } +} + +static void ionic_lif_deinit(struct ionic_lif *lif) +{ + if (!test_bit(IONIC_LIF_INITED, lif->state)) + return; + + clear_bit(IONIC_LIF_INITED, lif->state); + + ionic_rx_filters_deinit(lif); + ionic_lif_rss_deinit(lif); + + napi_disable(&lif->adminqcq->napi); + ionic_lif_qcq_deinit(lif, lif->notifyqcq); + ionic_lif_qcq_deinit(lif, lif->adminqcq); + + ionic_lif_reset(lif); +} + +void ionic_lifs_deinit(struct ionic *ionic) +{ + struct list_head *cur, *tmp; + struct ionic_lif *lif; + + list_for_each_safe(cur, tmp, &ionic->lifs) { + lif = list_entry(cur, struct ionic_lif, list); + ionic_lif_deinit(lif); + } +} + +static int ionic_lif_adminq_init(struct ionic_lif *lif) +{ + struct device *dev = lif->ionic->dev; + struct ionic_q_init_comp comp; + struct ionic_dev *idev; + struct ionic_qcq *qcq; + struct ionic_queue *q; + int err; + + idev = &lif->ionic->idev; + qcq = lif->adminqcq; + q = &qcq->q; + + mutex_lock(&lif->ionic->dev_cmd_lock); + ionic_dev_cmd_adminq_init(idev, qcq, lif->index, qcq->intr.index); + err = ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT); + ionic_dev_cmd_comp(idev, (union ionic_dev_cmd_comp *)&comp); + mutex_unlock(&lif->ionic->dev_cmd_lock); + if (err) { + netdev_err(lif->netdev, "adminq init failed %d\n", err); + return err; + } + + q->hw_type = comp.hw_type; + q->hw_index = le32_to_cpu(comp.hw_index); + q->dbval = IONIC_DBELL_QID(q->hw_index); + + dev_dbg(dev, "adminq->hw_type %d\n", q->hw_type); + dev_dbg(dev, "adminq->hw_index %d\n", q->hw_index); + + netif_napi_add(lif->netdev, &qcq->napi, ionic_adminq_napi, + NAPI_POLL_WEIGHT); + + err = ionic_request_irq(lif, qcq); + if (err) { + netdev_warn(lif->netdev, "adminq irq request failed %d\n", err); + netif_napi_del(&qcq->napi); + return err; + } + + napi_enable(&qcq->napi); + + if (qcq->flags & IONIC_QCQ_F_INTR) + ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, + IONIC_INTR_MASK_CLEAR); + + qcq->flags |= IONIC_QCQ_F_INITED; + + ionic_debugfs_add_qcq(lif, qcq); + + return 0; +} + +static int ionic_lif_notifyq_init(struct ionic_lif *lif) +{ + struct ionic_qcq *qcq = lif->notifyqcq; + struct device *dev = lif->ionic->dev; + struct ionic_queue *q = &qcq->q; + int err; + + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.q_init = { + .opcode = IONIC_CMD_Q_INIT, + .lif_index = cpu_to_le16(lif->index), + .type = q->type, + .index = cpu_to_le32(q->index), + .flags = cpu_to_le16(IONIC_QINIT_F_IRQ | + IONIC_QINIT_F_ENA), + .intr_index = cpu_to_le16(lif->adminqcq->intr.index), + .pid = cpu_to_le16(q->pid), + .ring_size = ilog2(q->num_descs), + .ring_base = cpu_to_le64(q->base_pa), + } + }; + + dev_dbg(dev, "notifyq_init.pid %d\n", ctx.cmd.q_init.pid); + dev_dbg(dev, "notifyq_init.index %d\n", ctx.cmd.q_init.index); + dev_dbg(dev, "notifyq_init.ring_base 0x%llx\n", ctx.cmd.q_init.ring_base); + dev_dbg(dev, "notifyq_init.ring_size %d\n", ctx.cmd.q_init.ring_size); + + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + return err; + + q->hw_type = ctx.comp.q_init.hw_type; + q->hw_index = 
le32_to_cpu(ctx.comp.q_init.hw_index); + q->dbval = IONIC_DBELL_QID(q->hw_index); + + dev_dbg(dev, "notifyq->hw_type %d\n", q->hw_type); + dev_dbg(dev, "notifyq->hw_index %d\n", q->hw_index); + + /* preset the callback info */ + q->info[0].cb_arg = lif; + + qcq->flags |= IONIC_QCQ_F_INITED; + + ionic_debugfs_add_qcq(lif, qcq); + + return 0; +} + +static int ionic_station_set(struct ionic_lif *lif) +{ + struct net_device *netdev = lif->netdev; + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.lif_getattr = { + .opcode = IONIC_CMD_LIF_GETATTR, + .index = cpu_to_le16(lif->index), + .attr = IONIC_LIF_ATTR_MAC, + }, + }; + struct sockaddr addr; + int err; + + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + return err; + + memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); + addr.sa_family = AF_INET; + err = eth_prepare_mac_addr_change(netdev, &addr); + if (err) + return err; + + if (!is_zero_ether_addr(netdev->dev_addr)) { + netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n", + netdev->dev_addr); + ionic_lif_addr(lif, netdev->dev_addr, false); + } + + eth_commit_mac_addr_change(netdev, &addr); + netdev_dbg(lif->netdev, "adding station MAC addr %pM\n", + netdev->dev_addr); + ionic_lif_addr(lif, netdev->dev_addr, true); + + return 0; +} + +static int ionic_lif_init(struct ionic_lif *lif) +{ + struct ionic_dev *idev = &lif->ionic->idev; + struct device *dev = lif->ionic->dev; + struct ionic_lif_init_comp comp; + int dbpage_num; + int err; + + ionic_debugfs_add_lif(lif); + + mutex_lock(&lif->ionic->dev_cmd_lock); + ionic_dev_cmd_lif_init(idev, lif->index, lif->info_pa); + err = ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT); + ionic_dev_cmd_comp(idev, (union ionic_dev_cmd_comp *)&comp); + mutex_unlock(&lif->ionic->dev_cmd_lock); + if (err) + return err; + + lif->hw_index = le16_to_cpu(comp.hw_index); + + /* now that we have the hw_index we can figure out our doorbell page */ + lif->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif); + if (!lif->dbid_count) { + dev_err(dev, "No doorbell pages, aborting\n"); + return -EINVAL; + } + + lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL); + if (!lif->dbid_inuse) { + dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n"); + return -ENOMEM; + } + + /* first doorbell id reserved for kernel (dbid aka pid == zero) */ + set_bit(0, lif->dbid_inuse); + lif->kern_pid = 0; + + dbpage_num = ionic_db_page_num(lif, lif->kern_pid); + lif->kern_dbpage = ionic_bus_map_dbpage(lif->ionic, dbpage_num); + if (!lif->kern_dbpage) { + dev_err(dev, "Cannot map dbpage, aborting\n"); + err = -ENOMEM; + goto err_out_free_dbid; + } + + err = ionic_lif_adminq_init(lif); + if (err) + goto err_out_adminq_deinit; + + if (lif->ionic->nnqs_per_lif) { + err = ionic_lif_notifyq_init(lif); + if (err) + goto err_out_notifyq_deinit; + } + + err = ionic_init_nic_features(lif); + if (err) + goto err_out_notifyq_deinit; + + err = ionic_rx_filters_init(lif); + if (err) + goto err_out_notifyq_deinit; + + err = ionic_station_set(lif); + if (err) + goto err_out_notifyq_deinit; + + lif->rx_copybreak = IONIC_RX_COPYBREAK_DEFAULT; + + set_bit(IONIC_LIF_INITED, lif->state); + + INIT_WORK(&lif->tx_timeout_work, ionic_tx_timeout_work); + + return 0; + +err_out_notifyq_deinit: + ionic_lif_qcq_deinit(lif, lif->notifyqcq); +err_out_adminq_deinit: + ionic_lif_qcq_deinit(lif, lif->adminqcq); + ionic_lif_reset(lif); + ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage); + lif->kern_dbpage = NULL; +err_out_free_dbid: 
+ kfree(lif->dbid_inuse); + lif->dbid_inuse = NULL; + + return err; +} + +int ionic_lifs_init(struct ionic *ionic) +{ + struct list_head *cur, *tmp; + struct ionic_lif *lif; + int err; + + list_for_each_safe(cur, tmp, &ionic->lifs) { + lif = list_entry(cur, struct ionic_lif, list); + err = ionic_lif_init(lif); + if (err) + return err; + } + + return 0; +} + +static void ionic_lif_notify_work(struct work_struct *ws) +{ +} + +static void ionic_lif_set_netdev_info(struct ionic_lif *lif) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.lif_setattr = { + .opcode = IONIC_CMD_LIF_SETATTR, + .index = cpu_to_le16(lif->index), + .attr = IONIC_LIF_ATTR_NAME, + }, + }; + + strlcpy(ctx.cmd.lif_setattr.name, lif->netdev->name, + sizeof(ctx.cmd.lif_setattr.name)); + + ionic_adminq_post_wait(lif, &ctx); +} + +static struct ionic_lif *ionic_netdev_lif(struct net_device *netdev) +{ + if (!netdev || netdev->netdev_ops->ndo_start_xmit != ionic_start_xmit) + return NULL; + + return netdev_priv(netdev); +} + +static int ionic_lif_notify(struct notifier_block *nb, + unsigned long event, void *info) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(info); + struct ionic *ionic = container_of(nb, struct ionic, nb); + struct ionic_lif *lif = ionic_netdev_lif(ndev); + + if (!lif || lif->ionic != ionic) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_CHANGENAME: + ionic_lif_set_netdev_info(lif); + break; + } + + return NOTIFY_DONE; +} + +int ionic_lifs_register(struct ionic *ionic) +{ + int err; + + INIT_WORK(&ionic->nb_work, ionic_lif_notify_work); + + ionic->nb.notifier_call = ionic_lif_notify; + + err = register_netdevice_notifier(&ionic->nb); + if (err) + ionic->nb.notifier_call = NULL; + + /* only register LIF0 for now */ + err = register_netdev(ionic->master_lif->netdev); + if (err) { + dev_err(ionic->dev, "Cannot register net device, aborting\n"); + return err; + } + + ionic_link_status_check_request(ionic->master_lif); + ionic->master_lif->registered = true; + + return 0; +} + +void ionic_lifs_unregister(struct ionic *ionic) +{ + if (ionic->nb.notifier_call) { + unregister_netdevice_notifier(&ionic->nb); + cancel_work_sync(&ionic->nb_work); + ionic->nb.notifier_call = NULL; + } + + /* There is only one lif ever registered in the + * current model, so don't bother searching the + * ionic->lif for candidates to unregister + */ + cancel_work_sync(&ionic->master_lif->deferred.work); + cancel_work_sync(&ionic->master_lif->tx_timeout_work); + if (ionic->master_lif->netdev->reg_state == NETREG_REGISTERED) + unregister_netdev(ionic->master_lif->netdev); +} + +int ionic_lif_identify(struct ionic *ionic, u8 lif_type, + union ionic_lif_identity *lid) +{ + struct ionic_dev *idev = &ionic->idev; + size_t sz; + int err; + + sz = min(sizeof(*lid), sizeof(idev->dev_cmd_regs->data)); + + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_lif_identify(idev, lif_type, IONIC_IDENTITY_VERSION_1); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + memcpy_fromio(lid, &idev->dev_cmd_regs->data, sz); + mutex_unlock(&ionic->dev_cmd_lock); + if (err) + return (err); + + dev_dbg(ionic->dev, "capabilities 0x%llx\n", + le64_to_cpu(lid->capabilities)); + + dev_dbg(ionic->dev, "eth.max_ucast_filters %d\n", + le32_to_cpu(lid->eth.max_ucast_filters)); + dev_dbg(ionic->dev, "eth.max_mcast_filters %d\n", + le32_to_cpu(lid->eth.max_mcast_filters)); + dev_dbg(ionic->dev, "eth.features 0x%llx\n", + le64_to_cpu(lid->eth.config.features)); + dev_dbg(ionic->dev, 
"eth.queue_count[IONIC_QTYPE_ADMINQ] %d\n", + le32_to_cpu(lid->eth.config.queue_count[IONIC_QTYPE_ADMINQ])); + dev_dbg(ionic->dev, "eth.queue_count[IONIC_QTYPE_NOTIFYQ] %d\n", + le32_to_cpu(lid->eth.config.queue_count[IONIC_QTYPE_NOTIFYQ])); + dev_dbg(ionic->dev, "eth.queue_count[IONIC_QTYPE_RXQ] %d\n", + le32_to_cpu(lid->eth.config.queue_count[IONIC_QTYPE_RXQ])); + dev_dbg(ionic->dev, "eth.queue_count[IONIC_QTYPE_TXQ] %d\n", + le32_to_cpu(lid->eth.config.queue_count[IONIC_QTYPE_TXQ])); + dev_dbg(ionic->dev, "eth.config.name %s\n", lid->eth.config.name); + dev_dbg(ionic->dev, "eth.config.mac %pM\n", lid->eth.config.mac); + dev_dbg(ionic->dev, "eth.config.mtu %d\n", + le32_to_cpu(lid->eth.config.mtu)); + + return 0; +} + +int ionic_lifs_size(struct ionic *ionic) +{ + struct ionic_identity *ident = &ionic->ident; + unsigned int nintrs, dev_nintrs; + union ionic_lif_config *lc; + unsigned int ntxqs_per_lif; + unsigned int nrxqs_per_lif; + unsigned int neqs_per_lif; + unsigned int nnqs_per_lif; + unsigned int nxqs, neqs; + unsigned int min_intrs; + int err; + + lc = &ident->lif.eth.config; + dev_nintrs = le32_to_cpu(ident->dev.nintrs); + neqs_per_lif = le32_to_cpu(ident->lif.rdma.eq_qtype.qid_count); + nnqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_NOTIFYQ]); + ntxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_TXQ]); + nrxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_RXQ]); + + nxqs = min(ntxqs_per_lif, nrxqs_per_lif); + nxqs = min(nxqs, num_online_cpus()); + neqs = min(neqs_per_lif, num_online_cpus()); + +try_again: + /* interrupt usage: + * 1 for master lif adminq/notifyq + * 1 for each CPU for master lif TxRx queue pairs + * whatever's left is for RDMA queues + */ + nintrs = 1 + nxqs + neqs; + min_intrs = 2; /* adminq + 1 TxRx queue pair */ + + if (nintrs > dev_nintrs) + goto try_fewer; + + err = ionic_bus_alloc_irq_vectors(ionic, nintrs); + if (err < 0 && err != -ENOSPC) { + dev_err(ionic->dev, "Can't get intrs from OS: %d\n", err); + return err; + } + if (err == -ENOSPC) + goto try_fewer; + + if (err != nintrs) { + ionic_bus_free_irq_vectors(ionic); + goto try_fewer; + } + + ionic->nnqs_per_lif = nnqs_per_lif; + ionic->neqs_per_lif = neqs; + ionic->ntxqs_per_lif = nxqs; + ionic->nrxqs_per_lif = nxqs; + ionic->nintrs = nintrs; + + ionic_debugfs_add_sizes(ionic); + + return 0; + +try_fewer: + if (nnqs_per_lif > 1) { + nnqs_per_lif >>= 1; + goto try_again; + } + if (neqs > 1) { + neqs >>= 1; + goto try_again; + } + if (nxqs > 1) { + nxqs >>= 1; + goto try_again; + } + dev_err(ionic->dev, "Can't get minimum %d intrs from OS\n", min_intrs); + return -ENOSPC; +} diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h new file mode 100644 index 000000000000..812190e729c2 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -0,0 +1,277 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#ifndef _IONIC_LIF_H_ +#define _IONIC_LIF_H_ + +#include <linux/pci.h> +#include "ionic_rx_filter.h" + +#define IONIC_ADMINQ_LENGTH 16 /* must be a power of two */ +#define IONIC_NOTIFYQ_LENGTH 64 /* must be a power of two */ + +#define IONIC_MAX_NUM_NAPI_CNTR (NAPI_POLL_WEIGHT + 1) +#define IONIC_MAX_NUM_SG_CNTR (IONIC_TX_MAX_SG_ELEMS + 1) +#define IONIC_RX_COPYBREAK_DEFAULT 256 + +struct ionic_tx_stats { + u64 dma_map_err; + u64 pkts; + u64 bytes; + u64 clean; + u64 linearize; + u64 no_csum; + u64 csum; + u64 crc32_csum; + u64 tso; + u64 frags; + u64 
sg_cntr[IONIC_MAX_NUM_SG_CNTR]; +}; + +struct ionic_rx_stats { + u64 dma_map_err; + u64 alloc_err; + u64 pkts; + u64 bytes; + u64 csum_none; + u64 csum_complete; + u64 csum_error; + u64 buffers_posted; +}; + +#define IONIC_QCQ_F_INITED BIT(0) +#define IONIC_QCQ_F_SG BIT(1) +#define IONIC_QCQ_F_INTR BIT(2) +#define IONIC_QCQ_F_TX_STATS BIT(3) +#define IONIC_QCQ_F_RX_STATS BIT(4) +#define IONIC_QCQ_F_NOTIFYQ BIT(5) + +struct ionic_napi_stats { + u64 poll_count; + u64 work_done_cntr[IONIC_MAX_NUM_NAPI_CNTR]; +}; + +struct ionic_q_stats { + union { + struct ionic_tx_stats tx; + struct ionic_rx_stats rx; + }; +}; + +struct ionic_qcq { + void *base; + dma_addr_t base_pa; + unsigned int total_size; + struct ionic_queue q; + struct ionic_cq cq; + struct ionic_intr_info intr; + struct napi_struct napi; + struct ionic_napi_stats napi_stats; + struct ionic_q_stats *stats; + unsigned int flags; + struct dentry *dentry; +}; + +struct ionic_qcqst { + struct ionic_qcq *qcq; + struct ionic_q_stats *stats; +}; + +#define q_to_qcq(q) container_of(q, struct ionic_qcq, q) +#define q_to_tx_stats(q) (&q_to_qcq(q)->stats->tx) +#define q_to_rx_stats(q) (&q_to_qcq(q)->stats->rx) +#define napi_to_qcq(napi) container_of(napi, struct ionic_qcq, napi) +#define napi_to_cq(napi) (&napi_to_qcq(napi)->cq) + +enum ionic_deferred_work_type { + IONIC_DW_TYPE_RX_MODE, + IONIC_DW_TYPE_RX_ADDR_ADD, + IONIC_DW_TYPE_RX_ADDR_DEL, + IONIC_DW_TYPE_LINK_STATUS, + IONIC_DW_TYPE_LIF_RESET, +}; + +struct ionic_deferred_work { + struct list_head list; + enum ionic_deferred_work_type type; + union { + unsigned int rx_mode; + u8 addr[ETH_ALEN]; + }; +}; + +struct ionic_deferred { + spinlock_t lock; /* lock for deferred work list */ + struct list_head list; + struct work_struct work; +}; + +struct ionic_lif_sw_stats { + u64 tx_packets; + u64 tx_bytes; + u64 rx_packets; + u64 rx_bytes; + u64 tx_tso; + u64 tx_no_csum; + u64 tx_csum; + u64 rx_csum_none; + u64 rx_csum_complete; + u64 rx_csum_error; +}; + +enum ionic_lif_state_flags { + IONIC_LIF_INITED, + IONIC_LIF_SW_DEBUG_STATS, + IONIC_LIF_UP, + IONIC_LIF_LINK_CHECK_REQUESTED, + IONIC_LIF_QUEUE_RESET, + + /* leave this as last */ + IONIC_LIF_STATE_SIZE +}; + +#define IONIC_LIF_NAME_MAX_SZ 32 +struct ionic_lif { + char name[IONIC_LIF_NAME_MAX_SZ]; + struct list_head list; + struct net_device *netdev; + DECLARE_BITMAP(state, IONIC_LIF_STATE_SIZE); + struct ionic *ionic; + bool registered; + unsigned int index; + unsigned int hw_index; + unsigned int kern_pid; + u64 __iomem *kern_dbpage; + spinlock_t adminq_lock; /* lock for AdminQ operations */ + struct ionic_qcq *adminqcq; + struct ionic_qcq *notifyqcq; + struct ionic_qcqst *txqcqs; + struct ionic_qcqst *rxqcqs; + u64 last_eid; + unsigned int neqs; + unsigned int nxqs; + unsigned int ntxq_descs; + unsigned int nrxq_descs; + u32 rx_copybreak; + unsigned int rx_mode; + u64 hw_features; + bool mc_overflow; + unsigned int nmcast; + bool uc_overflow; + unsigned int nucast; + + struct ionic_lif_info *info; + dma_addr_t info_pa; + u32 info_sz; + + u16 rss_types; + u8 rss_hash_key[IONIC_RSS_HASH_KEY_SIZE]; + u8 *rss_ind_tbl; + dma_addr_t rss_ind_tbl_pa; + u32 rss_ind_tbl_sz; + + struct ionic_rx_filters rx_filters; + struct ionic_deferred deferred; + unsigned long *dbid_inuse; + unsigned int dbid_count; + struct dentry *dentry; + u32 rx_coalesce_usecs; + u32 flags; + struct work_struct tx_timeout_work; +}; + +#define lif_to_txqcq(lif, i) ((lif)->txqcqs[i].qcq) +#define lif_to_rxqcq(lif, i) ((lif)->rxqcqs[i].qcq) +#define lif_to_txq(lif, i) 
(&lif_to_txqcq((lif), i)->q) +#define lif_to_rxq(lif, i) (&lif_to_txqcq((lif), i)->q) + +static inline int ionic_wait_for_bit(struct ionic_lif *lif, int bitname) +{ + unsigned long tlimit = jiffies + HZ; + + while (test_and_set_bit(bitname, lif->state) && + time_before(jiffies, tlimit)) + usleep_range(100, 200); + + return test_bit(bitname, lif->state); +} + +static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs) +{ + u32 mult = le32_to_cpu(ionic->ident.dev.intr_coal_mult); + u32 div = le32_to_cpu(ionic->ident.dev.intr_coal_div); + + /* Div-by-zero should never be an issue, but check anyway */ + if (!div || !mult) + return 0; + + /* Round up in case usecs is close to the next hw unit */ + usecs += (div / mult) >> 1; + + /* Convert from usecs to device units */ + return (usecs * mult) / div; +} + +static inline u32 ionic_coal_hw_to_usec(struct ionic *ionic, u32 units) +{ + u32 mult = le32_to_cpu(ionic->ident.dev.intr_coal_mult); + u32 div = le32_to_cpu(ionic->ident.dev.intr_coal_div); + + /* Div-by-zero should never be an issue, but check anyway */ + if (!div || !mult) + return 0; + + /* Convert from device units to usec */ + return (units * div) / mult; +} + +int ionic_lifs_alloc(struct ionic *ionic); +void ionic_lifs_free(struct ionic *ionic); +void ionic_lifs_deinit(struct ionic *ionic); +int ionic_lifs_init(struct ionic *ionic); +int ionic_lifs_register(struct ionic *ionic); +void ionic_lifs_unregister(struct ionic *ionic); +int ionic_lif_identify(struct ionic *ionic, u8 lif_type, + union ionic_lif_identity *lif_ident); +int ionic_lifs_size(struct ionic *ionic); +int ionic_lif_rss_config(struct ionic_lif *lif, u16 types, + const u8 *key, const u32 *indir); + +int ionic_open(struct net_device *netdev); +int ionic_stop(struct net_device *netdev); +int ionic_reset_queues(struct ionic_lif *lif); + +static inline void debug_stats_txq_post(struct ionic_qcq *qcq, + struct ionic_txq_desc *desc, bool dbell) +{ + u8 num_sg_elems = ((le64_to_cpu(desc->cmd) >> IONIC_TXQ_DESC_NSGE_SHIFT) + & IONIC_TXQ_DESC_NSGE_MASK); + + qcq->q.dbell_count += dbell; + + if (num_sg_elems > (IONIC_MAX_NUM_SG_CNTR - 1)) + num_sg_elems = IONIC_MAX_NUM_SG_CNTR - 1; + + qcq->stats->tx.sg_cntr[num_sg_elems]++; +} + +static inline void debug_stats_napi_poll(struct ionic_qcq *qcq, + unsigned int work_done) +{ + qcq->napi_stats.poll_count++; + + if (work_done > (IONIC_MAX_NUM_NAPI_CNTR - 1)) + work_done = IONIC_MAX_NUM_NAPI_CNTR - 1; + + qcq->napi_stats.work_done_cntr[work_done]++; +} + +#define DEBUG_STATS_CQE_CNT(cq) ((cq)->compl_count++) +#define DEBUG_STATS_RX_BUFF_CNT(qcq) ((qcq)->stats->rx.buffers_posted++) +#define DEBUG_STATS_INTR_REARM(intr) ((intr)->rearm_count++) +#define DEBUG_STATS_TXQ_POST(qcq, txdesc, dbell) \ + debug_stats_txq_post(qcq, txdesc, dbell) +#define DEBUG_STATS_NAPI_POLL(qcq, work_done) \ + debug_stats_napi_poll(qcq, work_done) + +#endif /* _IONIC_LIF_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c new file mode 100644 index 000000000000..5ec67f3f1853 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -0,0 +1,549 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#include <linux/module.h> +#include <linux/version.h> +#include <linux/netdevice.h> +#include <linux/utsname.h> + +#include "ionic.h" +#include "ionic_bus.h" +#include "ionic_lif.h" +#include "ionic_debugfs.h" + +MODULE_DESCRIPTION(IONIC_DRV_DESCRIPTION); +MODULE_AUTHOR("Pensando 
Systems, Inc"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(IONIC_DRV_VERSION); + +static const char *ionic_error_to_str(enum ionic_status_code code) +{ + switch (code) { + case IONIC_RC_SUCCESS: + return "IONIC_RC_SUCCESS"; + case IONIC_RC_EVERSION: + return "IONIC_RC_EVERSION"; + case IONIC_RC_EOPCODE: + return "IONIC_RC_EOPCODE"; + case IONIC_RC_EIO: + return "IONIC_RC_EIO"; + case IONIC_RC_EPERM: + return "IONIC_RC_EPERM"; + case IONIC_RC_EQID: + return "IONIC_RC_EQID"; + case IONIC_RC_EQTYPE: + return "IONIC_RC_EQTYPE"; + case IONIC_RC_ENOENT: + return "IONIC_RC_ENOENT"; + case IONIC_RC_EINTR: + return "IONIC_RC_EINTR"; + case IONIC_RC_EAGAIN: + return "IONIC_RC_EAGAIN"; + case IONIC_RC_ENOMEM: + return "IONIC_RC_ENOMEM"; + case IONIC_RC_EFAULT: + return "IONIC_RC_EFAULT"; + case IONIC_RC_EBUSY: + return "IONIC_RC_EBUSY"; + case IONIC_RC_EEXIST: + return "IONIC_RC_EEXIST"; + case IONIC_RC_EINVAL: + return "IONIC_RC_EINVAL"; + case IONIC_RC_ENOSPC: + return "IONIC_RC_ENOSPC"; + case IONIC_RC_ERANGE: + return "IONIC_RC_ERANGE"; + case IONIC_RC_BAD_ADDR: + return "IONIC_RC_BAD_ADDR"; + case IONIC_RC_DEV_CMD: + return "IONIC_RC_DEV_CMD"; + case IONIC_RC_ERROR: + return "IONIC_RC_ERROR"; + case IONIC_RC_ERDMA: + return "IONIC_RC_ERDMA"; + default: + return "IONIC_RC_UNKNOWN"; + } +} + +static int ionic_error_to_errno(enum ionic_status_code code) +{ + switch (code) { + case IONIC_RC_SUCCESS: + return 0; + case IONIC_RC_EVERSION: + case IONIC_RC_EQTYPE: + case IONIC_RC_EQID: + case IONIC_RC_EINVAL: + return -EINVAL; + case IONIC_RC_EPERM: + return -EPERM; + case IONIC_RC_ENOENT: + return -ENOENT; + case IONIC_RC_EAGAIN: + return -EAGAIN; + case IONIC_RC_ENOMEM: + return -ENOMEM; + case IONIC_RC_EFAULT: + return -EFAULT; + case IONIC_RC_EBUSY: + return -EBUSY; + case IONIC_RC_EEXIST: + return -EEXIST; + case IONIC_RC_ENOSPC: + return -ENOSPC; + case IONIC_RC_ERANGE: + return -ERANGE; + case IONIC_RC_BAD_ADDR: + return -EFAULT; + case IONIC_RC_EOPCODE: + case IONIC_RC_EINTR: + case IONIC_RC_DEV_CMD: + case IONIC_RC_ERROR: + case IONIC_RC_ERDMA: + case IONIC_RC_EIO: + default: + return -EIO; + } +} + +static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode) +{ + switch (opcode) { + case IONIC_CMD_NOP: + return "IONIC_CMD_NOP"; + case IONIC_CMD_INIT: + return "IONIC_CMD_INIT"; + case IONIC_CMD_RESET: + return "IONIC_CMD_RESET"; + case IONIC_CMD_IDENTIFY: + return "IONIC_CMD_IDENTIFY"; + case IONIC_CMD_GETATTR: + return "IONIC_CMD_GETATTR"; + case IONIC_CMD_SETATTR: + return "IONIC_CMD_SETATTR"; + case IONIC_CMD_PORT_IDENTIFY: + return "IONIC_CMD_PORT_IDENTIFY"; + case IONIC_CMD_PORT_INIT: + return "IONIC_CMD_PORT_INIT"; + case IONIC_CMD_PORT_RESET: + return "IONIC_CMD_PORT_RESET"; + case IONIC_CMD_PORT_GETATTR: + return "IONIC_CMD_PORT_GETATTR"; + case IONIC_CMD_PORT_SETATTR: + return "IONIC_CMD_PORT_SETATTR"; + case IONIC_CMD_LIF_INIT: + return "IONIC_CMD_LIF_INIT"; + case IONIC_CMD_LIF_RESET: + return "IONIC_CMD_LIF_RESET"; + case IONIC_CMD_LIF_IDENTIFY: + return "IONIC_CMD_LIF_IDENTIFY"; + case IONIC_CMD_LIF_SETATTR: + return "IONIC_CMD_LIF_SETATTR"; + case IONIC_CMD_LIF_GETATTR: + return "IONIC_CMD_LIF_GETATTR"; + case IONIC_CMD_RX_MODE_SET: + return "IONIC_CMD_RX_MODE_SET"; + case IONIC_CMD_RX_FILTER_ADD: + return "IONIC_CMD_RX_FILTER_ADD"; + case IONIC_CMD_RX_FILTER_DEL: + return "IONIC_CMD_RX_FILTER_DEL"; + case IONIC_CMD_Q_INIT: + return "IONIC_CMD_Q_INIT"; + case IONIC_CMD_Q_CONTROL: + return "IONIC_CMD_Q_CONTROL"; + case IONIC_CMD_RDMA_RESET_LIF: + return 
"IONIC_CMD_RDMA_RESET_LIF"; + case IONIC_CMD_RDMA_CREATE_EQ: + return "IONIC_CMD_RDMA_CREATE_EQ"; + case IONIC_CMD_RDMA_CREATE_CQ: + return "IONIC_CMD_RDMA_CREATE_CQ"; + case IONIC_CMD_RDMA_CREATE_ADMINQ: + return "IONIC_CMD_RDMA_CREATE_ADMINQ"; + case IONIC_CMD_FW_DOWNLOAD: + return "IONIC_CMD_FW_DOWNLOAD"; + case IONIC_CMD_FW_CONTROL: + return "IONIC_CMD_FW_CONTROL"; + default: + return "DEVCMD_UNKNOWN"; + } +} + +static void ionic_adminq_flush(struct ionic_lif *lif) +{ + struct ionic_queue *adminq = &lif->adminqcq->q; + + spin_lock(&lif->adminq_lock); + + while (adminq->tail != adminq->head) { + memset(adminq->tail->desc, 0, sizeof(union ionic_adminq_cmd)); + adminq->tail->cb = NULL; + adminq->tail->cb_arg = NULL; + adminq->tail = adminq->tail->next; + } + spin_unlock(&lif->adminq_lock); +} + +static int ionic_adminq_check_err(struct ionic_lif *lif, + struct ionic_admin_ctx *ctx, + bool timeout) +{ + struct net_device *netdev = lif->netdev; + const char *opcode_str; + const char *status_str; + int err = 0; + + if (ctx->comp.comp.status || timeout) { + opcode_str = ionic_opcode_to_str(ctx->cmd.cmd.opcode); + status_str = ionic_error_to_str(ctx->comp.comp.status); + err = timeout ? -ETIMEDOUT : + ionic_error_to_errno(ctx->comp.comp.status); + + netdev_err(netdev, "%s (%d) failed: %s (%d)\n", + opcode_str, ctx->cmd.cmd.opcode, + timeout ? "TIMEOUT" : status_str, err); + + if (timeout) + ionic_adminq_flush(lif); + } + + return err; +} + +static void ionic_adminq_cb(struct ionic_queue *q, + struct ionic_desc_info *desc_info, + struct ionic_cq_info *cq_info, void *cb_arg) +{ + struct ionic_admin_ctx *ctx = cb_arg; + struct ionic_admin_comp *comp; + struct device *dev; + + if (!ctx) + return; + + comp = cq_info->cq_desc; + dev = &q->lif->netdev->dev; + + memcpy(&ctx->comp, comp, sizeof(*comp)); + + dev_dbg(dev, "comp admin queue command:\n"); + dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1, + &ctx->comp, sizeof(ctx->comp), true); + + complete_all(&ctx->work); +} + +static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) +{ + struct ionic_queue *adminq = &lif->adminqcq->q; + int err = 0; + + WARN_ON(in_interrupt()); + + spin_lock(&lif->adminq_lock); + if (!ionic_q_has_space(adminq, 1)) { + err = -ENOSPC; + goto err_out; + } + + memcpy(adminq->head->desc, &ctx->cmd, sizeof(ctx->cmd)); + + dev_dbg(&lif->netdev->dev, "post admin queue command:\n"); + dynamic_hex_dump("cmd ", DUMP_PREFIX_OFFSET, 16, 1, + &ctx->cmd, sizeof(ctx->cmd), true); + + ionic_q_post(adminq, true, ionic_adminq_cb, ctx); + +err_out: + spin_unlock(&lif->adminq_lock); + + return err; +} + +int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) +{ + struct net_device *netdev = lif->netdev; + unsigned long remaining; + const char *name; + int err; + + err = ionic_adminq_post(lif, ctx); + if (err) { + name = ionic_opcode_to_str(ctx->cmd.cmd.opcode); + netdev_err(netdev, "Posting of %s (%d) failed: %d\n", + name, ctx->cmd.cmd.opcode, err); + return err; + } + + remaining = wait_for_completion_timeout(&ctx->work, + HZ * (ulong)DEVCMD_TIMEOUT); + return ionic_adminq_check_err(lif, ctx, (remaining == 0)); +} + +int ionic_napi(struct napi_struct *napi, int budget, ionic_cq_cb cb, + ionic_cq_done_cb done_cb, void *done_arg) +{ + struct ionic_qcq *qcq = napi_to_qcq(napi); + struct ionic_cq *cq = &qcq->cq; + u32 work_done, flags = 0; + + work_done = ionic_cq_service(cq, budget, cb, done_cb, done_arg); + + if (work_done < budget && napi_complete_done(napi, work_done)) { + flags |= 
IONIC_INTR_CRED_UNMASK; + DEBUG_STATS_INTR_REARM(cq->bound_intr); + } + + if (work_done || flags) { + flags |= IONIC_INTR_CRED_RESET_COALESCE; + ionic_intr_credits(cq->lif->ionic->idev.intr_ctrl, + cq->bound_intr->index, + work_done, flags); + } + + DEBUG_STATS_NAPI_POLL(qcq, work_done); + + return work_done; +} + +int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds) +{ + struct ionic_dev *idev = &ionic->idev; + unsigned long start_time; + unsigned long max_wait; + unsigned long duration; + int opcode; + int done; + int err; + + WARN_ON(in_interrupt()); + + /* Wait for dev cmd to complete, retrying if we get EAGAIN, + * but don't wait any longer than max_seconds. + */ + max_wait = jiffies + (max_seconds * HZ); +try_again: + start_time = jiffies; + do { + done = ionic_dev_cmd_done(idev); + if (done) + break; + msleep(20); + } while (!done && time_before(jiffies, max_wait)); + duration = jiffies - start_time; + + opcode = idev->dev_cmd_regs->cmd.cmd.opcode; + dev_dbg(ionic->dev, "DEVCMD %s (%d) done=%d took %ld secs (%ld jiffies)\n", + ionic_opcode_to_str(opcode), opcode, + done, duration / HZ, duration); + + if (!done && !time_before(jiffies, max_wait)) { + dev_warn(ionic->dev, "DEVCMD %s (%d) timeout after %ld secs\n", + ionic_opcode_to_str(opcode), opcode, max_seconds); + return -ETIMEDOUT; + } + + err = ionic_dev_cmd_status(&ionic->idev); + if (err) { + if (err == IONIC_RC_EAGAIN && !time_after(jiffies, max_wait)) { + dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) retrying...\n", + ionic_opcode_to_str(opcode), opcode, + ionic_error_to_str(err), err); + + msleep(1000); + iowrite32(0, &idev->dev_cmd_regs->done); + iowrite32(1, &idev->dev_cmd_regs->doorbell); + goto try_again; + } + + dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n", + ionic_opcode_to_str(opcode), opcode, + ionic_error_to_str(err), err); + + return ionic_error_to_errno(err); + } + + return 0; +} + +int ionic_setup(struct ionic *ionic) +{ + int err; + + err = ionic_dev_setup(ionic); + if (err) + return err; + + return 0; +} + +int ionic_identify(struct ionic *ionic) +{ + struct ionic_identity *ident = &ionic->ident; + struct ionic_dev *idev = &ionic->idev; + size_t sz; + int err; + + memset(ident, 0, sizeof(*ident)); + + ident->drv.os_type = cpu_to_le32(IONIC_OS_TYPE_LINUX); + strncpy(ident->drv.driver_ver_str, IONIC_DRV_VERSION, + sizeof(ident->drv.driver_ver_str) - 1); + + mutex_lock(&ionic->dev_cmd_lock); + + sz = min(sizeof(ident->drv), sizeof(idev->dev_cmd_regs->data)); + memcpy_toio(&idev->dev_cmd_regs->data, &ident->drv, sz); + + ionic_dev_cmd_identify(idev, IONIC_IDENTITY_VERSION_1); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + if (!err) { + sz = min(sizeof(ident->dev), sizeof(idev->dev_cmd_regs->data)); + memcpy_fromio(&ident->dev, &idev->dev_cmd_regs->data, sz); + } + + mutex_unlock(&ionic->dev_cmd_lock); + + if (err) + goto err_out_unmap; + + ionic_debugfs_add_ident(ionic); + + return 0; + +err_out_unmap: + return err; +} + +int ionic_init(struct ionic *ionic) +{ + struct ionic_dev *idev = &ionic->idev; + int err; + + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_init(idev); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + + return err; +} + +int ionic_reset(struct ionic *ionic) +{ + struct ionic_dev *idev = &ionic->idev; + int err; + + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_reset(idev); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + + return err; +} + +int 
ionic_port_identify(struct ionic *ionic) +{ + struct ionic_identity *ident = &ionic->ident; + struct ionic_dev *idev = &ionic->idev; + size_t sz; + int err; + + mutex_lock(&ionic->dev_cmd_lock); + + ionic_dev_cmd_port_identify(idev); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + if (!err) { + sz = min(sizeof(ident->port), sizeof(idev->dev_cmd_regs->data)); + memcpy_fromio(&ident->port, &idev->dev_cmd_regs->data, sz); + } + + mutex_unlock(&ionic->dev_cmd_lock); + + return err; +} + +int ionic_port_init(struct ionic *ionic) +{ + struct ionic_identity *ident = &ionic->ident; + struct ionic_dev *idev = &ionic->idev; + size_t sz; + int err; + + if (idev->port_info) + return 0; + + idev->port_info_sz = ALIGN(sizeof(*idev->port_info), PAGE_SIZE); + idev->port_info = dma_alloc_coherent(ionic->dev, idev->port_info_sz, + &idev->port_info_pa, + GFP_KERNEL); + if (!idev->port_info) { + dev_err(ionic->dev, "Failed to allocate port info, aborting\n"); + return -ENOMEM; + } + + sz = min(sizeof(ident->port.config), sizeof(idev->dev_cmd_regs->data)); + + mutex_lock(&ionic->dev_cmd_lock); + + memcpy_toio(&idev->dev_cmd_regs->data, &ident->port.config, sz); + ionic_dev_cmd_port_init(idev); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + + ionic_dev_cmd_port_state(&ionic->idev, IONIC_PORT_ADMIN_STATE_UP); + (void)ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + + mutex_unlock(&ionic->dev_cmd_lock); + if (err) { + dev_err(ionic->dev, "Failed to init port\n"); + dma_free_coherent(ionic->dev, idev->port_info_sz, + idev->port_info, idev->port_info_pa); + idev->port_info = NULL; + idev->port_info_pa = 0; + } + + return err; +} + +int ionic_port_reset(struct ionic *ionic) +{ + struct ionic_dev *idev = &ionic->idev; + int err; + + if (!idev->port_info) + return 0; + + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_port_reset(idev); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + + dma_free_coherent(ionic->dev, idev->port_info_sz, + idev->port_info, idev->port_info_pa); + + idev->port_info = NULL; + idev->port_info_pa = 0; + + if (err) + dev_err(ionic->dev, "Failed to reset port\n"); + + return err; +} + +static int __init ionic_init_module(void) +{ + pr_info("%s %s, ver %s\n", + IONIC_DRV_NAME, IONIC_DRV_DESCRIPTION, IONIC_DRV_VERSION); + ionic_debugfs_create(); + return ionic_bus_register_driver(); +} + +static void __exit ionic_cleanup_module(void) +{ + ionic_bus_unregister_driver(); + ionic_debugfs_destroy(); + + pr_info("%s removed\n", IONIC_DRV_NAME); +} + +module_init(ionic_init_module); +module_exit(ionic_cleanup_module); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_regs.h b/drivers/net/ethernet/pensando/ionic/ionic_regs.h new file mode 100644 index 000000000000..03ee5a36472b --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_regs.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB OR BSD-2-Clause */ +/* Copyright (c) 2018-2019 Pensando Systems, Inc. All rights reserved. */ + +#ifndef IONIC_REGS_H +#define IONIC_REGS_H + +#include <linux/io.h> + +/** struct ionic_intr - interrupt control register set. + * @coal_init: coalesce timer initial value. + * @mask: interrupt mask value. + * @credits: interrupt credit count and return. + * @mask_assert: interrupt mask value on assert. + * @coal: coalesce timer time remaining. 
+ */ +struct ionic_intr { + u32 coal_init; + u32 mask; + u32 credits; + u32 mask_assert; + u32 coal; + u32 rsvd[3]; +}; + +#define IONIC_INTR_CTRL_REGS_MAX 2048 +#define IONIC_INTR_CTRL_COAL_MAX 0x3F + +/** enum ionic_intr_mask_vals - valid values for mask and mask_assert. + * @IONIC_INTR_MASK_CLEAR: unmask interrupt. + * @IONIC_INTR_MASK_SET: mask interrupt. + */ +enum ionic_intr_mask_vals { + IONIC_INTR_MASK_CLEAR = 0, + IONIC_INTR_MASK_SET = 1, +}; + +/** enum ionic_intr_credits_bits - bitwise composition of credits values. + * @IONIC_INTR_CRED_COUNT: bit mask of credit count, no shift needed. + * @IONIC_INTR_CRED_COUNT_SIGNED: bit mask of credit count, including sign bit. + * @IONIC_INTR_CRED_UNMASK: unmask the interrupt. + * @IONIC_INTR_CRED_RESET_COALESCE: reset the coalesce timer. + * @IONIC_INTR_CRED_REARM: unmask the and reset the timer. + */ +enum ionic_intr_credits_bits { + IONIC_INTR_CRED_COUNT = 0x7fffu, + IONIC_INTR_CRED_COUNT_SIGNED = 0xffffu, + IONIC_INTR_CRED_UNMASK = 0x10000u, + IONIC_INTR_CRED_RESET_COALESCE = 0x20000u, + IONIC_INTR_CRED_REARM = (IONIC_INTR_CRED_UNMASK | + IONIC_INTR_CRED_RESET_COALESCE), +}; + +static inline void ionic_intr_coal_init(struct ionic_intr __iomem *intr_ctrl, + int intr_idx, u32 coal) +{ + iowrite32(coal, &intr_ctrl[intr_idx].coal_init); +} + +static inline void ionic_intr_mask(struct ionic_intr __iomem *intr_ctrl, + int intr_idx, u32 mask) +{ + iowrite32(mask, &intr_ctrl[intr_idx].mask); +} + +static inline void ionic_intr_credits(struct ionic_intr __iomem *intr_ctrl, + int intr_idx, u32 cred, u32 flags) +{ + if (WARN_ON_ONCE(cred > IONIC_INTR_CRED_COUNT)) { + cred = ioread32(&intr_ctrl[intr_idx].credits); + cred &= IONIC_INTR_CRED_COUNT_SIGNED; + } + + iowrite32(cred | flags, &intr_ctrl[intr_idx].credits); +} + +static inline void ionic_intr_clean(struct ionic_intr __iomem *intr_ctrl, + int intr_idx) +{ + u32 cred; + + cred = ioread32(&intr_ctrl[intr_idx].credits); + cred &= IONIC_INTR_CRED_COUNT_SIGNED; + cred |= IONIC_INTR_CRED_RESET_COALESCE; + iowrite32(cred, &intr_ctrl[intr_idx].credits); +} + +static inline void ionic_intr_mask_assert(struct ionic_intr __iomem *intr_ctrl, + int intr_idx, u32 mask) +{ + iowrite32(mask, &intr_ctrl[intr_idx].mask_assert); +} + +/** enum ionic_dbell_bits - bitwise composition of dbell values. + * + * @IONIC_DBELL_QID_MASK: unshifted mask of valid queue id bits. + * @IONIC_DBELL_QID_SHIFT: queue id shift amount in dbell value. + * @IONIC_DBELL_QID: macro to build QID component of dbell value. + * + * @IONIC_DBELL_RING_MASK: unshifted mask of valid ring bits. + * @IONIC_DBELL_RING_SHIFT: ring shift amount in dbell value. + * @IONIC_DBELL_RING: macro to build ring component of dbell value. + * + * @IONIC_DBELL_RING_0: ring zero dbell component value. + * @IONIC_DBELL_RING_1: ring one dbell component value. + * @IONIC_DBELL_RING_2: ring two dbell component value. + * @IONIC_DBELL_RING_3: ring three dbell component value. + * + * @IONIC_DBELL_INDEX_MASK: bit mask of valid index bits, no shift needed. 
+ */ +enum ionic_dbell_bits { + IONIC_DBELL_QID_MASK = 0xffffff, + IONIC_DBELL_QID_SHIFT = 24, + +#define IONIC_DBELL_QID(n) \ + (((u64)(n) & IONIC_DBELL_QID_MASK) << IONIC_DBELL_QID_SHIFT) + + IONIC_DBELL_RING_MASK = 0x7, + IONIC_DBELL_RING_SHIFT = 16, + +#define IONIC_DBELL_RING(n) \ + (((u64)(n) & IONIC_DBELL_RING_MASK) << IONIC_DBELL_RING_SHIFT) + + IONIC_DBELL_RING_0 = 0, + IONIC_DBELL_RING_1 = IONIC_DBELL_RING(1), + IONIC_DBELL_RING_2 = IONIC_DBELL_RING(2), + IONIC_DBELL_RING_3 = IONIC_DBELL_RING(3), + + IONIC_DBELL_INDEX_MASK = 0xffff, +}; + +static inline void ionic_dbell_ring(u64 __iomem *db_page, int qtype, u64 val) +{ + writeq(val, &db_page[qtype]); +} + +#endif /* IONIC_REGS_H */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c new file mode 100644 index 000000000000..7a093f148ee5 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> + +#include "ionic.h" +#include "ionic_lif.h" +#include "ionic_rx_filter.h" + +void ionic_rx_filter_free(struct ionic_lif *lif, struct ionic_rx_filter *f) +{ + struct device *dev = lif->ionic->dev; + + hlist_del(&f->by_id); + hlist_del(&f->by_hash); + devm_kfree(dev, f); +} + +int ionic_rx_filter_del(struct ionic_lif *lif, struct ionic_rx_filter *f) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_filter_del = { + .opcode = IONIC_CMD_RX_FILTER_DEL, + .filter_id = cpu_to_le32(f->filter_id), + }, + }; + + return ionic_adminq_post_wait(lif, &ctx); +} + +int ionic_rx_filters_init(struct ionic_lif *lif) +{ + unsigned int i; + + spin_lock_init(&lif->rx_filters.lock); + + for (i = 0; i < IONIC_RX_FILTER_HLISTS; i++) { + INIT_HLIST_HEAD(&lif->rx_filters.by_hash[i]); + INIT_HLIST_HEAD(&lif->rx_filters.by_id[i]); + } + + return 0; +} + +void ionic_rx_filters_deinit(struct ionic_lif *lif) +{ + struct ionic_rx_filter *f; + struct hlist_head *head; + struct hlist_node *tmp; + unsigned int i; + + for (i = 0; i < IONIC_RX_FILTER_HLISTS; i++) { + head = &lif->rx_filters.by_id[i]; + hlist_for_each_entry_safe(f, tmp, head, by_id) + ionic_rx_filter_free(lif, f); + } +} + +int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index, + u32 hash, struct ionic_admin_ctx *ctx) +{ + struct device *dev = lif->ionic->dev; + struct ionic_rx_filter_add_cmd *ac; + struct ionic_rx_filter *f; + struct hlist_head *head; + unsigned int key; + + ac = &ctx->cmd.rx_filter_add; + + switch (le16_to_cpu(ac->match)) { + case IONIC_RX_FILTER_MATCH_VLAN: + key = le16_to_cpu(ac->vlan.vlan); + break; + case IONIC_RX_FILTER_MATCH_MAC: + key = *(u32 *)ac->mac.addr; + break; + case IONIC_RX_FILTER_MATCH_MAC_VLAN: + key = le16_to_cpu(ac->mac_vlan.vlan); + break; + default: + return -EINVAL; + } + + f = devm_kzalloc(dev, sizeof(*f), GFP_KERNEL); + if (!f) + return -ENOMEM; + + f->flow_id = flow_id; + f->filter_id = le32_to_cpu(ctx->comp.rx_filter_add.filter_id); + f->rxq_index = rxq_index; + memcpy(&f->cmd, ac, sizeof(f->cmd)); + + INIT_HLIST_NODE(&f->by_hash); + INIT_HLIST_NODE(&f->by_id); + + spin_lock_bh(&lif->rx_filters.lock); + + key = hash_32(key, IONIC_RX_FILTER_HASH_BITS); + head = &lif->rx_filters.by_hash[key]; + hlist_add_head(&f->by_hash, head); + + key = f->filter_id & IONIC_RX_FILTER_HLISTS_MASK; + head = &lif->rx_filters.by_id[key]; + 
hlist_add_head(&f->by_id, head); + + spin_unlock_bh(&lif->rx_filters.lock); + + return 0; +} + +struct ionic_rx_filter *ionic_rx_filter_by_vlan(struct ionic_lif *lif, u16 vid) +{ + struct ionic_rx_filter *f; + struct hlist_head *head; + unsigned int key; + + key = hash_32(vid, IONIC_RX_FILTER_HASH_BITS); + head = &lif->rx_filters.by_hash[key]; + + hlist_for_each_entry(f, head, by_hash) { + if (le16_to_cpu(f->cmd.match) != IONIC_RX_FILTER_MATCH_VLAN) + continue; + if (le16_to_cpu(f->cmd.vlan.vlan) == vid) + return f; + } + + return NULL; +} + +struct ionic_rx_filter *ionic_rx_filter_by_addr(struct ionic_lif *lif, + const u8 *addr) +{ + struct ionic_rx_filter *f; + struct hlist_head *head; + unsigned int key; + + key = hash_32(*(u32 *)addr, IONIC_RX_FILTER_HASH_BITS); + head = &lif->rx_filters.by_hash[key]; + + hlist_for_each_entry(f, head, by_hash) { + if (le16_to_cpu(f->cmd.match) != IONIC_RX_FILTER_MATCH_MAC) + continue; + if (memcmp(addr, f->cmd.mac.addr, ETH_ALEN) == 0) + return f; + } + + return NULL; +} diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h new file mode 100644 index 000000000000..b6aec9c19918 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#ifndef _IONIC_RX_FILTER_H_ +#define _IONIC_RX_FILTER_H_ + +#define IONIC_RXQ_INDEX_ANY (0xFFFF) +struct ionic_rx_filter { + u32 flow_id; + u32 filter_id; + u16 rxq_index; + struct ionic_rx_filter_add_cmd cmd; + struct hlist_node by_hash; + struct hlist_node by_id; +}; + +#define IONIC_RX_FILTER_HASH_BITS 10 +#define IONIC_RX_FILTER_HLISTS BIT(IONIC_RX_FILTER_HASH_BITS) +#define IONIC_RX_FILTER_HLISTS_MASK (IONIC_RX_FILTER_HLISTS - 1) +struct ionic_rx_filters { + spinlock_t lock; /* filter list lock */ + struct hlist_head by_hash[IONIC_RX_FILTER_HLISTS]; /* by skb hash */ + struct hlist_head by_id[IONIC_RX_FILTER_HLISTS]; /* by filter_id */ +}; + +void ionic_rx_filter_free(struct ionic_lif *lif, struct ionic_rx_filter *f); +int ionic_rx_filter_del(struct ionic_lif *lif, struct ionic_rx_filter *f); +int ionic_rx_filters_init(struct ionic_lif *lif); +void ionic_rx_filters_deinit(struct ionic_lif *lif); +int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index, + u32 hash, struct ionic_admin_ctx *ctx); +struct ionic_rx_filter *ionic_rx_filter_by_vlan(struct ionic_lif *lif, u16 vid); +struct ionic_rx_filter *ionic_rx_filter_by_addr(struct ionic_lif *lif, const u8 *addr); + +#endif /* _IONIC_RX_FILTER_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c new file mode 100644 index 000000000000..e2907884f843 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c @@ -0,0 +1,310 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#include <linux/kernel.h> +#include <linux/mutex.h> +#include <linux/netdevice.h> + +#include "ionic.h" +#include "ionic_lif.h" +#include "ionic_stats.h" + +static const struct ionic_stat_desc ionic_lif_stats_desc[] = { + IONIC_LIF_STAT_DESC(tx_packets), + IONIC_LIF_STAT_DESC(tx_bytes), + IONIC_LIF_STAT_DESC(rx_packets), + IONIC_LIF_STAT_DESC(rx_bytes), + IONIC_LIF_STAT_DESC(tx_tso), + IONIC_LIF_STAT_DESC(tx_no_csum), + IONIC_LIF_STAT_DESC(tx_csum), + IONIC_LIF_STAT_DESC(rx_csum_none), + IONIC_LIF_STAT_DESC(rx_csum_complete), + IONIC_LIF_STAT_DESC(rx_csum_error), +}; 
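
[Editorial aside, not part of the commit] The stats tables above and below rely on a name-plus-offset descriptor pattern: IONIC_STAT_DESC() records the offsetof() of each u64 counter in its owning struct, and IONIC_READ_STAT64() in ionic_stats.h later dereferences base + offset so one generic loop can emit both ethtool strings and values. A minimal standalone sketch of that pattern follows; the demo_* names are invented for illustration and do not exist in the driver.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* a plain bag of 64-bit counters, standing in for ionic_lif_sw_stats etc. */
struct demo_stats {
        uint64_t pkts;
        uint64_t bytes;
        uint64_t drops;
};

/* one descriptor per counter: printable name plus its offset in the struct */
struct demo_stat_desc {
        const char *name;
        size_t offset;
};

#define DEMO_STAT_DESC(field) { #field, offsetof(struct demo_stats, field) }

static const struct demo_stat_desc demo_desc[] = {
        DEMO_STAT_DESC(pkts),
        DEMO_STAT_DESC(bytes),
        DEMO_STAT_DESC(drops),
};

/* read one counter through its descriptor, same idea as IONIC_READ_STAT64() */
static uint64_t demo_read_stat(const struct demo_stats *s,
                               const struct demo_stat_desc *d)
{
        return *(const uint64_t *)((const uint8_t *)s + d->offset);
}

int main(void)
{
        struct demo_stats s = { .pkts = 10, .bytes = 1500, .drops = 1 };
        size_t i;

        /* one generic loop walks the table; no per-counter code needed */
        for (i = 0; i < sizeof(demo_desc) / sizeof(demo_desc[0]); i++)
                printf("%s = %llu\n", demo_desc[i].name,
                       (unsigned long long)demo_read_stat(&s, &demo_desc[i]));
        return 0;
}

The driver applies the same idea per queue: the get_strings callback walks the descriptor tables to build "tx_N_<name>"-style labels, and the get_values callback walks them again to copy the counters, so adding a statistic only means adding one struct field and one table entry.
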
+ +static const struct ionic_stat_desc ionic_tx_stats_desc[] = { + IONIC_TX_STAT_DESC(pkts), + IONIC_TX_STAT_DESC(bytes), + IONIC_TX_STAT_DESC(clean), + IONIC_TX_STAT_DESC(dma_map_err), + IONIC_TX_STAT_DESC(linearize), + IONIC_TX_STAT_DESC(frags), +}; + +static const struct ionic_stat_desc ionic_rx_stats_desc[] = { + IONIC_RX_STAT_DESC(pkts), + IONIC_RX_STAT_DESC(bytes), + IONIC_RX_STAT_DESC(dma_map_err), + IONIC_RX_STAT_DESC(alloc_err), + IONIC_RX_STAT_DESC(csum_none), + IONIC_RX_STAT_DESC(csum_complete), + IONIC_RX_STAT_DESC(csum_error), +}; + +static const struct ionic_stat_desc ionic_txq_stats_desc[] = { + IONIC_TX_Q_STAT_DESC(stop), + IONIC_TX_Q_STAT_DESC(wake), + IONIC_TX_Q_STAT_DESC(drop), + IONIC_TX_Q_STAT_DESC(dbell_count), +}; + +static const struct ionic_stat_desc ionic_dbg_cq_stats_desc[] = { + IONIC_CQ_STAT_DESC(compl_count), +}; + +static const struct ionic_stat_desc ionic_dbg_intr_stats_desc[] = { + IONIC_INTR_STAT_DESC(rearm_count), +}; + +static const struct ionic_stat_desc ionic_dbg_napi_stats_desc[] = { + IONIC_NAPI_STAT_DESC(poll_count), +}; + +#define IONIC_NUM_LIF_STATS ARRAY_SIZE(ionic_lif_stats_desc) +#define IONIC_NUM_TX_STATS ARRAY_SIZE(ionic_tx_stats_desc) +#define IONIC_NUM_RX_STATS ARRAY_SIZE(ionic_rx_stats_desc) +#define IONIC_NUM_TX_Q_STATS ARRAY_SIZE(ionic_txq_stats_desc) +#define IONIC_NUM_DBG_CQ_STATS ARRAY_SIZE(ionic_dbg_cq_stats_desc) +#define IONIC_NUM_DBG_INTR_STATS ARRAY_SIZE(ionic_dbg_intr_stats_desc) +#define IONIC_NUM_DBG_NAPI_STATS ARRAY_SIZE(ionic_dbg_napi_stats_desc) + +#define MAX_Q(lif) ((lif)->netdev->real_num_tx_queues) + +static void ionic_get_lif_stats(struct ionic_lif *lif, + struct ionic_lif_sw_stats *stats) +{ + struct ionic_tx_stats *tstats; + struct ionic_rx_stats *rstats; + struct ionic_qcq *txqcq; + struct ionic_qcq *rxqcq; + int q_num; + + memset(stats, 0, sizeof(*stats)); + + for (q_num = 0; q_num < MAX_Q(lif); q_num++) { + txqcq = lif_to_txqcq(lif, q_num); + if (txqcq && txqcq->stats) { + tstats = &txqcq->stats->tx; + stats->tx_packets += tstats->pkts; + stats->tx_bytes += tstats->bytes; + stats->tx_tso += tstats->tso; + stats->tx_no_csum += tstats->no_csum; + stats->tx_csum += tstats->csum; + } + + rxqcq = lif_to_rxqcq(lif, q_num); + if (rxqcq && rxqcq->stats) { + rstats = &rxqcq->stats->rx; + stats->rx_packets += rstats->pkts; + stats->rx_bytes += rstats->bytes; + stats->rx_csum_none += rstats->csum_none; + stats->rx_csum_complete += rstats->csum_complete; + stats->rx_csum_error += rstats->csum_error; + } + } +} + +static u64 ionic_sw_stats_get_count(struct ionic_lif *lif) +{ + u64 total = 0; + + /* lif stats */ + total += IONIC_NUM_LIF_STATS; + + /* tx stats */ + total += MAX_Q(lif) * IONIC_NUM_TX_STATS; + + /* rx stats */ + total += MAX_Q(lif) * IONIC_NUM_RX_STATS; + + if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + /* tx debug stats */ + total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS + + IONIC_NUM_TX_Q_STATS + + IONIC_NUM_DBG_INTR_STATS + + IONIC_MAX_NUM_SG_CNTR); + + /* rx debug stats */ + total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS + + IONIC_NUM_DBG_INTR_STATS + + IONIC_NUM_DBG_NAPI_STATS + + IONIC_MAX_NUM_NAPI_CNTR); + } + + return total; +} + +static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf) +{ + int i, q_num; + + for (i = 0; i < IONIC_NUM_LIF_STATS; i++) { + snprintf(*buf, ETH_GSTRING_LEN, ionic_lif_stats_desc[i].name); + *buf += ETH_GSTRING_LEN; + } + for (q_num = 0; q_num < MAX_Q(lif); q_num++) { + for (i = 0; i < IONIC_NUM_TX_STATS; i++) { + snprintf(*buf, ETH_GSTRING_LEN, "tx_%d_%s", 
+ q_num, ionic_tx_stats_desc[i].name); + *buf += ETH_GSTRING_LEN; + } + + if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) { + snprintf(*buf, ETH_GSTRING_LEN, + "txq_%d_%s", + q_num, + ionic_txq_stats_desc[i].name); + *buf += ETH_GSTRING_LEN; + } + for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { + snprintf(*buf, ETH_GSTRING_LEN, + "txq_%d_cq_%s", + q_num, + ionic_dbg_cq_stats_desc[i].name); + *buf += ETH_GSTRING_LEN; + } + for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { + snprintf(*buf, ETH_GSTRING_LEN, + "txq_%d_intr_%s", + q_num, + ionic_dbg_intr_stats_desc[i].name); + *buf += ETH_GSTRING_LEN; + } + for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) { + snprintf(*buf, ETH_GSTRING_LEN, + "txq_%d_sg_cntr_%d", + q_num, i); + *buf += ETH_GSTRING_LEN; + } + } + } + for (q_num = 0; q_num < MAX_Q(lif); q_num++) { + for (i = 0; i < IONIC_NUM_RX_STATS; i++) { + snprintf(*buf, ETH_GSTRING_LEN, + "rx_%d_%s", + q_num, ionic_rx_stats_desc[i].name); + *buf += ETH_GSTRING_LEN; + } + + if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { + snprintf(*buf, ETH_GSTRING_LEN, + "rxq_%d_cq_%s", + q_num, + ionic_dbg_cq_stats_desc[i].name); + *buf += ETH_GSTRING_LEN; + } + for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { + snprintf(*buf, ETH_GSTRING_LEN, + "rxq_%d_intr_%s", + q_num, + ionic_dbg_intr_stats_desc[i].name); + *buf += ETH_GSTRING_LEN; + } + for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) { + snprintf(*buf, ETH_GSTRING_LEN, + "rxq_%d_napi_%s", + q_num, + ionic_dbg_napi_stats_desc[i].name); + *buf += ETH_GSTRING_LEN; + } + for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) { + snprintf(*buf, ETH_GSTRING_LEN, + "rxq_%d_napi_work_done_%d", + q_num, i); + *buf += ETH_GSTRING_LEN; + } + } + } +} + +static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) +{ + struct ionic_lif_sw_stats lif_stats; + struct ionic_qcq *txqcq, *rxqcq; + int i, q_num; + + ionic_get_lif_stats(lif, &lif_stats); + + for (i = 0; i < IONIC_NUM_LIF_STATS; i++) { + **buf = IONIC_READ_STAT64(&lif_stats, &ionic_lif_stats_desc[i]); + (*buf)++; + } + + for (q_num = 0; q_num < MAX_Q(lif); q_num++) { + txqcq = lif_to_txqcq(lif, q_num); + + for (i = 0; i < IONIC_NUM_TX_STATS; i++) { + **buf = IONIC_READ_STAT64(&txqcq->stats->tx, + &ionic_tx_stats_desc[i]); + (*buf)++; + } + + if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) { + **buf = IONIC_READ_STAT64(&txqcq->q, + &ionic_txq_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { + **buf = IONIC_READ_STAT64(&txqcq->cq, + &ionic_dbg_cq_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { + **buf = IONIC_READ_STAT64(&txqcq->intr, + &ionic_dbg_intr_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) { + **buf = txqcq->stats->tx.sg_cntr[i]; + (*buf)++; + } + } + } + + for (q_num = 0; q_num < MAX_Q(lif); q_num++) { + rxqcq = lif_to_rxqcq(lif, q_num); + + for (i = 0; i < IONIC_NUM_RX_STATS; i++) { + **buf = IONIC_READ_STAT64(&rxqcq->stats->rx, + &ionic_rx_stats_desc[i]); + (*buf)++; + } + + if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { + **buf = IONIC_READ_STAT64(&rxqcq->cq, + &ionic_dbg_cq_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { + **buf = IONIC_READ_STAT64(&rxqcq->intr, + &ionic_dbg_intr_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) 
{ + **buf = IONIC_READ_STAT64(&rxqcq->napi_stats, + &ionic_dbg_napi_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) { + **buf = rxqcq->napi_stats.work_done_cntr[i]; + (*buf)++; + } + } + } +} + +const struct ionic_stats_group_intf ionic_stats_groups[] = { + /* SW Stats group */ + { + .get_strings = ionic_sw_stats_get_strings, + .get_values = ionic_sw_stats_get_values, + .get_count = ionic_sw_stats_get_count, + }, + /* Add more stat groups here */ +}; + +const int ionic_num_stats_grps = ARRAY_SIZE(ionic_stats_groups); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.h b/drivers/net/ethernet/pensando/ionic/ionic_stats.h new file mode 100644 index 000000000000..d2c1122a2c6e --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#ifndef _IONIC_STATS_H_ +#define _IONIC_STATS_H_ + +#define IONIC_STAT_TO_OFFSET(type, stat_name) (offsetof(type, stat_name)) + +#define IONIC_STAT_DESC(type, stat_name) { \ + .name = #stat_name, \ + .offset = IONIC_STAT_TO_OFFSET(type, stat_name) \ +} + +#define IONIC_LIF_STAT_DESC(stat_name) \ + IONIC_STAT_DESC(struct ionic_lif_sw_stats, stat_name) + +#define IONIC_TX_STAT_DESC(stat_name) \ + IONIC_STAT_DESC(struct ionic_tx_stats, stat_name) + +#define IONIC_RX_STAT_DESC(stat_name) \ + IONIC_STAT_DESC(struct ionic_rx_stats, stat_name) + +#define IONIC_TX_Q_STAT_DESC(stat_name) \ + IONIC_STAT_DESC(struct ionic_queue, stat_name) + +#define IONIC_CQ_STAT_DESC(stat_name) \ + IONIC_STAT_DESC(struct ionic_cq, stat_name) + +#define IONIC_INTR_STAT_DESC(stat_name) \ + IONIC_STAT_DESC(struct ionic_intr_info, stat_name) + +#define IONIC_NAPI_STAT_DESC(stat_name) \ + IONIC_STAT_DESC(struct ionic_napi_stats, stat_name) + +/* Interface structure for a particalar stats group */ +struct ionic_stats_group_intf { + void (*get_strings)(struct ionic_lif *lif, u8 **buf); + void (*get_values)(struct ionic_lif *lif, u64 **buf); + u64 (*get_count)(struct ionic_lif *lif); +}; + +extern const struct ionic_stats_group_intf ionic_stats_groups[]; +extern const int ionic_num_stats_grps; + +#define IONIC_READ_STAT64(base_ptr, desc_ptr) \ + (*((u64 *)(((u8 *)(base_ptr)) + (desc_ptr)->offset))) + +struct ionic_stat_desc { + char name[ETH_GSTRING_LEN]; + u64 offset; +}; + +#endif /* _IONIC_STATS_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c new file mode 100644 index 000000000000..ab6663d94f42 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -0,0 +1,925 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/if_vlan.h> +#include <net/ip6_checksum.h> + +#include "ionic.h" +#include "ionic_lif.h" +#include "ionic_txrx.h" + +static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info, + struct ionic_cq_info *cq_info, void *cb_arg); + +static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell, + ionic_desc_cb cb_func, void *cb_arg) +{ + DEBUG_STATS_TXQ_POST(q_to_qcq(q), q->head->desc, ring_dbell); + + ionic_q_post(q, ring_dbell, cb_func, cb_arg); +} + +static inline void ionic_rxq_post(struct ionic_queue *q, bool ring_dbell, + ionic_desc_cb cb_func, void *cb_arg) +{ + ionic_q_post(q, ring_dbell, cb_func, cb_arg); + + DEBUG_STATS_RX_BUFF_CNT(q_to_qcq(q)); +} + +static inline struct netdev_queue 
*q_to_ndq(struct ionic_queue *q) +{ + return netdev_get_tx_queue(q->lif->netdev, q->index); +} + +static void ionic_rx_recycle(struct ionic_queue *q, struct ionic_desc_info *desc_info, + struct sk_buff *skb) +{ + struct ionic_rxq_desc *old = desc_info->desc; + struct ionic_rxq_desc *new = q->head->desc; + + new->addr = old->addr; + new->len = old->len; + + ionic_rxq_post(q, true, ionic_rx_clean, skb); +} + +static bool ionic_rx_copybreak(struct ionic_queue *q, struct ionic_desc_info *desc_info, + struct ionic_cq_info *cq_info, struct sk_buff **skb) +{ + struct ionic_rxq_comp *comp = cq_info->cq_desc; + struct ionic_rxq_desc *desc = desc_info->desc; + struct net_device *netdev = q->lif->netdev; + struct device *dev = q->lif->ionic->dev; + struct sk_buff *new_skb; + u16 clen, dlen; + + clen = le16_to_cpu(comp->len); + dlen = le16_to_cpu(desc->len); + if (clen > q->lif->rx_copybreak) { + dma_unmap_single(dev, (dma_addr_t)le64_to_cpu(desc->addr), + dlen, DMA_FROM_DEVICE); + return false; + } + + new_skb = netdev_alloc_skb_ip_align(netdev, clen); + if (!new_skb) { + dma_unmap_single(dev, (dma_addr_t)le64_to_cpu(desc->addr), + dlen, DMA_FROM_DEVICE); + return false; + } + + dma_sync_single_for_cpu(dev, (dma_addr_t)le64_to_cpu(desc->addr), + clen, DMA_FROM_DEVICE); + + memcpy(new_skb->data, (*skb)->data, clen); + + ionic_rx_recycle(q, desc_info, *skb); + *skb = new_skb; + + return true; +} + +static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info, + struct ionic_cq_info *cq_info, void *cb_arg) +{ + struct ionic_rxq_comp *comp = cq_info->cq_desc; + struct ionic_qcq *qcq = q_to_qcq(q); + struct sk_buff *skb = cb_arg; + struct ionic_rx_stats *stats; + struct net_device *netdev; + + stats = q_to_rx_stats(q); + netdev = q->lif->netdev; + + if (comp->status) { + ionic_rx_recycle(q, desc_info, skb); + return; + } + + if (unlikely(test_bit(IONIC_LIF_QUEUE_RESET, q->lif->state))) { + /* no packet processing while resetting */ + ionic_rx_recycle(q, desc_info, skb); + return; + } + + stats->pkts++; + stats->bytes += le16_to_cpu(comp->len); + + ionic_rx_copybreak(q, desc_info, cq_info, &skb); + + skb_put(skb, le16_to_cpu(comp->len)); + skb->protocol = eth_type_trans(skb, netdev); + + skb_record_rx_queue(skb, q->index); + + if (netdev->features & NETIF_F_RXHASH) { + switch (comp->pkt_type_color & IONIC_RXQ_COMP_PKT_TYPE_MASK) { + case IONIC_PKT_TYPE_IPV4: + case IONIC_PKT_TYPE_IPV6: + skb_set_hash(skb, le32_to_cpu(comp->rss_hash), + PKT_HASH_TYPE_L3); + break; + case IONIC_PKT_TYPE_IPV4_TCP: + case IONIC_PKT_TYPE_IPV6_TCP: + case IONIC_PKT_TYPE_IPV4_UDP: + case IONIC_PKT_TYPE_IPV6_UDP: + skb_set_hash(skb, le32_to_cpu(comp->rss_hash), + PKT_HASH_TYPE_L4); + break; + } + } + + if (netdev->features & NETIF_F_RXCSUM) { + if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = (__wsum)le16_to_cpu(comp->csum); + stats->csum_complete++; + } + } else { + stats->csum_none++; + } + + if ((comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_TCP_BAD) || + (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_UDP_BAD) || + (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_IP_BAD)) + stats->csum_error++; + + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) { + if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_VLAN) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + le16_to_cpu(comp->vlan_tci)); + } + + napi_gro_receive(&qcq->napi, skb); +} + +static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) +{ + struct ionic_rxq_comp *comp = cq_info->cq_desc; + 
struct ionic_queue *q = cq->bound_q; + struct ionic_desc_info *desc_info; + + if (!color_match(comp->pkt_type_color, cq->done_color)) + return false; + + /* check for empty queue */ + if (q->tail->index == q->head->index) + return false; + + desc_info = q->tail; + if (desc_info->index != le16_to_cpu(comp->comp_index)) + return false; + + q->tail = desc_info->next; + + /* clean the related q entry, only one per qc completion */ + ionic_rx_clean(q, desc_info, cq_info, desc_info->cb_arg); + + desc_info->cb = NULL; + desc_info->cb_arg = NULL; + + return true; +} + +static u32 ionic_rx_walk_cq(struct ionic_cq *rxcq, u32 limit) +{ + u32 work_done = 0; + + while (ionic_rx_service(rxcq, rxcq->tail)) { + if (rxcq->tail->last) + rxcq->done_color = !rxcq->done_color; + rxcq->tail = rxcq->tail->next; + DEBUG_STATS_CQE_CNT(rxcq); + + if (++work_done >= limit) + break; + } + + return work_done; +} + +void ionic_rx_flush(struct ionic_cq *cq) +{ + struct ionic_dev *idev = &cq->lif->ionic->idev; + u32 work_done; + + work_done = ionic_rx_walk_cq(cq, cq->num_descs); + + if (work_done) + ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index, + work_done, IONIC_INTR_CRED_RESET_COALESCE); +} + +static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q, unsigned int len, + dma_addr_t *dma_addr) +{ + struct ionic_lif *lif = q->lif; + struct ionic_rx_stats *stats; + struct net_device *netdev; + struct sk_buff *skb; + struct device *dev; + + netdev = lif->netdev; + dev = lif->ionic->dev; + stats = q_to_rx_stats(q); + skb = netdev_alloc_skb_ip_align(netdev, len); + if (!skb) { + net_warn_ratelimited("%s: SKB alloc failed on %s!\n", + netdev->name, q->name); + stats->alloc_err++; + return NULL; + } + + *dma_addr = dma_map_single(dev, skb->data, len, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, *dma_addr)) { + dev_kfree_skb(skb); + net_warn_ratelimited("%s: DMA single map failed on %s!\n", + netdev->name, q->name); + stats->dma_map_err++; + return NULL; + } + + return skb; +} + +#define IONIC_RX_RING_DOORBELL_STRIDE ((1 << 2) - 1) + +void ionic_rx_fill(struct ionic_queue *q) +{ + struct net_device *netdev = q->lif->netdev; + struct ionic_rxq_desc *desc; + struct sk_buff *skb; + dma_addr_t dma_addr; + bool ring_doorbell; + unsigned int len; + unsigned int i; + + len = netdev->mtu + ETH_HLEN; + + for (i = ionic_q_space_avail(q); i; i--) { + skb = ionic_rx_skb_alloc(q, len, &dma_addr); + if (!skb) + return; + + desc = q->head->desc; + desc->addr = cpu_to_le64(dma_addr); + desc->len = cpu_to_le16(len); + desc->opcode = IONIC_RXQ_DESC_OPCODE_SIMPLE; + + ring_doorbell = ((q->head->index + 1) & + IONIC_RX_RING_DOORBELL_STRIDE) == 0; + + ionic_rxq_post(q, ring_doorbell, ionic_rx_clean, skb); + } +} + +static void ionic_rx_fill_cb(void *arg) +{ + ionic_rx_fill(arg); +} + +void ionic_rx_empty(struct ionic_queue *q) +{ + struct device *dev = q->lif->ionic->dev; + struct ionic_desc_info *cur; + struct ionic_rxq_desc *desc; + + for (cur = q->tail; cur != q->head; cur = cur->next) { + desc = cur->desc; + dma_unmap_single(dev, le64_to_cpu(desc->addr), + le16_to_cpu(desc->len), DMA_FROM_DEVICE); + dev_kfree_skb(cur->cb_arg); + cur->cb_arg = NULL; + } +} + +int ionic_rx_napi(struct napi_struct *napi, int budget) +{ + struct ionic_qcq *qcq = napi_to_qcq(napi); + struct ionic_cq *rxcq = napi_to_cq(napi); + unsigned int qi = rxcq->bound_q->index; + struct ionic_dev *idev; + struct ionic_lif *lif; + struct ionic_cq *txcq; + u32 work_done = 0; + u32 flags = 0; + + lif = rxcq->bound_q->lif; + idev = &lif->ionic->idev; + txcq = 
&lif->txqcqs[qi].qcq->cq; + + ionic_tx_flush(txcq); + + work_done = ionic_rx_walk_cq(rxcq, budget); + + if (work_done) + ionic_rx_fill_cb(rxcq->bound_q); + + if (work_done < budget && napi_complete_done(napi, work_done)) { + flags |= IONIC_INTR_CRED_UNMASK; + DEBUG_STATS_INTR_REARM(rxcq->bound_intr); + } + + if (work_done || flags) { + flags |= IONIC_INTR_CRED_RESET_COALESCE; + ionic_intr_credits(idev->intr_ctrl, rxcq->bound_intr->index, + work_done, flags); + } + + DEBUG_STATS_NAPI_POLL(qcq, work_done); + + return work_done; +} + +static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, void *data, size_t len) +{ + struct ionic_tx_stats *stats = q_to_tx_stats(q); + struct device *dev = q->lif->ionic->dev; + dma_addr_t dma_addr; + + dma_addr = dma_map_single(dev, data, len, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + net_warn_ratelimited("%s: DMA single map failed on %s!\n", + q->lif->netdev->name, q->name); + stats->dma_map_err++; + return 0; + } + return dma_addr; +} + +static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q, const skb_frag_t *frag, + size_t offset, size_t len) +{ + struct ionic_tx_stats *stats = q_to_tx_stats(q); + struct device *dev = q->lif->ionic->dev; + dma_addr_t dma_addr; + + dma_addr = skb_frag_dma_map(dev, frag, offset, len, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + net_warn_ratelimited("%s: DMA frag map failed on %s!\n", + q->lif->netdev->name, q->name); + stats->dma_map_err++; + } + return dma_addr; +} + +static void ionic_tx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info, + struct ionic_cq_info *cq_info, void *cb_arg) +{ + struct ionic_txq_sg_desc *sg_desc = desc_info->sg_desc; + struct ionic_txq_sg_elem *elem = sg_desc->elems; + struct ionic_tx_stats *stats = q_to_tx_stats(q); + struct ionic_txq_desc *desc = desc_info->desc; + struct device *dev = q->lif->ionic->dev; + u8 opcode, flags, nsge; + u16 queue_index; + unsigned int i; + u64 addr; + + decode_txq_desc_cmd(le64_to_cpu(desc->cmd), + &opcode, &flags, &nsge, &addr); + + /* use unmap_single only if either this is not TSO, + * or this is first descriptor of a TSO + */ + if (opcode != IONIC_TXQ_DESC_OPCODE_TSO || + flags & IONIC_TXQ_DESC_FLAG_TSO_SOT) + dma_unmap_single(dev, (dma_addr_t)addr, + le16_to_cpu(desc->len), DMA_TO_DEVICE); + else + dma_unmap_page(dev, (dma_addr_t)addr, + le16_to_cpu(desc->len), DMA_TO_DEVICE); + + for (i = 0; i < nsge; i++, elem++) + dma_unmap_page(dev, (dma_addr_t)le64_to_cpu(elem->addr), + le16_to_cpu(elem->len), DMA_TO_DEVICE); + + if (cb_arg) { + struct sk_buff *skb = cb_arg; + u32 len = skb->len; + + queue_index = skb_get_queue_mapping(skb); + if (unlikely(__netif_subqueue_stopped(q->lif->netdev, + queue_index))) { + netif_wake_subqueue(q->lif->netdev, queue_index); + q->wake++; + } + dev_kfree_skb_any(skb); + stats->clean++; + netdev_tx_completed_queue(q_to_ndq(q), 1, len); + } +} + +void ionic_tx_flush(struct ionic_cq *cq) +{ + struct ionic_txq_comp *comp = cq->tail->cq_desc; + struct ionic_dev *idev = &cq->lif->ionic->idev; + struct ionic_queue *q = cq->bound_q; + struct ionic_desc_info *desc_info; + unsigned int work_done = 0; + + /* walk the completed cq entries */ + while (work_done < cq->num_descs && + color_match(comp->color, cq->done_color)) { + + /* clean the related q entries, there could be + * several q entries completed for each cq completion + */ + do { + desc_info = q->tail; + q->tail = desc_info->next; + ionic_tx_clean(q, desc_info, cq->tail, + desc_info->cb_arg); + desc_info->cb = NULL; + 
desc_info->cb_arg = NULL; + } while (desc_info->index != le16_to_cpu(comp->comp_index)); + + if (cq->tail->last) + cq->done_color = !cq->done_color; + + cq->tail = cq->tail->next; + comp = cq->tail->cq_desc; + DEBUG_STATS_CQE_CNT(cq); + + work_done++; + } + + if (work_done) + ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index, + work_done, 0); +} + +static int ionic_tx_tcp_inner_pseudo_csum(struct sk_buff *skb) +{ + int err; + + err = skb_cow_head(skb, 0); + if (err) + return err; + + if (skb->protocol == cpu_to_be16(ETH_P_IP)) { + inner_ip_hdr(skb)->check = 0; + inner_tcp_hdr(skb)->check = + ~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr, + inner_ip_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + } else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) { + inner_tcp_hdr(skb)->check = + ~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr, + &inner_ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + } + + return 0; +} + +static int ionic_tx_tcp_pseudo_csum(struct sk_buff *skb) +{ + int err; + + err = skb_cow_head(skb, 0); + if (err) + return err; + + if (skb->protocol == cpu_to_be16(ETH_P_IP)) { + ip_hdr(skb)->check = 0; + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + } else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) { + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + } + + return 0; +} + +static void ionic_tx_tso_post(struct ionic_queue *q, struct ionic_txq_desc *desc, + struct sk_buff *skb, + dma_addr_t addr, u8 nsge, u16 len, + unsigned int hdrlen, unsigned int mss, + bool outer_csum, + u16 vlan_tci, bool has_vlan, + bool start, bool done) +{ + u8 flags = 0; + u64 cmd; + + flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0; + flags |= outer_csum ? IONIC_TXQ_DESC_FLAG_ENCAP : 0; + flags |= start ? IONIC_TXQ_DESC_FLAG_TSO_SOT : 0; + flags |= done ? 
IONIC_TXQ_DESC_FLAG_TSO_EOT : 0; + + cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_TSO, flags, nsge, addr); + desc->cmd = cpu_to_le64(cmd); + desc->len = cpu_to_le16(len); + desc->vlan_tci = cpu_to_le16(vlan_tci); + desc->hdr_len = cpu_to_le16(hdrlen); + desc->mss = cpu_to_le16(mss); + + if (done) { + skb_tx_timestamp(skb); + netdev_tx_sent_queue(q_to_ndq(q), skb->len); + ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb); + } else { + ionic_txq_post(q, false, ionic_tx_clean, NULL); + } +} + +static struct ionic_txq_desc *ionic_tx_tso_next(struct ionic_queue *q, + struct ionic_txq_sg_elem **elem) +{ + struct ionic_txq_sg_desc *sg_desc = q->head->sg_desc; + struct ionic_txq_desc *desc = q->head->desc; + + *elem = sg_desc->elems; + return desc; +} + +static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) +{ + struct ionic_tx_stats *stats = q_to_tx_stats(q); + struct ionic_desc_info *abort = q->head; + struct device *dev = q->lif->ionic->dev; + struct ionic_desc_info *rewind = abort; + struct ionic_txq_sg_elem *elem; + struct ionic_txq_desc *desc; + unsigned int frag_left = 0; + unsigned int offset = 0; + unsigned int len_left; + dma_addr_t desc_addr; + unsigned int hdrlen; + unsigned int nfrags; + unsigned int seglen; + u64 total_bytes = 0; + u64 total_pkts = 0; + unsigned int left; + unsigned int len; + unsigned int mss; + skb_frag_t *frag; + bool start, done; + bool outer_csum; + bool has_vlan; + u16 desc_len; + u8 desc_nsge; + u16 vlan_tci; + bool encap; + int err; + + mss = skb_shinfo(skb)->gso_size; + nfrags = skb_shinfo(skb)->nr_frags; + len_left = skb->len - skb_headlen(skb); + outer_csum = (skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM) || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); + has_vlan = !!skb_vlan_tag_present(skb); + vlan_tci = skb_vlan_tag_get(skb); + encap = skb->encapsulation; + + /* Preload inner-most TCP csum field with IP pseudo hdr + * calculated with IP length set to zero. HW will later + * add in length to each TCP segment resulting from the TSO. + */ + + if (encap) + err = ionic_tx_tcp_inner_pseudo_csum(skb); + else + err = ionic_tx_tcp_pseudo_csum(skb); + if (err) + return err; + + if (encap) + hdrlen = skb_inner_transport_header(skb) - skb->data + + inner_tcp_hdrlen(skb); + else + hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb); + + seglen = hdrlen + mss; + left = skb_headlen(skb); + + desc = ionic_tx_tso_next(q, &elem); + start = true; + + /* Chop skb->data up into desc segments */ + + while (left > 0) { + len = min(seglen, left); + frag_left = seglen - len; + desc_addr = ionic_tx_map_single(q, skb->data + offset, len); + if (dma_mapping_error(dev, desc_addr)) + goto err_out_abort; + desc_len = len; + desc_nsge = 0; + left -= len; + offset += len; + if (nfrags > 0 && frag_left > 0) + continue; + done = (nfrags == 0 && left == 0); + ionic_tx_tso_post(q, desc, skb, + desc_addr, desc_nsge, desc_len, + hdrlen, mss, + outer_csum, + vlan_tci, has_vlan, + start, done); + total_pkts++; + total_bytes += start ? 
len : len + hdrlen; + desc = ionic_tx_tso_next(q, &elem); + start = false; + seglen = mss; + } + + /* Chop skb frags into desc segments */ + + for (frag = skb_shinfo(skb)->frags; len_left; frag++) { + offset = 0; + left = skb_frag_size(frag); + len_left -= left; + nfrags--; + stats->frags++; + + while (left > 0) { + if (frag_left > 0) { + len = min(frag_left, left); + frag_left -= len; + elem->addr = + cpu_to_le64(ionic_tx_map_frag(q, frag, + offset, len)); + if (dma_mapping_error(dev, elem->addr)) + goto err_out_abort; + elem->len = cpu_to_le16(len); + elem++; + desc_nsge++; + left -= len; + offset += len; + if (nfrags > 0 && frag_left > 0) + continue; + done = (nfrags == 0 && left == 0); + ionic_tx_tso_post(q, desc, skb, desc_addr, + desc_nsge, desc_len, + hdrlen, mss, outer_csum, + vlan_tci, has_vlan, + start, done); + total_pkts++; + total_bytes += start ? len : len + hdrlen; + desc = ionic_tx_tso_next(q, &elem); + start = false; + } else { + len = min(mss, left); + frag_left = mss - len; + desc_addr = ionic_tx_map_frag(q, frag, + offset, len); + if (dma_mapping_error(dev, desc_addr)) + goto err_out_abort; + desc_len = len; + desc_nsge = 0; + left -= len; + offset += len; + if (nfrags > 0 && frag_left > 0) + continue; + done = (nfrags == 0 && left == 0); + ionic_tx_tso_post(q, desc, skb, desc_addr, + desc_nsge, desc_len, + hdrlen, mss, outer_csum, + vlan_tci, has_vlan, + start, done); + total_pkts++; + total_bytes += start ? len : len + hdrlen; + desc = ionic_tx_tso_next(q, &elem); + start = false; + } + } + } + + stats->pkts += total_pkts; + stats->bytes += total_bytes; + stats->tso++; + + return 0; + +err_out_abort: + while (rewind->desc != q->head->desc) { + ionic_tx_clean(q, rewind, NULL, NULL); + rewind = rewind->next; + } + q->head = abort; + + return -ENOMEM; +} + +static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb) +{ + struct ionic_tx_stats *stats = q_to_tx_stats(q); + struct ionic_txq_desc *desc = q->head->desc; + struct device *dev = q->lif->ionic->dev; + dma_addr_t dma_addr; + bool has_vlan; + u8 flags = 0; + bool encap; + u64 cmd; + + has_vlan = !!skb_vlan_tag_present(skb); + encap = skb->encapsulation; + + dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); + if (dma_mapping_error(dev, dma_addr)) + return -ENOMEM; + + flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0; + flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0; + + cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL, + flags, skb_shinfo(skb)->nr_frags, dma_addr); + desc->cmd = cpu_to_le64(cmd); + desc->len = cpu_to_le16(skb_headlen(skb)); + desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb)); + desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb)); + desc->csum_offset = cpu_to_le16(skb->csum_offset); + + if (skb->csum_not_inet) + stats->crc32_csum++; + else + stats->csum++; + + return 0; +} + +static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb) +{ + struct ionic_tx_stats *stats = q_to_tx_stats(q); + struct ionic_txq_desc *desc = q->head->desc; + struct device *dev = q->lif->ionic->dev; + dma_addr_t dma_addr; + bool has_vlan; + u8 flags = 0; + bool encap; + u64 cmd; + + has_vlan = !!skb_vlan_tag_present(skb); + encap = skb->encapsulation; + + dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); + if (dma_mapping_error(dev, dma_addr)) + return -ENOMEM; + + flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0; + flags |= encap ? 
IONIC_TXQ_DESC_FLAG_ENCAP : 0; + + cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_NONE, + flags, skb_shinfo(skb)->nr_frags, dma_addr); + desc->cmd = cpu_to_le64(cmd); + desc->len = cpu_to_le16(skb_headlen(skb)); + desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb)); + + stats->no_csum++; + + return 0; +} + +static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb) +{ + struct ionic_txq_sg_desc *sg_desc = q->head->sg_desc; + unsigned int len_left = skb->len - skb_headlen(skb); + struct ionic_txq_sg_elem *elem = sg_desc->elems; + struct ionic_tx_stats *stats = q_to_tx_stats(q); + struct device *dev = q->lif->ionic->dev; + dma_addr_t dma_addr; + skb_frag_t *frag; + u16 len; + + for (frag = skb_shinfo(skb)->frags; len_left; frag++, elem++) { + len = skb_frag_size(frag); + elem->len = cpu_to_le16(len); + dma_addr = ionic_tx_map_frag(q, frag, 0, len); + if (dma_mapping_error(dev, dma_addr)) + return -ENOMEM; + elem->addr = cpu_to_le64(dma_addr); + len_left -= len; + stats->frags++; + } + + return 0; +} + +static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb) +{ + struct ionic_tx_stats *stats = q_to_tx_stats(q); + int err; + + /* set up the initial descriptor */ + if (skb->ip_summed == CHECKSUM_PARTIAL) + err = ionic_tx_calc_csum(q, skb); + else + err = ionic_tx_calc_no_csum(q, skb); + if (err) + return err; + + /* add frags */ + err = ionic_tx_skb_frags(q, skb); + if (err) + return err; + + skb_tx_timestamp(skb); + stats->pkts++; + stats->bytes += skb->len; + + netdev_tx_sent_queue(q_to_ndq(q), skb->len); + ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb); + + return 0; +} + +static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) +{ + struct ionic_tx_stats *stats = q_to_tx_stats(q); + int err; + + /* If TSO, need roundup(skb->len/mss) descs */ + if (skb_is_gso(skb)) + return (skb->len / skb_shinfo(skb)->gso_size) + 1; + + /* If non-TSO, just need 1 desc and nr_frags sg elems */ + if (skb_shinfo(skb)->nr_frags <= IONIC_TX_MAX_SG_ELEMS) + return 1; + + /* Too many frags, so linearize */ + err = skb_linearize(skb); + if (err) + return err; + + stats->linearize++; + + /* Need 1 desc and zero sg elems */ + return 1; +} + +static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs) +{ + int stopped = 0; + + if (unlikely(!ionic_q_has_space(q, ndescs))) { + netif_stop_subqueue(q->lif->netdev, q->index); + q->stop++; + stopped = 1; + + /* Might race with ionic_tx_clean, check again */ + smp_rmb(); + if (ionic_q_has_space(q, ndescs)) { + netif_wake_subqueue(q->lif->netdev, q->index); + stopped = 0; + } + } + + return stopped; +} + +netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + u16 queue_index = skb_get_queue_mapping(skb); + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_queue *q; + int ndescs; + int err; + + if (unlikely(!test_bit(IONIC_LIF_UP, lif->state))) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + if (unlikely(!lif_to_txqcq(lif, queue_index))) + queue_index = 0; + q = lif_to_txq(lif, queue_index); + + ndescs = ionic_tx_descs_needed(q, skb); + if (ndescs < 0) + goto err_out_drop; + + if (unlikely(ionic_maybe_stop_tx(q, ndescs))) + return NETDEV_TX_BUSY; + + if (skb_is_gso(skb)) + err = ionic_tx_tso(q, skb); + else + err = ionic_tx(q, skb); + + if (err) + goto err_out_drop; + + /* Stop the queue if there aren't descriptors for the next packet. 
+ * Since our SG lists per descriptor take care of most of the possible + * fragmentation, we don't need to have many descriptors available. + */ + ionic_maybe_stop_tx(q, 4); + + return NETDEV_TX_OK; + +err_out_drop: + q->stop++; + q->drop++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h new file mode 100644 index 000000000000..53775c62c85a --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ + +#ifndef _IONIC_TXRX_H_ +#define _IONIC_TXRX_H_ + +void ionic_rx_flush(struct ionic_cq *cq); +void ionic_tx_flush(struct ionic_cq *cq); + +void ionic_rx_fill(struct ionic_queue *q); +void ionic_rx_empty(struct ionic_queue *q); +int ionic_rx_napi(struct napi_struct *napi, int budget); +netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev); + +#endif /* _IONIC_TXRX_H_ */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index f380fae8799d..65ec16a31658 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -382,7 +382,7 @@ qed_iwarp2roce_state(enum qed_iwarp_qp_state state) } } -const static char *iwarp_state_names[] = { +static const char * const iwarp_state_names[] = { "IDLE", "RTS", "TERMINATE", diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index ac9195add811..a9c89d5d8898 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -193,16 +193,12 @@ enum ravb_reg { GECMR = 0x05b0, MAHR = 0x05c0, MALR = 0x05c8, - TROCR = 0x0700, /* Undocumented? */ - CDCR = 0x0708, /* Undocumented? */ - LCCR = 0x0710, /* Undocumented? */ + TROCR = 0x0700, /* R-Car Gen3 only */ CEFCR = 0x0740, FRECR = 0x0748, TSFRCR = 0x0750, TLFRCR = 0x0758, RFCR = 0x0760, - CERCR = 0x0768, /* Undocumented? */ - CEECR = 0x0770, /* Undocumented? */ MAFCR = 0x0778, }; @@ -220,7 +216,6 @@ enum CCC_BIT { CCC_CSEL_HPB = 0x00010000, CCC_CSEL_ETH_TX = 0x00020000, CCC_CSEL_GMII_REF = 0x00030000, - CCC_BOC = 0x00100000, /* Undocumented? 
*/ CCC_LBME = 0x01000000, }; @@ -317,7 +312,7 @@ enum UFCD_BIT { /* SFO */ enum SFO_BIT { - SFO_FPB = 0x0000003F, + SFO_FBP = 0x0000003F, }; /* RTC */ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 6cacd5e893ac..de9aa8c47f1c 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -447,12 +447,6 @@ static int ravb_dmac_init(struct net_device *ndev) ravb_ring_format(ndev, RAVB_BE); ravb_ring_format(ndev, RAVB_NC); -#if defined(__LITTLE_ENDIAN) - ravb_modify(ndev, CCC, CCC_BOC, 0); -#else - ravb_modify(ndev, CCC, CCC_BOC, CCC_BOC); -#endif - /* Set AVB RX */ ravb_write(ndev, RCR_EFFS | RCR_ENCF | RCR_ETS0 | RCR_ESF | 0x18000000, RCR); @@ -1627,17 +1621,10 @@ static struct net_device_stats *ravb_get_stats(struct net_device *ndev) stats0 = &priv->stats[RAVB_BE]; stats1 = &priv->stats[RAVB_NC]; - nstats->tx_dropped += ravb_read(ndev, TROCR); - ravb_write(ndev, 0, TROCR); /* (write clear) */ - nstats->collisions += ravb_read(ndev, CDCR); - ravb_write(ndev, 0, CDCR); /* (write clear) */ - nstats->tx_carrier_errors += ravb_read(ndev, LCCR); - ravb_write(ndev, 0, LCCR); /* (write clear) */ - - nstats->tx_carrier_errors += ravb_read(ndev, CERCR); - ravb_write(ndev, 0, CERCR); /* (write clear) */ - nstats->tx_carrier_errors += ravb_read(ndev, CEECR); - ravb_write(ndev, 0, CEECR); /* (write clear) */ + if (priv->chip_id == RCAR_GEN3) { + nstats->tx_dropped += ravb_read(ndev, TROCR); + ravb_write(ndev, 0, TROCR); /* (write clear) */ + } nstats->rx_packets = stats0->rx_packets + stats1->rx_packets; nstats->tx_packets = stats0->tx_packets + stats1->tx_packets; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 2c5d3f5b84dd..786b158bd305 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2189,6 +2189,9 @@ static int rocker_router_fib_event(struct notifier_block *nb, struct rocker_fib_event_work *fib_work; struct fib_notifier_info *info = ptr; + if (!net_eq(info->net, &init_net)) + return NOTIFY_DONE; + if (info->family != AF_INET) return NOTIFY_DONE; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 49aa56ca09cc..912bbb6515b2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -360,6 +360,8 @@ struct dma_features { unsigned int sphen; unsigned int vlins; unsigned int dvlan; + unsigned int l3l4fnum; + unsigned int arpoffsel; }; /* GMAC TX FIFO is 8K, Rx FIFO is 16K */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 7357b8bdc128..5923ca62d793 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -44,9 +44,11 @@ #define XGMAC_CONFIG_CST BIT(2) #define XGMAC_CONFIG_ACS BIT(1) #define XGMAC_CONFIG_RE BIT(0) -#define XGMAC_CORE_INIT_RX 0 +#define XGMAC_CORE_INIT_RX (XGMAC_CONFIG_GPSLCE | XGMAC_CONFIG_WD | \ + (XGMAC_JUMBO_LEN << XGMAC_CONFIG_GPSL_SHIFT)) #define XGMAC_PACKET_FILTER 0x00000008 #define XGMAC_FILTER_RA BIT(31) +#define XGMAC_FILTER_IPFE BIT(20) #define XGMAC_FILTER_VTFE BIT(16) #define XGMAC_FILTER_HPF BIT(10) #define XGMAC_FILTER_PCF BIT(7) @@ -119,6 +121,7 @@ #define XGMAC_HWFEAT_VLHASH BIT(4) #define XGMAC_HWFEAT_GMIISEL BIT(1) #define XGMAC_HW_FEATURE1 0x00000120 +#define XGMAC_HWFEAT_L3L4FNUM GENMASK(30, 27) #define XGMAC_HWFEAT_RSSEN 
BIT(20) #define XGMAC_HWFEAT_TSOEN BIT(18) #define XGMAC_HWFEAT_SPHEN BIT(17) @@ -150,6 +153,34 @@ #define XGMAC_DCS GENMASK(19, 16) #define XGMAC_DCS_SHIFT 16 #define XGMAC_ADDRx_LOW(x) (0x00000304 + (x) * 0x8) +#define XGMAC_L3L4_ADDR_CTRL 0x00000c00 +#define XGMAC_IDDR GENMASK(15, 8) +#define XGMAC_IDDR_SHIFT 8 +#define XGMAC_IDDR_FNUM 4 +#define XGMAC_TT BIT(1) +#define XGMAC_XB BIT(0) +#define XGMAC_L3L4_DATA 0x00000c04 +#define XGMAC_L3L4_CTRL 0x0 +#define XGMAC_L4DPIM0 BIT(21) +#define XGMAC_L4DPM0 BIT(20) +#define XGMAC_L4SPIM0 BIT(19) +#define XGMAC_L4SPM0 BIT(18) +#define XGMAC_L4PEN0 BIT(16) +#define XGMAC_L3HDBM0 GENMASK(15, 11) +#define XGMAC_L3HSBM0 GENMASK(10, 6) +#define XGMAC_L3DAIM0 BIT(5) +#define XGMAC_L3DAM0 BIT(4) +#define XGMAC_L3SAIM0 BIT(3) +#define XGMAC_L3SAM0 BIT(2) +#define XGMAC_L3PEN0 BIT(0) +#define XGMAC_L4_ADDR 0x1 +#define XGMAC_L4DP0 GENMASK(31, 16) +#define XGMAC_L4DP0_SHIFT 16 +#define XGMAC_L4SP0 GENMASK(15, 0) +#define XGMAC_L3_ADDR0 0x4 +#define XGMAC_L3_ADDR1 0x5 +#define XGMAC_L3_ADDR2 0x6 +#define XMGAC_L3_ADDR3 0x7 #define XGMAC_ARP_ADDR 0x00000c10 #define XGMAC_RSS_CTRL 0x00000c80 #define XGMAC_UDP4TE BIT(3) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index e534a3aaf4a3..78ac659da279 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -15,7 +15,6 @@ static void dwxgmac2_core_init(struct mac_device_info *hw, struct net_device *dev) { void __iomem *ioaddr = hw->pcsr; - int mtu = dev->mtu; u32 tx, rx; tx = readl(ioaddr + XGMAC_TX_CONFIG); @@ -24,16 +23,6 @@ static void dwxgmac2_core_init(struct mac_device_info *hw, tx |= XGMAC_CORE_INIT_TX; rx |= XGMAC_CORE_INIT_RX; - if (mtu >= 9000) { - rx |= XGMAC_CONFIG_GPSLCE; - rx |= XGMAC_JUMBO_LEN << XGMAC_CONFIG_GPSL_SHIFT; - rx |= XGMAC_CONFIG_WD; - } else if (mtu > 2000) { - rx |= XGMAC_CONFIG_JE; - } else if (mtu > 1500) { - rx |= XGMAC_CONFIG_S2KP; - } - if (hw->ps) { tx |= XGMAC_CONFIG_TE; tx &= ~hw->link.speed_mask; @@ -1163,6 +1152,197 @@ static void dwxgmac2_enable_vlan(struct mac_device_info *hw, u32 type) writel(value, ioaddr + XGMAC_VLAN_INCL); } +static int dwxgmac2_filter_wait(struct mac_device_info *hw) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + + if (readl_poll_timeout(ioaddr + XGMAC_L3L4_ADDR_CTRL, value, + !(value & XGMAC_XB), 100, 10000)) + return -EBUSY; + return 0; +} + +static int dwxgmac2_filter_read(struct mac_device_info *hw, u32 filter_no, + u8 reg, u32 *data) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + int ret; + + ret = dwxgmac2_filter_wait(hw); + if (ret) + return ret; + + value = ((filter_no << XGMAC_IDDR_FNUM) | reg) << XGMAC_IDDR_SHIFT; + value |= XGMAC_TT | XGMAC_XB; + writel(value, ioaddr + XGMAC_L3L4_ADDR_CTRL); + + ret = dwxgmac2_filter_wait(hw); + if (ret) + return ret; + + *data = readl(ioaddr + XGMAC_L3L4_DATA); + return 0; +} + +static int dwxgmac2_filter_write(struct mac_device_info *hw, u32 filter_no, + u8 reg, u32 data) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + int ret; + + ret = dwxgmac2_filter_wait(hw); + if (ret) + return ret; + + writel(data, ioaddr + XGMAC_L3L4_DATA); + + value = ((filter_no << XGMAC_IDDR_FNUM) | reg) << XGMAC_IDDR_SHIFT; + value |= XGMAC_XB; + writel(value, ioaddr + XGMAC_L3L4_ADDR_CTRL); + + return dwxgmac2_filter_wait(hw); +} + +static int dwxgmac2_config_l3_filter(struct mac_device_info *hw, u32 filter_no, + bool en, bool ipv6, bool sa, bool inv, + u32 match) +{ 
+ void __iomem *ioaddr = hw->pcsr; + u32 value; + int ret; + + value = readl(ioaddr + XGMAC_PACKET_FILTER); + value |= XGMAC_FILTER_IPFE; + writel(value, ioaddr + XGMAC_PACKET_FILTER); + + ret = dwxgmac2_filter_read(hw, filter_no, XGMAC_L3L4_CTRL, &value); + if (ret) + return ret; + + /* For IPv6 not both SA/DA filters can be active */ + if (ipv6) { + value |= XGMAC_L3PEN0; + value &= ~(XGMAC_L3SAM0 | XGMAC_L3SAIM0); + value &= ~(XGMAC_L3DAM0 | XGMAC_L3DAIM0); + if (sa) { + value |= XGMAC_L3SAM0; + if (inv) + value |= XGMAC_L3SAIM0; + } else { + value |= XGMAC_L3DAM0; + if (inv) + value |= XGMAC_L3DAIM0; + } + } else { + value &= ~XGMAC_L3PEN0; + if (sa) { + value |= XGMAC_L3SAM0; + if (inv) + value |= XGMAC_L3SAIM0; + } else { + value |= XGMAC_L3DAM0; + if (inv) + value |= XGMAC_L3DAIM0; + } + } + + ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, value); + if (ret) + return ret; + + if (sa) { + ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3_ADDR0, match); + if (ret) + return ret; + } else { + ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3_ADDR1, match); + if (ret) + return ret; + } + + if (!en) + return dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, 0); + + return 0; +} + +static int dwxgmac2_config_l4_filter(struct mac_device_info *hw, u32 filter_no, + bool en, bool udp, bool sa, bool inv, + u32 match) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + int ret; + + value = readl(ioaddr + XGMAC_PACKET_FILTER); + value |= XGMAC_FILTER_IPFE; + writel(value, ioaddr + XGMAC_PACKET_FILTER); + + ret = dwxgmac2_filter_read(hw, filter_no, XGMAC_L3L4_CTRL, &value); + if (ret) + return ret; + + if (udp) { + value |= XGMAC_L4PEN0; + } else { + value &= ~XGMAC_L4PEN0; + } + + value &= ~(XGMAC_L4SPM0 | XGMAC_L4SPIM0); + value &= ~(XGMAC_L4DPM0 | XGMAC_L4DPIM0); + if (sa) { + value |= XGMAC_L4SPM0; + if (inv) + value |= XGMAC_L4SPIM0; + } else { + value |= XGMAC_L4DPM0; + if (inv) + value |= XGMAC_L4DPIM0; + } + + ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, value); + if (ret) + return ret; + + if (sa) { + value = match & XGMAC_L4SP0; + + ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L4_ADDR, value); + if (ret) + return ret; + } else { + value = (match << XGMAC_L4DP0_SHIFT) & XGMAC_L4DP0; + + ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L4_ADDR, value); + if (ret) + return ret; + } + + if (!en) + return dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, 0); + + return 0; +} + +static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en, + u32 addr) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + + writel(addr, ioaddr + XGMAC_ARP_ADDR); + + value = readl(ioaddr + XGMAC_RX_CONFIG); + if (en) + value |= XGMAC_CONFIG_ARPEN; + else + value &= ~XGMAC_CONFIG_ARPEN; + writel(value, ioaddr + XGMAC_RX_CONFIG); +} + const struct stmmac_ops dwxgmac210_ops = { .core_init = dwxgmac2_core_init, .set_mac = dwxgmac2_set_mac, @@ -1203,6 +1383,9 @@ const struct stmmac_ops dwxgmac210_ops = { .flex_pps_config = dwxgmac2_flex_pps_config, .sarc_configure = dwxgmac2_sarc_configure, .enable_vlan = dwxgmac2_enable_vlan, + .config_l3_filter = dwxgmac2_config_l3_filter, + .config_l4_filter = dwxgmac2_config_l4_filter, + .set_arp_offload = dwxgmac2_set_arp_offload, }; int dwxgmac2_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 64956465c030..53c4a40d8386 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ 
b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -322,6 +322,10 @@ static int dwxgmac2_dma_interrupt(void __iomem *ioaddr, /* ABNORMAL interrupts */ if (unlikely(intr_status & XGMAC_AIS)) { + if (unlikely(intr_status & XGMAC_RBU)) { + x->rx_buf_unav_irq++; + ret |= handle_rx; + } if (unlikely(intr_status & XGMAC_TPS)) { x->tx_process_stopped_irq++; ret |= tx_hard_error; @@ -365,7 +369,8 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, dma_cap->eee = (hw_cap & XGMAC_HWFEAT_EEESEL) >> 13; dma_cap->atime_stamp = (hw_cap & XGMAC_HWFEAT_TSSEL) >> 12; dma_cap->av = (hw_cap & XGMAC_HWFEAT_AVSEL) >> 11; - dma_cap->av &= (hw_cap & XGMAC_HWFEAT_RAVSEL) >> 10; + dma_cap->av &= !(hw_cap & XGMAC_HWFEAT_RAVSEL) >> 10; + dma_cap->arpoffsel = (hw_cap & XGMAC_HWFEAT_ARPOFFSEL) >> 9; dma_cap->rmon = (hw_cap & XGMAC_HWFEAT_MMCSEL) >> 8; dma_cap->pmt_magic_frame = (hw_cap & XGMAC_HWFEAT_MGKSEL) >> 7; dma_cap->pmt_remote_wake_up = (hw_cap & XGMAC_HWFEAT_RWKSEL) >> 6; @@ -374,6 +379,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, /* MAC HW feature 1 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1); + dma_cap->l3l4fnum = (hw_cap & XGMAC_HWFEAT_L3L4FNUM) >> 27; dma_cap->rssen = (hw_cap & XGMAC_HWFEAT_RSSEN) >> 20; dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18; dma_cap->sphen = (hw_cap & XGMAC_HWFEAT_SPHEN) >> 17; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 9435b312495d..ddb851d99618 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -363,6 +363,14 @@ struct stmmac_ops { int (*get_mac_tx_timestamp)(struct mac_device_info *hw, u64 *ts); /* Source Address Insertion / Replacement */ void (*sarc_configure)(void __iomem *ioaddr, int val); + /* Filtering */ + int (*config_l3_filter)(struct mac_device_info *hw, u32 filter_no, + bool en, bool ipv6, bool sa, bool inv, + u32 match); + int (*config_l4_filter)(struct mac_device_info *hw, u32 filter_no, + bool en, bool udp, bool sa, bool inv, + u32 match); + void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr); }; #define stmmac_core_init(__priv, __args...) \ @@ -443,6 +451,12 @@ struct stmmac_ops { stmmac_do_callback(__priv, mac, get_mac_tx_timestamp, __args) #define stmmac_sarc_configure(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, sarc_configure, __args) +#define stmmac_config_l3_filter(__priv, __args...) \ + stmmac_do_callback(__priv, mac, config_l3_filter, __args) +#define stmmac_config_l4_filter(__priv, __args...) \ + stmmac_do_callback(__priv, mac, config_l4_filter, __args) +#define stmmac_set_arp_offload(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, set_arp_offload, __args) /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { @@ -499,6 +513,7 @@ struct stmmac_mode_ops { struct stmmac_priv; struct tc_cls_u32_offload; struct tc_cbs_qopt_offload; +struct flow_cls_offload; struct stmmac_tc_ops { int (*init)(struct stmmac_priv *priv); @@ -506,6 +521,8 @@ struct stmmac_tc_ops { struct tc_cls_u32_offload *cls); int (*setup_cbs)(struct stmmac_priv *priv, struct tc_cbs_qopt_offload *qopt); + int (*setup_cls)(struct stmmac_priv *priv, + struct flow_cls_offload *cls); }; #define stmmac_tc_init(__priv, __args...) \ @@ -514,6 +531,8 @@ struct stmmac_tc_ops { stmmac_do_callback(__priv, tc, setup_cls_u32, __args) #define stmmac_tc_setup_cbs(__priv, __args...) \ stmmac_do_callback(__priv, tc, setup_cbs, __args) +#define stmmac_tc_setup_cls(__priv, __args...) 
\ + stmmac_do_callback(__priv, tc, setup_cls, __args) struct stmmac_counters; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index dcb2e29a5717..d993fc7e82c3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -128,6 +128,16 @@ struct stmmac_rss { u32 table[STMMAC_RSS_MAX_TABLE_SIZE]; }; +#define STMMAC_FLOW_ACTION_DROP BIT(0) +struct stmmac_flow_entry { + unsigned long cookie; + unsigned long action; + u8 ip_proto; + int in_use; + int idx; + int is_l4; +}; + struct stmmac_priv { /* Frequently used values are kept adjacent for cache effect */ u32 tx_coal_frames; @@ -216,6 +226,8 @@ struct stmmac_priv { unsigned int tc_entries_max; unsigned int tc_off_max; struct stmmac_tc_entry *tc_entries; + unsigned int flow_entries_max; + struct stmmac_flow_entry *flow_entries; /* Pulse Per Second output */ struct stmmac_pps_cfg pps[STMMAC_PPS_MAX]; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 1c450105e5a6..1a768837ca72 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -746,8 +746,15 @@ static int stmmac_set_coalesce(struct net_device *dev, (ec->tx_max_coalesced_frames_high) || (ec->rate_sample_interval)) return -EOPNOTSUPP; - if (ec->rx_coalesce_usecs == 0) - return -EINVAL; + if (priv->use_riwt && (ec->rx_coalesce_usecs > 0)) { + rx_riwt = stmmac_usec2riwt(ec->rx_coalesce_usecs, priv); + + if ((rx_riwt > MAX_DMA_RIWT) || (rx_riwt < MIN_DMA_RIWT)) + return -EINVAL; + + priv->rx_riwt = rx_riwt; + stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt); + } if ((ec->tx_coalesce_usecs == 0) && (ec->tx_max_coalesced_frames == 0)) @@ -757,20 +764,10 @@ static int stmmac_set_coalesce(struct net_device *dev, (ec->tx_max_coalesced_frames > STMMAC_TX_MAX_FRAMES)) return -EINVAL; - rx_riwt = stmmac_usec2riwt(ec->rx_coalesce_usecs, priv); - - if ((rx_riwt > MAX_DMA_RIWT) || (rx_riwt < MIN_DMA_RIWT)) - return -EINVAL; - else if (!priv->use_riwt) - return -EOPNOTSUPP; - /* Only copy relevant parameters, ignore all others. 
*/ priv->tx_coal_frames = ec->tx_max_coalesced_frames; priv->tx_coal_timer = ec->tx_coalesce_usecs; priv->rx_coal_frames = ec->rx_max_coalesced_frames; - priv->rx_riwt = rx_riwt; - stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt); - return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 06ccd216ae90..c3baca9f587b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3511,9 +3511,10 @@ read_again: &priv->xstats, rx_q->dma_erx + entry); if (unlikely(status == discard_frame)) { page_pool_recycle_direct(rx_q->page_pool, buf->page); - priv->dev->stats.rx_errors++; buf->page = NULL; error = 1; + if (!priv->hwts_rx_en) + priv->dev->stats.rx_errors++; } if (unlikely(error && (status & rx_not_ls))) @@ -3931,12 +3932,17 @@ static int stmmac_setup_tc_block_cb(enum tc_setup_type type, void *type_data, struct stmmac_priv *priv = cb_priv; int ret = -EOPNOTSUPP; + if (!tc_cls_can_offload_and_chain0(priv->dev, type_data)) + return ret; + stmmac_disable_all_queues(priv); switch (type) { case TC_SETUP_CLSU32: - if (tc_cls_can_offload_and_chain0(priv->dev, type_data)) - ret = stmmac_tc_setup_cls_u32(priv, priv, type_data); + ret = stmmac_tc_setup_cls_u32(priv, priv, type_data); + break; + case TC_SETUP_CLSFLOWER: + ret = stmmac_tc_setup_cls(priv, priv, type_data); break; default: break; @@ -4536,10 +4542,10 @@ int stmmac_dvr_probe(struct device *device, /* MTU range: 46 - hw-specific max */ ndev->min_mtu = ETH_ZLEN - ETH_HLEN; - if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00)) - ndev->max_mtu = JUMBO_LEN; - else if (priv->plat->has_xgmac) + if (priv->plat->has_xgmac) ndev->max_mtu = XGMAC_JUMBO_LEN; + else if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00)) + ndev->max_mtu = JUMBO_LEN; else ndev->max_mtu = SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN); /* Will not overwrite ndev->max_mtu if plat->maxmtu > ndev->max_mtu diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index eaf8f08f2e91..5de754a9fae9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -23,6 +23,7 @@ /** * dwmac1000_validate_mcast_bins - validates the number of Multicast filter bins + * @dev: struct device of the platform device * @mcast_bins: Multicast filtering bins * Description: * this function validates the number of Multicast filtering bins specified @@ -33,7 +34,7 @@ * invalid and will cause the filtering algorithm to use Multicast * promiscuous mode. 
*/ -static int dwmac1000_validate_mcast_bins(int mcast_bins) +static int dwmac1000_validate_mcast_bins(struct device *dev, int mcast_bins) { int x = mcast_bins; @@ -44,8 +45,8 @@ static int dwmac1000_validate_mcast_bins(int mcast_bins) break; default: x = 0; - pr_info("Hash table entries set to unexpected value %d", - mcast_bins); + dev_info(dev, "Hash table entries set to unexpected value %d\n", + mcast_bins); break; } return x; @@ -53,6 +54,7 @@ static int dwmac1000_validate_mcast_bins(int mcast_bins) /** * dwmac1000_validate_ucast_entries - validate the Unicast address entries + * @dev: struct device of the platform device * @ucast_entries: number of Unicast address entries * Description: * This function validates the number of Unicast address entries supported @@ -62,7 +64,8 @@ static int dwmac1000_validate_mcast_bins(int mcast_bins) * selected, and defaults to 1 Unicast address if an unsupported * configuration is selected. */ -static int dwmac1000_validate_ucast_entries(int ucast_entries) +static int dwmac1000_validate_ucast_entries(struct device *dev, + int ucast_entries) { int x = ucast_entries; @@ -73,8 +76,8 @@ static int dwmac1000_validate_ucast_entries(int ucast_entries) break; default: x = 1; - pr_info("Unicast table entries set to unexpected value %d\n", - ucast_entries); + dev_info(dev, "Unicast table entries set to unexpected value %d\n", + ucast_entries); break; } return x; @@ -463,9 +466,9 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) of_property_read_u32(np, "snps,perfect-filter-entries", &plat->unicast_filter_entries); plat->unicast_filter_entries = dwmac1000_validate_ucast_entries( - plat->unicast_filter_entries); + &pdev->dev, plat->unicast_filter_entries); plat->multicast_filter_bins = dwmac1000_validate_mcast_bins( - plat->multicast_filter_bins); + &pdev->dev, plat->multicast_filter_bins); plat->has_gmac = 1; plat->pmt = 1; } @@ -514,7 +517,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode"); if (plat->force_thresh_dma_mode) { plat->force_sf_dma_mode = 0; - pr_warn("force_sf_dma_mode is ignored if force_thresh_dma_mode is set."); + dev_warn(&pdev->dev, + "force_sf_dma_mode is ignored if force_thresh_dma_mode is set.\n"); } of_property_read_u32(np, "snps,ps-speed", &plat->mac_port_sel_speed); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index ecc8602c6799..305d24935cf4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -43,9 +43,11 @@ struct stmmac_packet_attrs { int dont_wait; int timeout; int size; + int max_size; int remove_sa; u8 id; int sarc; + u16 queue_mapping; }; static u8 stmmac_test_next_id; @@ -73,12 +75,14 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv, else size += sizeof(struct udphdr); - skb = netdev_alloc_skb(priv->dev, size); + if (attr->max_size && (attr->max_size > size)) + size = attr->max_size; + + skb = netdev_alloc_skb_ip_align(priv->dev, size); if (!skb) return NULL; prefetchw(skb->data); - skb_reserve(skb, NET_IP_ALIGN); if (attr->vlan > 1) ehdr = skb_push(skb, ETH_HLEN + 8); @@ -147,6 +151,9 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv, uhdr->source = htons(attr->sport); uhdr->dest = htons(attr->dport); uhdr->len = htons(sizeof(*shdr) + sizeof(*uhdr) + attr->size); + if (attr->max_size) + 
uhdr->len = htons(attr->max_size - + (sizeof(*ihdr) + sizeof(*ehdr))); uhdr->check = 0; } @@ -162,9 +169,13 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv, iplen += sizeof(*thdr); else iplen += sizeof(*uhdr); + + if (attr->max_size) + iplen = attr->max_size - sizeof(*ehdr); + ihdr->tot_len = htons(iplen); ihdr->frag_off = 0; - ihdr->saddr = 0; + ihdr->saddr = htonl(attr->ip_src); ihdr->daddr = htonl(attr->ip_dst); ihdr->tos = 0; ihdr->id = 0; @@ -178,6 +189,8 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv, if (attr->size) skb_put(skb, attr->size); + if (attr->max_size && (attr->max_size > skb->len)) + skb_put(skb, attr->max_size - skb->len); skb->csum = 0; skb->ip_summed = CHECKSUM_PARTIAL; @@ -196,6 +209,24 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv, return skb; } +static struct sk_buff *stmmac_test_get_arp_skb(struct stmmac_priv *priv, + struct stmmac_packet_attrs *attr) +{ + __be32 ip_src = htonl(attr->ip_src); + __be32 ip_dst = htonl(attr->ip_dst); + struct sk_buff *skb = NULL; + + skb = arp_create(ARPOP_REQUEST, ETH_P_ARP, ip_dst, priv->dev, ip_src, + NULL, attr->src, attr->dst); + if (!skb) + return NULL; + + skb->pkt_type = PACKET_HOST; + skb->dev = priv->dev; + + return skb; +} + struct stmmac_test_priv { struct stmmac_packet_attrs *packet; struct packet_type pt; @@ -306,7 +337,7 @@ static int __stmmac_test_loopback(struct stmmac_priv *priv, goto cleanup; } - skb_set_queue_mapping(skb, 0); + skb_set_queue_mapping(skb, attr->queue_mapping); ret = dev_queue_xmit(skb); if (ret) goto cleanup; @@ -318,7 +349,7 @@ static int __stmmac_test_loopback(struct stmmac_priv *priv, attr->timeout = STMMAC_LB_TIMEOUT; wait_for_completion_timeout(&tpriv->comp, attr->timeout); - ret = !tpriv->ok; + ret = tpriv->ok ? 0 : -ETIMEDOUT; cleanup: if (!attr->dont_wait) @@ -480,7 +511,7 @@ static int stmmac_test_hfilt(struct stmmac_priv *priv) /* Shall NOT receive packet */ ret = __stmmac_test_loopback(priv, &attr); - ret = !ret; + ret = ret ? 0 : -EINVAL; cleanup: dev_mc_del(priv->dev, gd_addr); @@ -512,7 +543,7 @@ static int stmmac_test_pfilt(struct stmmac_priv *priv) /* Shall NOT receive packet */ ret = __stmmac_test_loopback(priv, &attr); - ret = !ret; + ret = ret ? 0 : -EINVAL; cleanup: dev_uc_del(priv->dev, gd_addr); @@ -562,7 +593,7 @@ static int stmmac_test_mcfilt(struct stmmac_priv *priv) /* Shall NOT receive packet */ ret = __stmmac_test_loopback(priv, &attr); - ret = !ret; + ret = ret ? 0 : -EINVAL; cleanup: dev_uc_del(priv->dev, uc_addr); @@ -600,7 +631,7 @@ static int stmmac_test_ucfilt(struct stmmac_priv *priv) /* Shall NOT receive packet */ ret = __stmmac_test_loopback(priv, &attr); - ret = !ret; + ret = ret ? 0 : -EINVAL; cleanup: dev_mc_del(priv->dev, mc_addr); @@ -699,7 +730,7 @@ static int stmmac_test_flowctrl(struct stmmac_priv *priv) } wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT); - ret = !tpriv->ok; + ret = tpriv->ok ? 0 : -ETIMEDOUT; cleanup: dev_mc_del(priv->dev, paddr); @@ -833,11 +864,11 @@ static int stmmac_test_vlanfilt(struct stmmac_priv *priv) goto vlan_del; wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT); - ret = !tpriv->ok; + ret = tpriv->ok ? 
0 : -ETIMEDOUT; if (ret && !i) { goto vlan_del; } else if (!ret && i) { - ret = -1; + ret = -EINVAL; goto vlan_del; } else { ret = 0; @@ -909,11 +940,11 @@ static int stmmac_test_dvlanfilt(struct stmmac_priv *priv) goto vlan_del; wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT); - ret = !tpriv->ok; + ret = tpriv->ok ? 0 : -ETIMEDOUT; if (ret && !i) { goto vlan_del; } else if (!ret && i) { - ret = -1; + ret = -EINVAL; goto vlan_del; } else { ret = 0; @@ -998,7 +1029,7 @@ static int stmmac_test_rxp(struct stmmac_priv *priv) attr.src = addr; ret = __stmmac_test_loopback(priv, &attr); - ret = !ret; /* Shall NOT receive packet */ + ret = ret ? 0 : -EINVAL; /* Shall NOT receive packet */ cls_u32.command = TC_CLSU32_DELETE_KNODE; stmmac_tc_setup_cls_u32(priv, priv, &cls_u32); @@ -1168,6 +1199,392 @@ static int stmmac_test_svlanoff(struct stmmac_priv *priv) return stmmac_test_vlanoff_common(priv, true); } +#ifdef CONFIG_NET_CLS_ACT +static int __stmmac_test_l3filt(struct stmmac_priv *priv, u32 dst, u32 src, + u32 dst_mask, u32 src_mask) +{ + struct flow_dissector_key_ipv4_addrs key, mask; + unsigned long dummy_cookie = 0xdeadbeef; + struct stmmac_packet_attrs attr = { }; + struct flow_dissector *dissector; + struct flow_cls_offload *cls; + struct flow_rule *rule; + int ret; + + if (!tc_can_offload(priv->dev)) + return -EOPNOTSUPP; + if (!priv->dma_cap.l3l4fnum) + return -EOPNOTSUPP; + if (priv->rss.enable) { + struct stmmac_rss rss = { .enable = false, }; + + stmmac_rss_configure(priv, priv->hw, &rss, + priv->plat->rx_queues_to_use); + } + + dissector = kzalloc(sizeof(*dissector), GFP_KERNEL); + if (!dissector) { + ret = -ENOMEM; + goto cleanup_rss; + } + + dissector->used_keys |= (1 << FLOW_DISSECTOR_KEY_IPV4_ADDRS); + dissector->offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] = 0; + + cls = kzalloc(sizeof(*cls), GFP_KERNEL); + if (!cls) { + ret = -ENOMEM; + goto cleanup_dissector; + } + + cls->common.chain_index = 0; + cls->command = FLOW_CLS_REPLACE; + cls->cookie = dummy_cookie; + + rule = kzalloc(struct_size(rule, action.entries, 1), GFP_KERNEL); + if (!rule) { + ret = -ENOMEM; + goto cleanup_cls; + } + + rule->match.dissector = dissector; + rule->match.key = (void *)&key; + rule->match.mask = (void *)&mask; + + key.src = htonl(src); + key.dst = htonl(dst); + mask.src = src_mask; + mask.dst = dst_mask; + + cls->rule = rule; + + rule->action.entries[0].id = FLOW_ACTION_DROP; + rule->action.num_entries = 1; + + attr.dst = priv->dev->dev_addr; + attr.ip_dst = dst; + attr.ip_src = src; + + /* Shall receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto cleanup_rule; + + ret = stmmac_tc_setup_cls(priv, priv, cls); + if (ret) + goto cleanup_rule; + + /* Shall NOT receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + ret = ret ? 
0 : -EINVAL; + + cls->command = FLOW_CLS_DESTROY; + stmmac_tc_setup_cls(priv, priv, cls); +cleanup_rule: + kfree(rule); +cleanup_cls: + kfree(cls); +cleanup_dissector: + kfree(dissector); +cleanup_rss: + if (priv->rss.enable) { + stmmac_rss_configure(priv, priv->hw, &priv->rss, + priv->plat->rx_queues_to_use); + } + + return ret; +} +#else +static int __stmmac_test_l3filt(struct stmmac_priv *priv, u32 dst, u32 src, + u32 dst_mask, u32 src_mask) +{ + return -EOPNOTSUPP; +} +#endif + +static int stmmac_test_l3filt_da(struct stmmac_priv *priv) +{ + u32 addr = 0x10203040; + + return __stmmac_test_l3filt(priv, addr, 0, ~0, 0); +} + +static int stmmac_test_l3filt_sa(struct stmmac_priv *priv) +{ + u32 addr = 0x10203040; + + return __stmmac_test_l3filt(priv, 0, addr, 0, ~0); +} + +#ifdef CONFIG_NET_CLS_ACT +static int __stmmac_test_l4filt(struct stmmac_priv *priv, u32 dst, u32 src, + u32 dst_mask, u32 src_mask, bool udp) +{ + struct { + struct flow_dissector_key_basic bkey; + struct flow_dissector_key_ports key; + } __aligned(BITS_PER_LONG / 8) keys; + struct { + struct flow_dissector_key_basic bmask; + struct flow_dissector_key_ports mask; + } __aligned(BITS_PER_LONG / 8) masks; + unsigned long dummy_cookie = 0xdeadbeef; + struct stmmac_packet_attrs attr = { }; + struct flow_dissector *dissector; + struct flow_cls_offload *cls; + struct flow_rule *rule; + int ret; + + if (!tc_can_offload(priv->dev)) + return -EOPNOTSUPP; + if (!priv->dma_cap.l3l4fnum) + return -EOPNOTSUPP; + if (priv->rss.enable) { + struct stmmac_rss rss = { .enable = false, }; + + stmmac_rss_configure(priv, priv->hw, &rss, + priv->plat->rx_queues_to_use); + } + + dissector = kzalloc(sizeof(*dissector), GFP_KERNEL); + if (!dissector) { + ret = -ENOMEM; + goto cleanup_rss; + } + + dissector->used_keys |= (1 << FLOW_DISSECTOR_KEY_BASIC); + dissector->used_keys |= (1 << FLOW_DISSECTOR_KEY_PORTS); + dissector->offset[FLOW_DISSECTOR_KEY_BASIC] = 0; + dissector->offset[FLOW_DISSECTOR_KEY_PORTS] = offsetof(typeof(keys), key); + + cls = kzalloc(sizeof(*cls), GFP_KERNEL); + if (!cls) { + ret = -ENOMEM; + goto cleanup_dissector; + } + + cls->common.chain_index = 0; + cls->command = FLOW_CLS_REPLACE; + cls->cookie = dummy_cookie; + + rule = kzalloc(struct_size(rule, action.entries, 1), GFP_KERNEL); + if (!rule) { + ret = -ENOMEM; + goto cleanup_cls; + } + + rule->match.dissector = dissector; + rule->match.key = (void *)&keys; + rule->match.mask = (void *)&masks; + + keys.bkey.ip_proto = udp ? IPPROTO_UDP : IPPROTO_TCP; + keys.key.src = htons(src); + keys.key.dst = htons(dst); + masks.mask.src = src_mask; + masks.mask.dst = dst_mask; + + cls->rule = rule; + + rule->action.entries[0].id = FLOW_ACTION_DROP; + rule->action.num_entries = 1; + + attr.dst = priv->dev->dev_addr; + attr.tcp = !udp; + attr.sport = src; + attr.dport = dst; + attr.ip_dst = 0; + + /* Shall receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto cleanup_rule; + + ret = stmmac_tc_setup_cls(priv, priv, cls); + if (ret) + goto cleanup_rule; + + /* Shall NOT receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + ret = ret ? 
0 : -EINVAL; + + cls->command = FLOW_CLS_DESTROY; + stmmac_tc_setup_cls(priv, priv, cls); +cleanup_rule: + kfree(rule); +cleanup_cls: + kfree(cls); +cleanup_dissector: + kfree(dissector); +cleanup_rss: + if (priv->rss.enable) { + stmmac_rss_configure(priv, priv->hw, &priv->rss, + priv->plat->rx_queues_to_use); + } + + return ret; +} +#else +static int __stmmac_test_l4filt(struct stmmac_priv *priv, u32 dst, u32 src, + u32 dst_mask, u32 src_mask, bool udp) +{ + return -EOPNOTSUPP; +} +#endif + +static int stmmac_test_l4filt_da_tcp(struct stmmac_priv *priv) +{ + u16 dummy_port = 0x123; + + return __stmmac_test_l4filt(priv, dummy_port, 0, ~0, 0, false); +} + +static int stmmac_test_l4filt_sa_tcp(struct stmmac_priv *priv) +{ + u16 dummy_port = 0x123; + + return __stmmac_test_l4filt(priv, 0, dummy_port, 0, ~0, false); +} + +static int stmmac_test_l4filt_da_udp(struct stmmac_priv *priv) +{ + u16 dummy_port = 0x123; + + return __stmmac_test_l4filt(priv, dummy_port, 0, ~0, 0, true); +} + +static int stmmac_test_l4filt_sa_udp(struct stmmac_priv *priv) +{ + u16 dummy_port = 0x123; + + return __stmmac_test_l4filt(priv, 0, dummy_port, 0, ~0, true); +} + +static int stmmac_test_arp_validate(struct sk_buff *skb, + struct net_device *ndev, + struct packet_type *pt, + struct net_device *orig_ndev) +{ + struct stmmac_test_priv *tpriv = pt->af_packet_priv; + struct ethhdr *ehdr; + struct arphdr *ahdr; + + ehdr = (struct ethhdr *)skb_mac_header(skb); + if (!ether_addr_equal(ehdr->h_dest, tpriv->packet->src)) + goto out; + + ahdr = arp_hdr(skb); + if (ahdr->ar_op != htons(ARPOP_REPLY)) + goto out; + + tpriv->ok = true; + complete(&tpriv->comp); +out: + kfree_skb(skb); + return 0; +} + +static int stmmac_test_arpoffload(struct stmmac_priv *priv) +{ + unsigned char src[ETH_ALEN] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06}; + unsigned char dst[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + struct stmmac_packet_attrs attr = { }; + struct stmmac_test_priv *tpriv; + struct sk_buff *skb = NULL; + u32 ip_addr = 0xdeadcafe; + u32 ip_src = 0xdeadbeef; + int ret; + + if (!priv->dma_cap.arpoffsel) + return -EOPNOTSUPP; + + tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL); + if (!tpriv) + return -ENOMEM; + + tpriv->ok = false; + init_completion(&tpriv->comp); + + tpriv->pt.type = htons(ETH_P_ARP); + tpriv->pt.func = stmmac_test_arp_validate; + tpriv->pt.dev = priv->dev; + tpriv->pt.af_packet_priv = tpriv; + tpriv->packet = &attr; + dev_add_pack(&tpriv->pt); + + attr.src = src; + attr.ip_src = ip_src; + attr.dst = dst; + attr.ip_dst = ip_addr; + + skb = stmmac_test_get_arp_skb(priv, &attr); + if (!skb) { + ret = -ENOMEM; + goto cleanup; + } + + ret = stmmac_set_arp_offload(priv, priv->hw, true, ip_addr); + if (ret) + goto cleanup; + + ret = dev_set_promiscuity(priv->dev, 1); + if (ret) + goto cleanup; + + skb_set_queue_mapping(skb, 0); + ret = dev_queue_xmit(skb); + if (ret) + goto cleanup_promisc; + + wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT); + ret = tpriv->ok ? 
0 : -ETIMEDOUT; + +cleanup_promisc: + dev_set_promiscuity(priv->dev, -1); +cleanup: + stmmac_set_arp_offload(priv, priv->hw, false, 0x0); + dev_remove_pack(&tpriv->pt); + kfree(tpriv); + return ret; +} + +static int __stmmac_test_jumbo(struct stmmac_priv *priv, u16 queue) +{ + struct stmmac_packet_attrs attr = { }; + int size = priv->dma_buf_sz; + + /* Only XGMAC has SW support for multiple RX descs in same packet */ + if (priv->plat->has_xgmac) + size = priv->dev->max_mtu; + + attr.dst = priv->dev->dev_addr; + attr.max_size = size - ETH_FCS_LEN; + attr.queue_mapping = queue; + + return __stmmac_test_loopback(priv, &attr); +} + +static int stmmac_test_jumbo(struct stmmac_priv *priv) +{ + return __stmmac_test_jumbo(priv, 0); +} + +static int stmmac_test_mjumbo(struct stmmac_priv *priv) +{ + u32 chan, tx_cnt = priv->plat->tx_queues_to_use; + int ret; + + if (tx_cnt <= 1) + return -EOPNOTSUPP; + + for (chan = 0; chan < tx_cnt; chan++) { + ret = __stmmac_test_jumbo(priv, chan); + if (ret) + return ret; + } + + return 0; +} + #define STMMAC_LOOPBACK_NONE 0 #define STMMAC_LOOPBACK_MAC 1 #define STMMAC_LOOPBACK_PHY 2 @@ -1253,6 +1670,42 @@ static const struct stmmac_test { .name = "SVLAN TX Insertion ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_svlanoff, + }, { + .name = "L3 DA Filtering ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_l3filt_da, + }, { + .name = "L3 SA Filtering ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_l3filt_sa, + }, { + .name = "L4 DA TCP Filtering ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_l4filt_da_tcp, + }, { + .name = "L4 SA TCP Filtering ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_l4filt_sa_tcp, + }, { + .name = "L4 DA UDP Filtering ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_l4filt_da_udp, + }, { + .name = "L4 SA UDP Filtering ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_l4filt_sa_udp, + }, { + .name = "ARP Offload ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_arpoffload, + }, { + .name = "Jumbo Frame ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_jumbo, + }, { + .name = "Multichannel Jumbo ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_mjumbo, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 6c305b6ecad0..e231098061b6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -242,9 +242,27 @@ static int tc_init(struct stmmac_priv *priv) { struct dma_features *dma_cap = &priv->dma_cap; unsigned int count; + int i; + + if (dma_cap->l3l4fnum) { + priv->flow_entries_max = dma_cap->l3l4fnum; + priv->flow_entries = devm_kcalloc(priv->device, + dma_cap->l3l4fnum, + sizeof(*priv->flow_entries), + GFP_KERNEL); + if (!priv->flow_entries) + return -ENOMEM; + for (i = 0; i < priv->flow_entries_max; i++) + priv->flow_entries[i].idx = i; + + dev_info(priv->device, "Enabled Flow TC (entries=%d)\n", + priv->flow_entries_max); + } + + /* Fail silently as we can still use remaining features, e.g. 
CBS */ if (!dma_cap->frpsel) - return -EINVAL; + return 0; switch (dma_cap->frpbs) { case 0x0: @@ -349,8 +367,235 @@ static int tc_setup_cbs(struct stmmac_priv *priv, return 0; } +static int tc_parse_flow_actions(struct stmmac_priv *priv, + struct flow_action *action, + struct stmmac_flow_entry *entry) +{ + struct flow_action_entry *act; + int i; + + if (!flow_action_has_entries(action)) + return -EINVAL; + + flow_action_for_each(i, act, action) { + switch (act->id) { + case FLOW_ACTION_DROP: + entry->action |= STMMAC_FLOW_ACTION_DROP; + return 0; + default: + break; + } + } + + /* Nothing to do, maybe inverse filter ? */ + return 0; +} + +static int tc_add_basic_flow(struct stmmac_priv *priv, + struct flow_cls_offload *cls, + struct stmmac_flow_entry *entry) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct flow_dissector *dissector = rule->match.dissector; + struct flow_match_basic match; + + /* Nothing to do here */ + if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_BASIC)) + return -EINVAL; + + flow_rule_match_basic(rule, &match); + entry->ip_proto = match.key->ip_proto; + return 0; +} + +static int tc_add_ip4_flow(struct stmmac_priv *priv, + struct flow_cls_offload *cls, + struct stmmac_flow_entry *entry) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct flow_dissector *dissector = rule->match.dissector; + bool inv = entry->action & STMMAC_FLOW_ACTION_DROP; + struct flow_match_ipv4_addrs match; + u32 hw_match; + int ret; + + /* Nothing to do here */ + if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) + return -EINVAL; + + flow_rule_match_ipv4_addrs(rule, &match); + hw_match = ntohl(match.key->src) & ntohl(match.mask->src); + if (hw_match) { + ret = stmmac_config_l3_filter(priv, priv->hw, entry->idx, true, + false, true, inv, hw_match); + if (ret) + return ret; + } + + hw_match = ntohl(match.key->dst) & ntohl(match.mask->dst); + if (hw_match) { + ret = stmmac_config_l3_filter(priv, priv->hw, entry->idx, true, + false, false, inv, hw_match); + if (ret) + return ret; + } + + return 0; +} + +static int tc_add_ports_flow(struct stmmac_priv *priv, + struct flow_cls_offload *cls, + struct stmmac_flow_entry *entry) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct flow_dissector *dissector = rule->match.dissector; + bool inv = entry->action & STMMAC_FLOW_ACTION_DROP; + struct flow_match_ports match; + u32 hw_match; + bool is_udp; + int ret; + + /* Nothing to do here */ + if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_PORTS)) + return -EINVAL; + + switch (entry->ip_proto) { + case IPPROTO_TCP: + is_udp = false; + break; + case IPPROTO_UDP: + is_udp = true; + break; + default: + return -EINVAL; + } + + flow_rule_match_ports(rule, &match); + + hw_match = ntohs(match.key->src) & ntohs(match.mask->src); + if (hw_match) { + ret = stmmac_config_l4_filter(priv, priv->hw, entry->idx, true, + is_udp, true, inv, hw_match); + if (ret) + return ret; + } + + hw_match = ntohs(match.key->dst) & ntohs(match.mask->dst); + if (hw_match) { + ret = stmmac_config_l4_filter(priv, priv->hw, entry->idx, true, + is_udp, false, inv, hw_match); + if (ret) + return ret; + } + + entry->is_l4 = true; + return 0; +} + +static struct stmmac_flow_entry *tc_find_flow(struct stmmac_priv *priv, + struct flow_cls_offload *cls, + bool get_free) +{ + int i; + + for (i = 0; i < priv->flow_entries_max; i++) { + struct stmmac_flow_entry *entry = &priv->flow_entries[i]; + + if (entry->cookie == cls->cookie) + return entry; + if (get_free && 
(entry->in_use == false)) + return entry; + } + + return NULL; +} + +struct { + int (*fn)(struct stmmac_priv *priv, struct flow_cls_offload *cls, + struct stmmac_flow_entry *entry); +} tc_flow_parsers[] = { + { .fn = tc_add_basic_flow }, + { .fn = tc_add_ip4_flow }, + { .fn = tc_add_ports_flow }, +}; + +static int tc_add_flow(struct stmmac_priv *priv, + struct flow_cls_offload *cls) +{ + struct stmmac_flow_entry *entry = tc_find_flow(priv, cls, false); + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + int i, ret; + + if (!entry) { + entry = tc_find_flow(priv, cls, true); + if (!entry) + return -ENOENT; + } + + ret = tc_parse_flow_actions(priv, &rule->action, entry); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(tc_flow_parsers); i++) { + ret = tc_flow_parsers[i].fn(priv, cls, entry); + if (!ret) { + entry->in_use = true; + continue; + } + } + + if (!entry->in_use) + return -EINVAL; + + entry->cookie = cls->cookie; + return 0; +} + +static int tc_del_flow(struct stmmac_priv *priv, + struct flow_cls_offload *cls) +{ + struct stmmac_flow_entry *entry = tc_find_flow(priv, cls, false); + int ret; + + if (!entry || !entry->in_use) + return -ENOENT; + + if (entry->is_l4) { + ret = stmmac_config_l4_filter(priv, priv->hw, entry->idx, false, + false, false, false, 0); + } else { + ret = stmmac_config_l3_filter(priv, priv->hw, entry->idx, false, + false, false, false, 0); + } + + entry->in_use = false; + entry->cookie = 0; + entry->is_l4 = false; + return ret; +} + +static int tc_setup_cls(struct stmmac_priv *priv, + struct flow_cls_offload *cls) +{ + int ret = 0; + + switch (cls->command) { + case FLOW_CLS_REPLACE: + ret = tc_add_flow(priv, cls); + break; + case FLOW_CLS_DESTROY: + ret = tc_del_flow(priv, cls); + break; + default: + return -EOPNOTSUPP; + } + + return ret; +} + const struct stmmac_tc_ops dwmac510_tc_ops = { .init = tc_init, .setup_cls_u32 = tc_setup_cls_u32, .setup_cbs = tc_setup_cbs, + .setup_cls = tc_setup_cls, }; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index cb2ea8facd8d..3ab24fdccd3b 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1345,7 +1345,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], info->key.u.ipv4.dst = nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); - if (IN_MULTICAST(ntohl(info->key.u.ipv4.dst))) { + if (ipv4_is_multicast(info->key.u.ipv4.dst)) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE], "Remote IPv4 address cannot be Multicast"); return -EINVAL; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 35d29a823af8..7c92afd36bbe 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -525,6 +525,12 @@ static int phy_check_link_status(struct phy_device *phydev) WARN_ON(!mutex_is_locked(&phydev->lock)); + /* Keep previous state if loopback is enabled because some PHYs + * report that Link is Down when loopback is enabled. 
+ */ + if (phydev->loopback_enabled) + return 0; + err = phy_read_status(phydev); if (err) return err; diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index 2d1449345959..151c2a3f0b3a 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -29,7 +29,7 @@ struct gmii2rgmii { static int xgmiitorgmii_read_status(struct phy_device *phydev) { - struct gmii2rgmii *priv = phydev->priv; + struct gmii2rgmii *priv = mdiodev_get_drvdata(&phydev->mdio); struct mii_bus *bus = priv->mdio->bus; int addr = priv->mdio->addr; u16 val = 0; @@ -90,7 +90,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev) memcpy(&priv->conv_phy_drv, priv->phy_dev->drv, sizeof(struct phy_driver)); priv->conv_phy_drv.read_status = xgmiitorgmii_read_status; - priv->phy_dev->priv = priv; + mdiodev_set_drvdata(&priv->phy_dev->mdio, priv); priv->phy_dev->drv = &priv->conv_phy_drv; return 0; diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 778d27d1fb15..08726090570e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -445,18 +445,18 @@ #define UPS_FLAGS_250M_CKDIV BIT(2) #define UPS_FLAGS_EN_ALDPS BIT(3) #define UPS_FLAGS_CTAP_SHORT_DIS BIT(4) -#define UPS_FLAGS_SPEED_MASK (0xf << 16) #define ups_flags_speed(x) ((x) << 16) #define UPS_FLAGS_EN_EEE BIT(20) #define UPS_FLAGS_EN_500M_EEE BIT(21) #define UPS_FLAGS_EN_EEE_CKDIV BIT(22) +#define UPS_FLAGS_EEE_PLLOFF_100 BIT(23) #define UPS_FLAGS_EEE_PLLOFF_GIGA BIT(24) #define UPS_FLAGS_EEE_CMOD_LV_EN BIT(25) #define UPS_FLAGS_EN_GREEN BIT(26) #define UPS_FLAGS_EN_FLOW_CTR BIT(27) enum spd_duplex { - NWAY_10M_HALF = 1, + NWAY_10M_HALF, NWAY_10M_FULL, NWAY_100M_HALF, NWAY_100M_FULL, @@ -749,6 +749,23 @@ struct r8152 { void (*autosuspend_en)(struct r8152 *tp, bool enable); } rtl_ops; + struct ups_info { + u32 _10m_ckdiv:1; + u32 _250m_ckdiv:1; + u32 aldps:1; + u32 lite_mode:2; + u32 speed_duplex:4; + u32 eee:1; + u32 eee_lite:1; + u32 eee_ckdiv:1; + u32 eee_plloff_100:1; + u32 eee_plloff_giga:1; + u32 eee_cmod_lv:1; + u32 green:1; + u32 flow_control:1; + u32 ctap_short_off:1; + } ups_info; + atomic_t rx_count; bool eee_en; @@ -757,6 +774,7 @@ struct r8152 { u32 msg_enable; u32 tx_qlen; u32 coalesce; + u32 advertising; u32 rx_buf_sz; u32 rx_copybreak; u32 rx_pending; @@ -790,6 +808,13 @@ enum tx_csum_stat { TX_CSUM_NONE }; +#define RTL_ADVERTISED_10_HALF BIT(0) +#define RTL_ADVERTISED_10_FULL BIT(1) +#define RTL_ADVERTISED_100_HALF BIT(2) +#define RTL_ADVERTISED_100_FULL BIT(3) +#define RTL_ADVERTISED_1000_HALF BIT(4) +#define RTL_ADVERTISED_1000_FULL BIT(5) + /* Maximum number of multicast addresses to filter (vs. Rx-all-multicast). * The RTL chips use a 64 element hash table based on the Ethernet CRC. 
*/ @@ -2857,14 +2882,76 @@ static void r8153_u2p3en(struct r8152 *tp, bool enable) ocp_write_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL, ocp_data); } -static void r8153b_ups_flags_w1w0(struct r8152 *tp, u32 set, u32 clear) +static void r8153b_ups_flags(struct r8152 *tp) { - u32 ocp_data; + u32 ups_flags = 0; + + if (tp->ups_info.green) + ups_flags |= UPS_FLAGS_EN_GREEN; + + if (tp->ups_info.aldps) + ups_flags |= UPS_FLAGS_EN_ALDPS; + + if (tp->ups_info.eee) + ups_flags |= UPS_FLAGS_EN_EEE; + + if (tp->ups_info.flow_control) + ups_flags |= UPS_FLAGS_EN_FLOW_CTR; + + if (tp->ups_info.eee_ckdiv) + ups_flags |= UPS_FLAGS_EN_EEE_CKDIV; + + if (tp->ups_info.eee_cmod_lv) + ups_flags |= UPS_FLAGS_EEE_CMOD_LV_EN; + + if (tp->ups_info._10m_ckdiv) + ups_flags |= UPS_FLAGS_EN_10M_CKDIV; + + if (tp->ups_info.eee_plloff_100) + ups_flags |= UPS_FLAGS_EEE_PLLOFF_100; + + if (tp->ups_info.eee_plloff_giga) + ups_flags |= UPS_FLAGS_EEE_PLLOFF_GIGA; + + if (tp->ups_info._250m_ckdiv) + ups_flags |= UPS_FLAGS_250M_CKDIV; + + if (tp->ups_info.ctap_short_off) + ups_flags |= UPS_FLAGS_CTAP_SHORT_DIS; + + switch (tp->ups_info.speed_duplex) { + case NWAY_10M_HALF: + ups_flags |= ups_flags_speed(1); + break; + case NWAY_10M_FULL: + ups_flags |= ups_flags_speed(2); + break; + case NWAY_100M_HALF: + ups_flags |= ups_flags_speed(3); + break; + case NWAY_100M_FULL: + ups_flags |= ups_flags_speed(4); + break; + case NWAY_1000M_FULL: + ups_flags |= ups_flags_speed(5); + break; + case FORCE_10M_HALF: + ups_flags |= ups_flags_speed(6); + break; + case FORCE_10M_FULL: + ups_flags |= ups_flags_speed(7); + break; + case FORCE_100M_HALF: + ups_flags |= ups_flags_speed(8); + break; + case FORCE_100M_FULL: + ups_flags |= ups_flags_speed(9); + break; + default: + break; + } - ocp_data = ocp_read_dword(tp, MCU_TYPE_USB, USB_UPS_FLAGS); - ocp_data &= ~clear; - ocp_data |= set; - ocp_write_dword(tp, MCU_TYPE_USB, USB_UPS_FLAGS, ocp_data); + ocp_write_dword(tp, MCU_TYPE_USB, USB_UPS_FLAGS, ups_flags); } static void r8153b_green_en(struct r8152 *tp, bool enable) @@ -2885,7 +2972,7 @@ static void r8153b_green_en(struct r8152 *tp, bool enable) data |= GREEN_ETH_EN; sram_write(tp, SRAM_GREEN_CFG, data); - r8153b_ups_flags_w1w0(tp, UPS_FLAGS_EN_GREEN, 0); + tp->ups_info.green = enable; } static u16 r8153_phy_status(struct r8152 *tp, u16 desired) @@ -2915,6 +3002,8 @@ static void r8153b_ups_en(struct r8152 *tp, bool enable) u32 ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_POWER_CUT); if (enable) { + r8153b_ups_flags(tp); + ocp_data |= UPS_EN | USP_PREWAKE | PHASE2_EN; ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); @@ -3223,16 +3312,8 @@ static void r8153_eee_en(struct r8152 *tp, bool enable) ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); ocp_reg_write(tp, OCP_EEE_CFG, config); -} - -static void r8153b_eee_en(struct r8152 *tp, bool enable) -{ - r8153_eee_en(tp, enable); - if (enable) - r8153b_ups_flags_w1w0(tp, UPS_FLAGS_EN_EEE, 0); - else - r8153b_ups_flags_w1w0(tp, 0, UPS_FLAGS_EN_EEE); + tp->ups_info.eee = enable; } static void rtl_eee_enable(struct r8152 *tp, bool enable) @@ -3254,21 +3335,13 @@ static void rtl_eee_enable(struct r8152 *tp, bool enable) case RTL_VER_04: case RTL_VER_05: case RTL_VER_06: - if (enable) { - r8153_eee_en(tp, true); - ocp_reg_write(tp, OCP_EEE_ADV, tp->eee_adv); - } else { - r8153_eee_en(tp, false); - ocp_reg_write(tp, OCP_EEE_ADV, 0); - } - break; case RTL_VER_08: case RTL_VER_09: if (enable) { - r8153b_eee_en(tp, true); + r8153_eee_en(tp, true); ocp_reg_write(tp, OCP_EEE_ADV, tp->eee_adv); } 
else { - r8153b_eee_en(tp, false); + r8153_eee_en(tp, false); ocp_reg_write(tp, OCP_EEE_ADV, 0); } break; @@ -3284,6 +3357,8 @@ static void r8152b_enable_fc(struct r8152 *tp) anar = r8152_mdio_read(tp, MII_ADVERTISE); anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; r8152_mdio_write(tp, MII_ADVERTISE, anar); + + tp->ups_info.flow_control = true; } static void rtl8152_disable(struct r8152 *tp) @@ -3477,22 +3552,8 @@ static void r8153_aldps_en(struct r8152 *tp, bool enable) break; } } -} - -static void r8153b_aldps_en(struct r8152 *tp, bool enable) -{ - r8153_aldps_en(tp, enable); - if (enable) - r8153b_ups_flags_w1w0(tp, UPS_FLAGS_EN_ALDPS, 0); - else - r8153b_ups_flags_w1w0(tp, 0, UPS_FLAGS_EN_ALDPS); -} - -static void r8153b_enable_fc(struct r8152 *tp) -{ - r8152b_enable_fc(tp); - r8153b_ups_flags_w1w0(tp, UPS_FLAGS_EN_FLOW_CTR, 0); + tp->ups_info.aldps = enable; } static void r8153_hw_phy_cfg(struct r8152 *tp) @@ -3569,11 +3630,11 @@ static u32 r8152_efuse_read(struct r8152 *tp, u8 addr) static void r8153b_hw_phy_cfg(struct r8152 *tp) { - u32 ocp_data, ups_flags = 0; + u32 ocp_data; u16 data; /* disable ALDPS before updating the PHY parameters */ - r8153b_aldps_en(tp, false); + r8153_aldps_en(tp, false); /* disable EEE before updating the PHY parameters */ rtl_eee_enable(tp, false); @@ -3621,28 +3682,27 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp) data = ocp_reg_read(tp, OCP_POWER_CFG); data |= EEE_CLKDIV_EN; ocp_reg_write(tp, OCP_POWER_CFG, data); + tp->ups_info.eee_ckdiv = true; data = ocp_reg_read(tp, OCP_DOWN_SPEED); data |= EN_EEE_CMODE | EN_EEE_1000 | EN_10M_CLKDIV; ocp_reg_write(tp, OCP_DOWN_SPEED, data); + tp->ups_info.eee_cmod_lv = true; + tp->ups_info._10m_ckdiv = true; + tp->ups_info.eee_plloff_giga = true; ocp_reg_write(tp, OCP_SYSCLK_CFG, 0); ocp_reg_write(tp, OCP_SYSCLK_CFG, clk_div_expo(5)); - - ups_flags |= UPS_FLAGS_EN_10M_CKDIV | UPS_FLAGS_250M_CKDIV | - UPS_FLAGS_EN_EEE_CKDIV | UPS_FLAGS_EEE_CMOD_LV_EN | - UPS_FLAGS_EEE_PLLOFF_GIGA; + tp->ups_info._250m_ckdiv = true; r8153_patch_request(tp, false); } - r8153b_ups_flags_w1w0(tp, ups_flags, 0); - if (tp->eee_en) rtl_eee_enable(tp, true); - r8153b_aldps_en(tp, true); - r8153b_enable_fc(tp); + r8153_aldps_en(tp, true); + r8152b_enable_fc(tp); r8153_u2p3en(tp, true); set_bit(PHY_RESET, &tp->flags); @@ -3793,111 +3853,118 @@ static void rtl8153_disable(struct r8152 *tp) r8153_aldps_en(tp, true); } -static void rtl8153b_disable(struct r8152 *tp) +static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex, + u32 advertising) { - r8153b_aldps_en(tp, false); - rtl_disable(tp); - rtl_reset_bmu(tp); - r8153b_aldps_en(tp, true); -} - -static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u16 speed, u8 duplex) -{ - u16 bmcr, anar, gbcr; - enum spd_duplex speed_duplex; + u16 bmcr; int ret = 0; - anar = r8152_mdio_read(tp, MII_ADVERTISE); - anar &= ~(ADVERTISE_10HALF | ADVERTISE_10FULL | - ADVERTISE_100HALF | ADVERTISE_100FULL); - if (tp->mii.supports_gmii) { - gbcr = r8152_mdio_read(tp, MII_CTRL1000); - gbcr &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF); - } else { - gbcr = 0; - } - if (autoneg == AUTONEG_DISABLE) { - if (speed == SPEED_10) { - bmcr = 0; - anar |= ADVERTISE_10HALF | ADVERTISE_10FULL; - speed_duplex = FORCE_10M_HALF; - } else if (speed == SPEED_100) { - bmcr = BMCR_SPEED100; - anar |= ADVERTISE_100HALF | ADVERTISE_100FULL; - speed_duplex = FORCE_100M_HALF; - } else if (speed == SPEED_1000 && tp->mii.supports_gmii) { - bmcr = BMCR_SPEED1000; - gbcr |= ADVERTISE_1000FULL | 
ADVERTISE_1000HALF; - speed_duplex = NWAY_1000M_FULL; - } else { - ret = -EINVAL; - goto out; - } + if (duplex != DUPLEX_HALF && duplex != DUPLEX_FULL) + return -EINVAL; - if (duplex == DUPLEX_FULL) { - bmcr |= BMCR_FULLDPLX; - if (speed != SPEED_1000) - speed_duplex++; - } - } else { - if (speed == SPEED_10) { + switch (speed) { + case SPEED_10: + bmcr = BMCR_SPEED10; if (duplex == DUPLEX_FULL) { - anar |= ADVERTISE_10HALF | ADVERTISE_10FULL; - speed_duplex = NWAY_10M_FULL; + bmcr |= BMCR_FULLDPLX; + tp->ups_info.speed_duplex = FORCE_10M_FULL; } else { - anar |= ADVERTISE_10HALF; - speed_duplex = NWAY_10M_HALF; + tp->ups_info.speed_duplex = FORCE_10M_HALF; } - } else if (speed == SPEED_100) { + break; + case SPEED_100: + bmcr = BMCR_SPEED100; if (duplex == DUPLEX_FULL) { - anar |= ADVERTISE_10HALF | ADVERTISE_10FULL; - anar |= ADVERTISE_100HALF | ADVERTISE_100FULL; - speed_duplex = NWAY_100M_FULL; + bmcr |= BMCR_FULLDPLX; + tp->ups_info.speed_duplex = FORCE_100M_FULL; } else { - anar |= ADVERTISE_10HALF; - anar |= ADVERTISE_100HALF; - speed_duplex = NWAY_100M_HALF; + tp->ups_info.speed_duplex = FORCE_100M_HALF; } - } else if (speed == SPEED_1000 && tp->mii.supports_gmii) { - if (duplex == DUPLEX_FULL) { - anar |= ADVERTISE_10HALF | ADVERTISE_10FULL; - anar |= ADVERTISE_100HALF | ADVERTISE_100FULL; - gbcr |= ADVERTISE_1000FULL | ADVERTISE_1000HALF; - } else { - anar |= ADVERTISE_10HALF; - anar |= ADVERTISE_100HALF; - gbcr |= ADVERTISE_1000HALF; + break; + case SPEED_1000: + if (tp->mii.supports_gmii) { + bmcr = BMCR_SPEED1000 | BMCR_FULLDPLX; + tp->ups_info.speed_duplex = NWAY_1000M_FULL; + break; } - speed_duplex = NWAY_1000M_FULL; - } else { + /* fall through */ + default: ret = -EINVAL; goto out; } + if (duplex == DUPLEX_FULL) + tp->mii.full_duplex = 1; + else + tp->mii.full_duplex = 0; + + tp->mii.force_media = 1; + } else { + u16 anar, tmp1; + u32 support; + + support = RTL_ADVERTISED_10_HALF | RTL_ADVERTISED_10_FULL | + RTL_ADVERTISED_100_HALF | RTL_ADVERTISED_100_FULL; + + if (tp->mii.supports_gmii) + support |= RTL_ADVERTISED_1000_FULL; + + if (!(advertising & support)) + return -EINVAL; + + anar = r8152_mdio_read(tp, MII_ADVERTISE); + tmp1 = anar & ~(ADVERTISE_10HALF | ADVERTISE_10FULL | + ADVERTISE_100HALF | ADVERTISE_100FULL); + if (advertising & RTL_ADVERTISED_10_HALF) { + tmp1 |= ADVERTISE_10HALF; + tp->ups_info.speed_duplex = NWAY_10M_HALF; + } + if (advertising & RTL_ADVERTISED_10_FULL) { + tmp1 |= ADVERTISE_10FULL; + tp->ups_info.speed_duplex = NWAY_10M_FULL; + } + + if (advertising & RTL_ADVERTISED_100_HALF) { + tmp1 |= ADVERTISE_100HALF; + tp->ups_info.speed_duplex = NWAY_100M_HALF; + } + if (advertising & RTL_ADVERTISED_100_FULL) { + tmp1 |= ADVERTISE_100FULL; + tp->ups_info.speed_duplex = NWAY_100M_FULL; + } + + if (anar != tmp1) { + r8152_mdio_write(tp, MII_ADVERTISE, tmp1); + tp->mii.advertising = tmp1; + } + + if (tp->mii.supports_gmii) { + u16 gbcr; + + gbcr = r8152_mdio_read(tp, MII_CTRL1000); + tmp1 = gbcr & ~(ADVERTISE_1000FULL | + ADVERTISE_1000HALF); + + if (advertising & RTL_ADVERTISED_1000_FULL) { + tmp1 |= ADVERTISE_1000FULL; + tp->ups_info.speed_duplex = NWAY_1000M_FULL; + } + + if (gbcr != tmp1) + r8152_mdio_write(tp, MII_CTRL1000, tmp1); + } + bmcr = BMCR_ANENABLE | BMCR_ANRESTART; + + tp->mii.force_media = 0; } if (test_and_clear_bit(PHY_RESET, &tp->flags)) bmcr |= BMCR_RESET; - if (tp->mii.supports_gmii) - r8152_mdio_write(tp, MII_CTRL1000, gbcr); - - r8152_mdio_write(tp, MII_ADVERTISE, anar); r8152_mdio_write(tp, MII_BMCR, bmcr); - switch (tp->version) 
{ - case RTL_VER_08: - case RTL_VER_09: - r8153b_ups_flags_w1w0(tp, ups_flags_speed(speed_duplex), - UPS_FLAGS_SPEED_MASK); - break; - - default: - break; - } - if (bmcr & BMCR_RESET) { int i; @@ -3982,12 +4049,12 @@ static void rtl8153b_up(struct r8152 *tp) r8153b_u1u2en(tp, false); r8153_u2p3en(tp, false); - r8153b_aldps_en(tp, false); + r8153_aldps_en(tp, false); r8153_first_init(tp); ocp_write_dword(tp, MCU_TYPE_USB, USB_RX_BUF_TH, RX_THR_B); - r8153b_aldps_en(tp, true); + r8153_aldps_en(tp, true); r8153_u2p3en(tp, true); r8153b_u1u2en(tp, true); } @@ -4002,9 +4069,9 @@ static void rtl8153b_down(struct r8152 *tp) r8153b_u1u2en(tp, false); r8153_u2p3en(tp, false); r8153b_power_cut_en(tp, false); - r8153b_aldps_en(tp, false); + r8153_aldps_en(tp, false); r8153_enter_oob(tp); - r8153b_aldps_en(tp, true); + r8153_aldps_en(tp, true); } static bool rtl8152_in_nway(struct r8152 *tp) @@ -4122,7 +4189,8 @@ static void rtl_hw_phy_work_func_t(struct work_struct *work) tp->rtl_ops.hw_phy_cfg(tp); - rtl8152_set_speed(tp, tp->autoneg, tp->speed, tp->duplex); + rtl8152_set_speed(tp, tp->autoneg, tp->speed, tp->duplex, + tp->advertising); mutex_unlock(&tp->control); @@ -4840,20 +4908,46 @@ static int rtl8152_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { struct r8152 *tp = netdev_priv(dev); + u32 advertising = 0; int ret; ret = usb_autopm_get_interface(tp->intf); if (ret < 0) goto out; + if (test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, + cmd->link_modes.advertising)) + advertising |= RTL_ADVERTISED_10_HALF; + + if (test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + cmd->link_modes.advertising)) + advertising |= RTL_ADVERTISED_10_FULL; + + if (test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + cmd->link_modes.advertising)) + advertising |= RTL_ADVERTISED_100_HALF; + + if (test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + cmd->link_modes.advertising)) + advertising |= RTL_ADVERTISED_100_FULL; + + if (test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + cmd->link_modes.advertising)) + advertising |= RTL_ADVERTISED_1000_HALF; + + if (test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + cmd->link_modes.advertising)) + advertising |= RTL_ADVERTISED_1000_FULL; + mutex_lock(&tp->control); ret = rtl8152_set_speed(tp, cmd->base.autoneg, cmd->base.speed, - cmd->base.duplex); + cmd->base.duplex, advertising); if (!ret) { tp->autoneg = cmd->base.autoneg; tp->speed = cmd->base.speed; tp->duplex = cmd->base.duplex; + tp->advertising = advertising; } mutex_unlock(&tp->control); @@ -5383,7 +5477,7 @@ static int rtl_ops_init(struct r8152 *tp) case RTL_VER_09: ops->init = r8153b_init; ops->enable = rtl8153_enable; - ops->disable = rtl8153b_disable; + ops->disable = rtl8153_disable; ops->up = rtl8153b_up; ops->down = rtl8153b_down; ops->unload = rtl8153b_unload; @@ -5568,7 +5662,13 @@ static int rtl8152_probe(struct usb_interface *intf, tp->mii.phy_id = R8152_PHY_ID; tp->autoneg = AUTONEG_ENABLE; - tp->speed = tp->mii.supports_gmii ? 
SPEED_1000 : SPEED_100; + tp->speed = SPEED_100; + tp->advertising = RTL_ADVERTISED_10_HALF | RTL_ADVERTISED_10_FULL | + RTL_ADVERTISED_100_HALF | RTL_ADVERTISED_100_FULL; + if (tp->mii.supports_gmii) { + tp->speed = SPEED_1000; + tp->advertising |= RTL_ADVERTISED_1000_FULL; + } tp->duplex = DUPLEX_FULL; tp->rx_copybreak = RTL8152_RXFG_HEADSZ; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f9a506147c8a..5b9d22338606 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -24,6 +24,9 @@ struct seq_file; struct btf; struct btf_type; +extern struct idr btf_idr; +extern spinlock_t btf_idr_lock; + /* map is generic key/value storage optionally accesible by eBPF programs */ struct bpf_map_ops { /* funcs callable from userspace (via syscall) */ @@ -647,6 +650,8 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); +struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map, + bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5fe99f322b1c..26a6d58ca78c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -355,6 +355,7 @@ struct bpf_verifier_env { struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ + bool test_state_freq; /* test verifier with different pruning frequency */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_verifier_state_list *free_list; diff --git a/include/linux/can/can-ml.h b/include/linux/can/can-ml.h new file mode 100644 index 000000000000..2f5d731ae251 --- /dev/null +++ b/include/linux/can/can-ml.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* Copyright (c) 2002-2007 Volkswagen Group Electronic Research + * Copyright (c) 2017 Pengutronix, Marc Kleine-Budde <[email protected]> + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Volkswagen nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * The provided data structures and external interfaces from this code + * are not restricted to be used by modules with a GPL compatible license. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +#ifndef CAN_ML_H +#define CAN_ML_H + +#include <linux/can.h> +#include <linux/list.h> + +#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS) +#define CAN_EFF_RCV_HASH_BITS 10 +#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS) + +enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX }; + +struct can_dev_rcv_lists { + struct hlist_head rx[RX_MAX]; + struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ]; + struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ]; + int entries; +}; + +struct can_ml_priv { + struct can_dev_rcv_lists dev_rcv_lists; +#ifdef CAN_J1939 + struct j1939_priv *j1939_priv; +#endif +}; + +#endif /* CAN_ML_H */ diff --git a/include/linux/can/core.h b/include/linux/can/core.h index 708c10d3417a..8339071ab08b 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -41,6 +41,14 @@ struct can_proto { struct proto *prot; }; +/* required_size + * macro to find the minimum size of a struct + * that includes a requested member + */ +#define CAN_REQUIRED_SIZE(struct_type, member) \ + (offsetof(typeof(struct_type), member) + \ + sizeof(((typeof(struct_type) *)(NULL))->member)) + /* function prototypes for the CAN networklayer core (af_can.c) */ extern int can_proto_register(const struct can_proto *cp); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index f01623aef2f7..9b3c720a31b1 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -169,7 +169,8 @@ void can_change_state(struct net_device *dev, struct can_frame *cf, void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx); -struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr); +struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, + u8 *len_ptr); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); void can_free_echo_skb(struct net_device *dev, unsigned int idx); diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index 9daa1119ea42..01219f2902bf 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -15,7 +15,8 @@ struct can_rx_offload { struct net_device *dev; - unsigned int (*mailbox_read)(struct can_rx_offload *offload, struct can_frame *cf, + unsigned int (*mailbox_read)(struct can_rx_offload *offload, + struct can_frame *cf, u32 *timestamp, unsigned int mb); struct sk_buff_head skb_queue; @@ -29,9 +30,13 @@ struct can_rx_offload { bool inc; }; -int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload *offload); -int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight); -int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg); +int 
can_rx_offload_add_timestamp(struct net_device *dev, + struct can_rx_offload *offload); +int can_rx_offload_add_fifo(struct net_device *dev, + struct can_rx_offload *offload, + unsigned int weight); +int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, + u64 reg); int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); int can_rx_offload_queue_sorted(struct can_rx_offload *offload, struct sk_buff *skb, u32 timestamp); diff --git a/include/linux/mdio.h b/include/linux/mdio.h index e8242ad88c81..a7604248777b 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -68,6 +68,17 @@ struct mdio_driver { #define to_mdio_driver(d) \ container_of(to_mdio_common_driver(d), struct mdio_driver, mdiodrv) +/* device driver data */ +static inline void mdiodev_set_drvdata(struct mdio_device *mdio, void *data) +{ + dev_set_drvdata(&mdio->dev, data); +} + +static inline void *mdiodev_get_drvdata(struct mdio_device *mdio) +{ + return dev_get_drvdata(&mdio->dev); +} + void mdio_device_free(struct mdio_device *mdiodev); struct mdio_device *mdio_device_create(struct mii_bus *bus, int addr); int mdio_device_register(struct mdio_device *mdiodev); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index c2f056b5766d..8dd081051a79 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1162,6 +1162,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap) +#define MLX5_CAP64_FLOWTABLE(mdev, cap) \ + MLX5_GET64(flow_table_nic_cap, (mdev)->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap) + #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->caps.hca_max[MLX5_CAP_FLOW_TABLE], cap) @@ -1225,6 +1228,10 @@ enum mlx5_qcam_feature_groups { MLX5_GET(e_switch_cap, \ mdev->caps.hca_cur[MLX5_CAP_ESWITCH], cap) +#define MLX5_CAP64_ESW_FLOWTABLE(mdev, cap) \ + MLX5_GET64(flow_table_eswitch_cap, \ + (mdev)->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) + #define MLX5_CAP_ESW_MAX(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->caps.hca_max[MLX5_CAP_ESWITCH], cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 52a56d741f79..3e80f03a387f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -626,6 +626,11 @@ struct mlx5e_resources { struct mlx5_sq_bfreg bfreg; }; +enum mlx5_sw_icm_type { + MLX5_SW_ICM_TYPE_STEERING, + MLX5_SW_ICM_TYPE_HEADER_MODIFY, +}; + #define MLX5_MAX_RESERVED_GIDS 8 struct mlx5_rsvd_gids { @@ -657,11 +662,15 @@ struct mlx5_clock { struct mlx5_pps pps_info; }; +struct mlx5_dm; struct mlx5_fw_tracer; struct mlx5_vxlan; struct mlx5_geneve; struct mlx5_hv_vhca; +#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) +#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) + struct mlx5_core_dev { struct device *device; enum mlx5_coredev_type coredev_type; @@ -695,6 +704,7 @@ struct mlx5_core_dev { atomic_t num_qps; u32 issi; struct mlx5e_resources mlx5e_res; + struct mlx5_dm *dm; struct mlx5_vxlan *vxlan; struct mlx5_geneve *geneve; struct { @@ -1078,6 +1088,10 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, size_t *offsets); struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); +int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, + u64 length, u16 uid, phys_addr_t *addr, u32 
*obj_id); +int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, + u64 length, u16 uid, phys_addr_t addr, u32 obj_id); #ifdef CONFIG_MLX5_CORE_IPOIB struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 38a70d16d8d5..98e667b176ef 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -60,7 +60,6 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, u16 vport_num); void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type); -u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport_num, u32 sqn); @@ -74,7 +73,14 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, u16 vport_num); +u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); #else /* CONFIG_MLX5_ESWITCH */ + +static inline u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) +{ + return MLX5_ESWITCH_NONE; +} + static inline enum devlink_eswitch_encap_mode mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 97ec6be62ac4..724d276ea133 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -84,6 +84,8 @@ enum { FDB_SLOW_PATH, }; +struct mlx5_pkt_reformat; +struct mlx5_modify_hdr; struct mlx5_flow_table; struct mlx5_flow_group; struct mlx5_flow_namespace; @@ -121,7 +123,7 @@ struct mlx5_flow_destination { struct { u16 num; u16 vhca_id; - u32 reformat_id; + struct mlx5_pkt_reformat *pkt_reformat; u8 flags; } vport; }; @@ -195,8 +197,8 @@ enum { struct mlx5_flow_act { u32 action; - u32 reformat_id; - u32 modify_id; + struct mlx5_modify_hdr *modify_hdr; + struct mlx5_pkt_reformat *pkt_reformat; uintptr_t esp_id; u32 flags; struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; @@ -205,8 +207,6 @@ struct mlx5_flow_act { #define MLX5_DECLARE_FLOW_ACT(name) \ struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ - .reformat_id = 0, \ - .modify_id = 0, \ .flags = 0, } /* Single destination per rule. 
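An illustrative caller-side sketch (not taken from the patch) of the reworked flow-act handling above and the allocator prototypes changed in the next hunk: mlx5_packet_reformat_alloc() now hands back an opaque struct mlx5_pkt_reformat *, which the caller stores in mlx5_flow_act.pkt_reformat instead of a numeric reformat_id. The helper name, the chosen reformat type and namespace, and the ERR_PTR() error convention below are assumptions for illustration only.

#include <linux/mlx5/fs.h>

/* Hypothetical helper: attach an encap (packet reformat) action to a rule
 * using the pointer-based API from this patch. The ERR_PTR() convention and
 * the reformat type/namespace chosen here are assumptions, not from the diff.
 */
static int example_add_encap_rule(struct mlx5_core_dev *dev,
				  struct mlx5_flow_table *ft,
				  struct mlx5_flow_spec *spec,
				  struct mlx5_flow_destination *dst,
				  void *reformat_data, size_t size)
{
	struct mlx5_pkt_reformat *reformat;
	struct mlx5_flow_handle *rule;
	MLX5_DECLARE_FLOW_ACT(flow_act);

	reformat = mlx5_packet_reformat_alloc(dev,
					      MLX5_REFORMAT_TYPE_L2_TO_VXLAN,
					      size, reformat_data,
					      MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(reformat))
		return PTR_ERR(reformat);

	flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
	flow_act.pkt_reformat = reformat;	/* was: flow_act.reformat_id = <id> */

	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1);
	if (IS_ERR(rule)) {
		mlx5_packet_reformat_dealloc(dev, reformat);
		return PTR_ERR(rule);
	}

	return 0;
}

Carrying an opaque object rather than a firmware ID in mlx5_flow_act lets the flow-steering core own the lifetime of reformat and modify-header contexts, presumably so the software-steering paths added elsewhere in this series (sw_owner tables, sw_steering ICM addresses) can manage them without firmware-assigned IDs.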
@@ -236,19 +236,18 @@ u32 mlx5_fc_id(struct mlx5_fc *counter); int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); -int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, - u8 namespace, u8 num_actions, - void *modify_actions, u32 *modify_header_id); +struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 ns_type, u8 num_actions, + void *modify_actions); void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, - u32 modify_header_id); - -int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, - int reformat_type, - size_t size, - void *reformat_data, - enum mlx5_flow_namespace_type namespace, - u32 *packet_reformat_id); + struct mlx5_modify_hdr *modify_hdr); + +struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type ns_type); void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, - u32 packet_reformat_id); + struct mlx5_pkt_reformat *reformat); #endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 66b60afd5e06..7d65c0578ac9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -282,6 +282,7 @@ enum { MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942, + MLX5_CMD_OP_SYNC_STEERING = 0xb00, MLX5_CMD_OP_FPGA_CREATE_QP = 0x960, MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961, MLX5_CMD_OP_FPGA_QUERY_QP = 0x962, @@ -485,7 +486,11 @@ union mlx5_ifc_gre_key_bits { }; struct mlx5_ifc_fte_match_set_misc_bits { - u8 reserved_at_0[0x8]; + u8 gre_c_present[0x1]; + u8 reserved_auto1[0x1]; + u8 gre_k_present[0x1]; + u8 gre_s_present[0x1]; + u8 source_vhca_port[0x4]; u8 source_sqn[0x18]; u8 source_eswitch_owner_vhca_id[0x10]; @@ -565,12 +570,38 @@ struct mlx5_ifc_fte_match_set_misc2_bits { u8 metadata_reg_a[0x20]; - u8 reserved_at_1a0[0x60]; + u8 metadata_reg_b[0x20]; + + u8 reserved_at_1c0[0x40]; }; struct mlx5_ifc_fte_match_set_misc3_bits { - u8 reserved_at_0[0x120]; + u8 inner_tcp_seq_num[0x20]; + + u8 outer_tcp_seq_num[0x20]; + + u8 inner_tcp_ack_num[0x20]; + + u8 outer_tcp_ack_num[0x20]; + + u8 reserved_at_80[0x8]; + u8 outer_vxlan_gpe_vni[0x18]; + + u8 outer_vxlan_gpe_next_protocol[0x8]; + u8 outer_vxlan_gpe_flags[0x8]; + u8 reserved_at_b0[0x10]; + + u8 icmp_header_data[0x20]; + + u8 icmpv6_header_data[0x20]; + + u8 icmp_type[0x8]; + u8 icmp_code[0x8]; + u8 icmpv6_type[0x8]; + u8 icmpv6_code[0x8]; + u8 geneve_tlv_option_0_data[0x20]; + u8 reserved_at_140[0xc0]; }; @@ -666,7 +697,15 @@ struct mlx5_ifc_flow_table_nic_cap_bits { struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer; - u8 reserved_at_e00[0x7200]; + u8 reserved_at_e00[0x1200]; + + u8 sw_steering_nic_rx_action_drop_icm_address[0x40]; + + u8 sw_steering_nic_tx_action_drop_icm_address[0x40]; + + u8 sw_steering_nic_tx_action_allow_icm_address[0x40]; + + u8 reserved_at_20c0[0x5f40]; }; enum { @@ -698,7 +737,17 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress; - u8 reserved_at_800[0x7800]; + u8 reserved_at_800[0x1000]; + + u8 sw_steering_fdb_action_drop_icm_address_rx[0x40]; + + u8 sw_steering_fdb_action_drop_icm_address_tx[0x40]; + + u8 sw_steering_uplink_icm_address_rx[0x40]; + + u8 sw_steering_uplink_icm_address_tx[0x40]; + + u8 
reserved_at_1900[0x6700]; }; enum { @@ -849,6 +898,25 @@ struct mlx5_ifc_roce_cap_bits { u8 reserved_at_100[0x700]; }; +struct mlx5_ifc_sync_steering_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0xc0]; +}; + +struct mlx5_ifc_sync_steering_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_device_mem_cap_bits { u8 memic[0x1]; u8 reserved_at_1[0x1f]; @@ -1042,6 +1110,12 @@ enum { }; enum { + MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED = 1 << 7, + MLX5_FLEX_PARSER_ICMP_V4_ENABLED = 1 << 8, + MLX5_FLEX_PARSER_ICMP_V6_ENABLED = 1 << 9, +}; + +enum { MLX5_UCTX_CAP_RAW_TX = 1UL << 0, MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1, }; @@ -1414,7 +1488,14 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_6c0[0x4]; u8 flex_parser_id_geneve_tlv_option_0[0x4]; - u8 reserved_at_6c8[0x28]; + u8 flex_parser_id_icmp_dw1[0x4]; + u8 flex_parser_id_icmp_dw0[0x4]; + u8 flex_parser_id_icmpv6_dw1[0x4]; + u8 flex_parser_id_icmpv6_dw0[0x4]; + u8 flex_parser_id_outer_first_mpls_over_gre[0x4]; + u8 flex_parser_id_outer_first_mpls_over_udp_label[0x4]; + + u8 reserved_at_6e0[0x10]; u8 sf_base_id[0x10]; u8 reserved_at_700[0x80]; @@ -2652,6 +2733,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_debug_cap_bits debug_cap; struct mlx5_ifc_fpga_cap_bits fpga_cap; struct mlx5_ifc_tls_cap_bits tls_cap; + struct mlx5_ifc_device_mem_cap_bits device_mem_cap; u8 reserved_at_0[0x8000]; }; @@ -3255,7 +3337,11 @@ struct mlx5_ifc_esw_vport_context_bits { u8 cvlan_pcp[0x3]; u8 cvlan_id[0xc]; - u8 reserved_at_60[0x7a0]; + u8 reserved_at_60[0x720]; + + u8 sw_steering_vport_icm_address_rx[0x40]; + + u8 sw_steering_vport_icm_address_tx[0x40]; }; enum { @@ -4941,23 +5027,98 @@ struct mlx5_ifc_query_hca_cap_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_function[0x1]; + u8 reserved_at_41[0xf]; + u8 function_id[0x10]; + + u8 reserved_at_60[0x20]; }; -struct mlx5_ifc_query_flow_table_out_bits { +struct mlx5_ifc_other_hca_cap_bits { + u8 roce[0x1]; + u8 reserved_0[0x27f]; +}; + +struct mlx5_ifc_query_other_hca_cap_out_bits { u8 status[0x8]; - u8 reserved_at_8[0x18]; + u8 reserved_0[0x18]; u8 syndrome[0x20]; - u8 reserved_at_40[0x80]; + u8 reserved_1[0x40]; - u8 reserved_at_c0[0x8]; + struct mlx5_ifc_other_hca_cap_bits other_capability; +}; + +struct mlx5_ifc_query_other_hca_cap_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_modify_other_hca_cap_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_other_hca_cap_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + u8 field_select[0x20]; + + struct mlx5_ifc_other_hca_cap_bits other_capability; +}; + +struct mlx5_ifc_flow_table_context_bits { + u8 reformat_en[0x1]; + u8 decap_en[0x1]; + u8 sw_owner[0x1]; + u8 termination_table[0x1]; + u8 table_miss_action[0x4]; u8 level[0x8]; - u8 reserved_at_d0[0x8]; + u8 reserved_at_10[0x8]; u8 log_size[0x8]; - u8 reserved_at_e0[0x120]; + u8 reserved_at_20[0x8]; + u8 table_miss_id[0x18]; + + u8 reserved_at_40[0x8]; + u8 lag_master_next_table_id[0x18]; + + u8 reserved_at_60[0x60]; + + u8 sw_owner_icm_root_1[0x40]; + + u8 sw_owner_icm_root_0[0x40]; + +}; + +struct 
mlx5_ifc_query_flow_table_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x80]; + + struct mlx5_ifc_flow_table_context_bits flow_table_context; }; struct mlx5_ifc_query_flow_table_in_bits { @@ -5227,7 +5388,7 @@ struct mlx5_ifc_alloc_packet_reformat_context_out_bits { u8 reserved_at_60[0x20]; }; -enum { +enum mlx5_reformat_ctx_type { MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0, MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1, MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2, @@ -5323,7 +5484,16 @@ enum { MLX5_ACTION_IN_FIELD_OUT_DIPV4 = 0x16, MLX5_ACTION_IN_FIELD_OUT_FIRST_VID = 0x17, MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT = 0x47, + MLX5_ACTION_IN_FIELD_METADATA_REG_A = 0x49, + MLX5_ACTION_IN_FIELD_METADATA_REG_B = 0x50, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0 = 0x51, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_1 = 0x52, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_2 = 0x53, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_3 = 0x54, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_4 = 0x55, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_5 = 0x56, + MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM = 0x59, + MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM = 0x5B, }; struct mlx5_ifc_alloc_modify_header_context_out_bits { @@ -7371,35 +7541,26 @@ struct mlx5_ifc_create_mkey_in_bits { u8 klm_pas_mtt[0][0x20]; }; +enum { + MLX5_FLOW_TABLE_TYPE_NIC_RX = 0x0, + MLX5_FLOW_TABLE_TYPE_NIC_TX = 0x1, + MLX5_FLOW_TABLE_TYPE_ESW_EGRESS_ACL = 0x2, + MLX5_FLOW_TABLE_TYPE_ESW_INGRESS_ACL = 0x3, + MLX5_FLOW_TABLE_TYPE_FDB = 0X4, + MLX5_FLOW_TABLE_TYPE_SNIFFER_RX = 0X5, + MLX5_FLOW_TABLE_TYPE_SNIFFER_TX = 0X6, +}; + struct mlx5_ifc_create_flow_table_out_bits { u8 status[0x8]; - u8 reserved_at_8[0x18]; + u8 icm_address_63_40[0x18]; u8 syndrome[0x20]; - u8 reserved_at_40[0x8]; + u8 icm_address_39_32[0x8]; u8 table_id[0x18]; - u8 reserved_at_60[0x20]; -}; - -struct mlx5_ifc_flow_table_context_bits { - u8 reformat_en[0x1]; - u8 decap_en[0x1]; - u8 reserved_at_2[0x1]; - u8 termination_table[0x1]; - u8 table_miss_action[0x4]; - u8 level[0x8]; - u8 reserved_at_10[0x8]; - u8 log_size[0x8]; - - u8 reserved_at_20[0x8]; - u8 table_miss_id[0x18]; - - u8 reserved_at_40[0x8]; - u8 lag_master_next_table_id[0x18]; - - u8 reserved_at_60[0xe0]; + u8 icm_address_31_0[0x20]; }; struct mlx5_ifc_create_flow_table_in_bits { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b5d28dadf964..d7d5626002e9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -901,6 +901,10 @@ struct netdev_bpf { }; }; +/* Flags for ndo_xsk_wakeup. */ +#define XDP_WAKEUP_RX (1 << 0) +#define XDP_WAKEUP_TX (1 << 1) + #ifdef CONFIG_XFRM_OFFLOAD struct xfrmdev_ops { int (*xdo_dev_state_add) (struct xfrm_state *x); @@ -1227,6 +1231,12 @@ struct tlsdev_ops; * that got dropped are freed/returned via xdp_return_frame(). * Returns negative number, means general error invoking ndo, meaning * no frames were xmit'ed and core-caller will free all frames. + * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); + * This function is used to wake up the softirq, ksoftirqd or kthread + * responsible for sending and/or receiving packets on a specific + * queue id bound to an AF_XDP socket. The flags field specifies if + * only RX, only Tx, or both should be woken up using the flags + * XDP_WAKEUP_RX and XDP_WAKEUP_TX. * struct devlink_port *(*ndo_get_devlink_port)(struct net_device *dev); * Get devlink port instance associated with a given netdev. 
* Called with a reference on the netdevice and devlink locks only, @@ -1426,8 +1436,8 @@ struct net_device_ops { int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp, u32 flags); - int (*ndo_xsk_async_xmit)(struct net_device *dev, - u32 queue_id); + int (*ndo_xsk_wakeup)(struct net_device *dev, + u32 queue_id, u32 flags); struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 77c6dc88e95d..028e684fa974 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -279,6 +279,16 @@ struct nf_bridge_info { }; #endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +/* Chain in tc_skb_ext will be used to share the tc chain with + * ovs recirc_id. It will be set to the current chain by tc + * and read by ovs to recirc_id. + */ +struct tc_skb_ext { + __u32 chain; +}; +#endif + struct sk_buff_head { /* These two members must be first. */ struct sk_buff *next; @@ -4058,6 +4068,9 @@ enum skb_ext_id { #ifdef CONFIG_XFRM SKB_EXT_SEC_PATH, #endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + TC_SKB_EXT, +#endif SKB_EXT_NUM, /* must be last */ }; diff --git a/include/linux/tnum.h b/include/linux/tnum.h index c7dc2b5902c0..c17af77f3fae 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -5,6 +5,10 @@ * propagate the unknown bits such that the tnum result represents all the * possible results for possible values of the operands. */ + +#ifndef _LINUX_TNUM_H +#define _LINUX_TNUM_H + #include <linux/types.h> struct tnum { @@ -81,3 +85,5 @@ bool tnum_in(struct tnum a, struct tnum b); int tnum_strn(char *str, size_t size, struct tnum a); /* Format a tnum as tristate binary expansion */ int tnum_sbin(char *str, size_t size, struct tnum a); + +#endif /* _LINUX_TNUM_H */ diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h index b9dcb02e756b..8e4f831d2e52 100644 --- a/include/net/bpf_sk_storage.h +++ b/include/net/bpf_sk_storage.h @@ -10,4 +10,14 @@ void bpf_sk_storage_free(struct sock *sk); extern const struct bpf_func_proto bpf_sk_storage_get_proto; extern const struct bpf_func_proto bpf_sk_storage_delete_proto; +#ifdef CONFIG_BPF_SYSCALL +int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk); +#else +static inline int bpf_sk_storage_clone(const struct sock *sk, + struct sock *newsk) +{ + return 0; +} +#endif + #endif /* _BPF_SK_STORAGE_H */ diff --git a/include/net/devlink.h b/include/net/devlink.h index 13523b0a0642..460bc629d1a4 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -458,6 +458,13 @@ enum devlink_param_generic_id { /* Maker of the board */ #define DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE "board.manufacture" +/* Part number, identifier of asic design */ +#define DEVLINK_INFO_VERSION_GENERIC_ASIC_ID "asic.id" +/* Revision of asic design */ +#define DEVLINK_INFO_VERSION_GENERIC_ASIC_REV "asic.rev" + +/* Overall FW version */ +#define DEVLINK_INFO_VERSION_GENERIC_FW "fw" /* Control processor FW version */ #define DEVLINK_INFO_VERSION_GENERIC_FW_MGMT "fw.mgmt" /* Data path microcode controlling high-speed packet processing */ diff --git a/include/net/netns/can.h b/include/net/netns/can.h index ca9bd9fba5b5..b6ab7d1530d7 100644 --- a/include/net/netns/can.h +++ b/include/net/netns/can.h @@ -9,8 +9,8 @@ #include <linux/spinlock.h> struct can_dev_rcv_lists; -struct s_stats; -struct s_pstats; +struct can_pkg_stats; +struct can_rcv_lists_stats; struct netns_can { #if IS_ENABLED(CONFIG_PROC_FS) @@ -28,11 +28,11 @@ struct netns_can { #endif /* 
receive filters subscribed for 'all' CAN devices */ - struct can_dev_rcv_lists *can_rx_alldev_list; - spinlock_t can_rcvlists_lock; - struct timer_list can_stattimer;/* timer for statistics update */ - struct s_stats *can_stats; /* packet statistics */ - struct s_pstats *can_pstats; /* receive list statistics */ + struct can_dev_rcv_lists *rx_alldev_list; + spinlock_t rcvlists_lock; + struct timer_list stattimer; /* timer for statistics update */ + struct can_pkg_stats *pkg_stats; + struct can_rcv_lists_stats *rcv_lists_stats; /* CAN GW per-net gateway jobs */ struct hlist_head cgw_list; diff --git a/include/net/tls.h b/include/net/tls.h index ec3c3ed2c6c3..c664e6dba0d1 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -275,16 +275,6 @@ struct tls_context { struct proto *sk_proto; void (*sk_destruct)(struct sock *sk); - void (*sk_proto_close)(struct sock *sk, long timeout); - - int (*setsockopt)(struct sock *sk, int level, - int optname, char __user *optval, - unsigned int optlen); - int (*getsockopt)(struct sock *sk, int level, - int optname, char __user *optval, - int __user *optlen); - int (*hash)(struct sock *sk); - void (*unhash)(struct sock *sk); union tls_crypto_context crypto_send; union tls_crypto_context crypto_recv; @@ -376,13 +366,9 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); -int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_device_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); -void tls_device_free_resources_tx(struct sock *sk); -void tls_device_init(void); -void tls_device_cleanup(void); int tls_tx_records(struct sock *sk, int flags); struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, @@ -659,7 +645,6 @@ int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, unsigned char *record_type); void tls_register_device(struct tls_device *device); void tls_unregister_device(struct tls_device *device); -int tls_device_decrypted(struct sock *sk, struct sk_buff *skb); int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout); struct sk_buff *tls_encrypt_skb(struct sk_buff *skb); @@ -672,9 +657,40 @@ int tls_sw_fallback_init(struct sock *sk, struct tls_offload_context_tx *offload_ctx, struct tls_crypto_info *crypto_info); +#ifdef CONFIG_TLS_DEVICE +void tls_device_init(void); +void tls_device_cleanup(void); +int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); +void tls_device_free_resources_tx(struct sock *sk); int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx); - void tls_device_offload_cleanup_rx(struct sock *sk); void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq); +int tls_device_decrypted(struct sock *sk, struct sk_buff *skb); +#else +static inline void tls_device_init(void) {} +static inline void tls_device_cleanup(void) {} +static inline int +tls_set_device_offload(struct sock *sk, struct tls_context *ctx) +{ + return -EOPNOTSUPP; +} + +static inline void tls_device_free_resources_tx(struct sock *sk) {} + +static inline int +tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) +{ + return -EOPNOTSUPP; +} + +static inline void tls_device_offload_cleanup_rx(struct sock *sk) {} +static inline void +tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) {} + +static inline int tls_device_decrypted(struct sock *sk, 
struct sk_buff *skb) +{ + return 0; +} +#endif #endif /* _TLS_OFFLOAD_H */ diff --git a/include/net/vxlan.h b/include/net/vxlan.h index dc1583a1fb8a..335283dbe9b3 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -391,7 +391,7 @@ static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) if (ipa->sa.sa_family == AF_INET6) return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr); else - return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); + return ipv4_is_multicast(ipa->sin.sin_addr.s_addr); } #else /* !IS_ENABLED(CONFIG_IPV6) */ @@ -403,7 +403,7 @@ static inline bool vxlan_addr_any(const union vxlan_addr *ipa) static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) { - return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); + return ipv4_is_multicast(ipa->sin.sin_addr.s_addr); } #endif /* IS_ENABLED(CONFIG_IPV6) */ diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 69796d264f06..c9398ce7960f 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -16,6 +16,13 @@ struct net_device; struct xsk_queue; +/* Masks for xdp_umem_page flags. + * The low 12-bits of the addr will be 0 since this is the page address, so we + * can use them for flags. + */ +#define XSK_NEXT_PG_CONTIG_SHIFT 0 +#define XSK_NEXT_PG_CONTIG_MASK (1ULL << XSK_NEXT_PG_CONTIG_SHIFT) + struct xdp_umem_page { void *addr; dma_addr_t dma; @@ -27,6 +34,13 @@ struct xdp_umem_fq_reuse { u64 handles[]; }; +/* Flags for the umem flags field. + * + * The NEED_WAKEUP flag is 1 due to the reuse of the flags field for public + * flags. See inlude/uapi/include/linux/if_xdp.h. + */ +#define XDP_UMEM_USES_NEED_WAKEUP (1 << 1) + struct xdp_umem { struct xsk_queue *fq; struct xsk_queue *cq; @@ -41,15 +55,27 @@ struct xdp_umem { struct work_struct work; struct page **pgs; u32 npgs; + u16 queue_id; + u8 need_wakeup; + u8 flags; int id; struct net_device *dev; struct xdp_umem_fq_reuse *fq_reuse; - u16 queue_id; bool zc; spinlock_t xsk_list_lock; struct list_head xsk_list; }; +/* Nodes are linked in the struct xdp_sock map_list field, and used to + * track which maps a certain socket reside in. + */ +struct xsk_map; +struct xsk_map_node { + struct list_head node; + struct xsk_map *map; + struct xdp_sock **map_entry; +}; + struct xdp_sock { /* struct sock must be the first member of struct xdp_sock */ struct sock sk; @@ -75,6 +101,9 @@ struct xdp_sock { /* Protects generic receive. 
*/ spinlock_t rx_lock; u64 rx_dropped; + struct list_head map_list; + /* Protects map_list */ + spinlock_t map_list_lock; }; struct xdp_buff; @@ -95,15 +124,47 @@ struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem, struct xdp_umem_fq_reuse *newq); void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq); struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id); +void xsk_set_rx_need_wakeup(struct xdp_umem *umem); +void xsk_set_tx_need_wakeup(struct xdp_umem *umem); +void xsk_clear_rx_need_wakeup(struct xdp_umem *umem); +void xsk_clear_tx_need_wakeup(struct xdp_umem *umem); +bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem); + +void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, + struct xdp_sock **map_entry); +int xsk_map_inc(struct xsk_map *map); +void xsk_map_put(struct xsk_map *map); + +static inline u64 xsk_umem_extract_addr(u64 addr) +{ + return addr & XSK_UNALIGNED_BUF_ADDR_MASK; +} + +static inline u64 xsk_umem_extract_offset(u64 addr) +{ + return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT; +} + +static inline u64 xsk_umem_add_offset_to_addr(u64 addr) +{ + return xsk_umem_extract_addr(addr) + xsk_umem_extract_offset(addr); +} static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) { - return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1)); + unsigned long page_addr; + + addr = xsk_umem_add_offset_to_addr(addr); + page_addr = (unsigned long)umem->pages[addr >> PAGE_SHIFT].addr; + + return (char *)(page_addr & PAGE_MASK) + (addr & ~PAGE_MASK); } static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) { - return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1)); + addr = xsk_umem_add_offset_to_addr(addr); + + return umem->pages[addr >> PAGE_SHIFT].dma + (addr & ~PAGE_MASK); } /* Reuse-queue aware version of FILL queue helpers */ @@ -144,6 +205,19 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr) rq->handles[rq->length++] = addr; } + +/* Handle the offset appropriately depending on aligned or unaligned mode. + * For unaligned mode, we store the offset in the upper 16-bits of the address. + * For aligned mode, we simply add the offset to the address. 
+ */ +static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address, + u64 offset) +{ + if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) + return address + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); + else + return address + offset; +} #else static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { @@ -213,6 +287,21 @@ static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, return NULL; } +static inline u64 xsk_umem_extract_addr(u64 addr) +{ + return 0; +} + +static inline u64 xsk_umem_extract_offset(u64 addr) +{ + return 0; +} + +static inline u64 xsk_umem_add_offset_to_addr(u64 addr) +{ + return 0; +} + static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) { return NULL; @@ -241,6 +330,33 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr) { } +static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem) +{ +} + +static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem) +{ +} + +static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem) +{ +} + +static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem) +{ +} + +static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem) +{ + return false; +} + +static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle, + u64 offset) +{ + return 0; +} + #endif /* CONFIG_XDP_SOCKETS */ #endif /* _LINUX_XDP_SOCK_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0e66371bea13..77c6be96d676 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -106,6 +106,7 @@ enum bpf_cmd { BPF_TASK_FD_QUERY, BPF_MAP_LOOKUP_AND_DELETE_ELEM, BPF_MAP_FREEZE, + BPF_BTF_GET_NEXT_ID, }; enum bpf_map_type { @@ -284,6 +285,9 @@ enum bpf_attach_type { */ #define BPF_F_TEST_RND_HI32 (1U << 2) +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_STATE_FREQ (1U << 3) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * two extensions: * @@ -337,6 +341,9 @@ enum bpf_attach_type { #define BPF_F_RDONLY_PROG (1U << 7) #define BPF_F_WRONLY_PROG (1U << 8) +/* Clone map from listener for newly accepted socket */ +#define BPF_F_CLONE (1U << 9) + /* flags for BPF_PROG_QUERY */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) @@ -576,6 +583,8 @@ union bpf_attr { * limited to five). * * Each time the helper is called, it appends a line to the trace. + * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is + * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. * The format of the trace is customizable, and the exact output * one will get depends on the options set in * *\/sys/kernel/debug/tracing/trace_options* (see also the @@ -1014,7 +1023,7 @@ union bpf_attr { * The realm of the route for the packet associated to *skb*, or 0 * if none was found. * - * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -1076,7 +1085,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags) + * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags) * Description * Walk a user or a kernel stack and return its id. 
To achieve * this, the helper needs *ctx*, which is a pointer to the context @@ -1725,7 +1734,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_override_return(struct pt_reg *regs, u64 rc) + * int bpf_override_return(struct pt_regs *regs, u64 rc) * Description * Used for error injection, this helper uses kprobes to override * the return value of the probed function, and to set it to *rc*. diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 0afb7d8e867f..1e988fdeba34 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -157,7 +157,8 @@ struct canfd_frame { #define CAN_TP20 4 /* VAG Transport Protocol v2.0 */ #define CAN_MCNET 5 /* Bosch MCNet */ #define CAN_ISOTP 6 /* ISO 15765-2 Transport Protocol */ -#define CAN_NPROTO 7 +#define CAN_J1939 7 /* SAE J1939 */ +#define CAN_NPROTO 8 #define SOL_CAN_BASE 100 @@ -174,6 +175,23 @@ struct sockaddr_can { /* transport protocol class address information (e.g. ISOTP) */ struct { canid_t rx_id, tx_id; } tp; + /* J1939 address information */ + struct { + /* 8 byte name when using dynamic addressing */ + __u64 name; + + /* pgn: + * 8 bit: PS in PDU2 case, else 0 + * 8 bit: PF + * 1 bit: DP + * 1 bit: reserved + */ + __u32 pgn; + + /* 1 byte address */ + __u8 addr; + } j1939; + /* reserved for future CAN protocols address information */ } can_addr; }; diff --git a/include/uapi/linux/can/j1939.h b/include/uapi/linux/can/j1939.h new file mode 100644 index 000000000000..c32325342d30 --- /dev/null +++ b/include/uapi/linux/can/j1939.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * j1939.h + * + * Copyright (c) 2010-2011 EIA Electronics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _UAPI_CAN_J1939_H_ +#define _UAPI_CAN_J1939_H_ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/can.h> + +#define J1939_MAX_UNICAST_ADDR 0xfd +#define J1939_IDLE_ADDR 0xfe +#define J1939_NO_ADDR 0xff /* == broadcast or no addr */ +#define J1939_NO_NAME 0 +#define J1939_PGN_REQUEST 0x0ea00 /* Request PG */ +#define J1939_PGN_ADDRESS_CLAIMED 0x0ee00 /* Address Claimed */ +#define J1939_PGN_ADDRESS_COMMANDED 0x0fed8 /* Commanded Address */ +#define J1939_PGN_PDU1_MAX 0x3ff00 +#define J1939_PGN_MAX 0x3ffff +#define J1939_NO_PGN 0x40000 + +/* J1939 Parameter Group Number + * + * bit 0-7 : PDU Specific (PS) + * bit 8-15 : PDU Format (PF) + * bit 16 : Data Page (DP) + * bit 17 : Reserved (R) + * bit 19-31 : set to zero + */ +typedef __u32 pgn_t; + +/* J1939 Priority + * + * bit 0-2 : Priority (P) + * bit 3-7 : set to zero + */ +typedef __u8 priority_t; + +/* J1939 NAME + * + * bit 0-20 : Identity Number + * bit 21-31 : Manufacturer Code + * bit 32-34 : ECU Instance + * bit 35-39 : Function Instance + * bit 40-47 : Function + * bit 48 : Reserved + * bit 49-55 : Vehicle System + * bit 56-59 : Vehicle System Instance + * bit 60-62 : Industry Group + * bit 63 : Arbitrary Address Capable + */ +typedef __u64 name_t; + +/* J1939 socket options */ +#define SOL_CAN_J1939 (SOL_CAN_BASE + CAN_J1939) +enum { + SO_J1939_FILTER = 1, /* set filters */ + SO_J1939_PROMISC = 2, /* set/clr promiscuous mode */ + SO_J1939_SEND_PRIO = 3, + SO_J1939_ERRQUEUE = 4, +}; + +enum { + SCM_J1939_DEST_ADDR = 1, + SCM_J1939_DEST_NAME = 2, + SCM_J1939_PRIO = 3, + SCM_J1939_ERRQUEUE = 4, +}; + +enum { + J1939_NLA_PAD, + J1939_NLA_BYTES_ACKED, +}; + +enum { + J1939_EE_INFO_NONE, + J1939_EE_INFO_TX_ABORT, +}; + +struct j1939_filter { + name_t name; + name_t name_mask; + pgn_t pgn; + pgn_t pgn_mask; + __u8 addr; + __u8 addr_mask; +}; + +#define J1939_FILTER_MAX 512 /* maximum number of j1939_filter set via setsockopt() */ + +#endif /* !_UAPI_CAN_J1939_H_ */ diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index faaa5ca2a117..be328c59389d 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -16,6 +16,18 @@ #define XDP_SHARED_UMEM (1 << 0) #define XDP_COPY (1 << 1) /* Force copy-mode */ #define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */ +/* If this option is set, the driver might go sleep and in that case + * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be + * set. If it is set, the application need to explicitly wake up the + * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are + * running the driver and the application on the same core, you should + * use this option so that the kernel will yield to the user space + * application. 
+ */ +#define XDP_USE_NEED_WAKEUP (1 << 3) + +/* Flags for xsk_umem_config flags */ +#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) struct sockaddr_xdp { __u16 sxdp_family; @@ -25,10 +37,14 @@ struct sockaddr_xdp { __u32 sxdp_shared_umem_fd; }; +/* XDP_RING flags */ +#define XDP_RING_NEED_WAKEUP (1 << 0) + struct xdp_ring_offset { __u64 producer; __u64 consumer; __u64 desc; + __u64 flags; }; struct xdp_mmap_offsets { @@ -53,6 +69,7 @@ struct xdp_umem_reg { __u64 len; /* Length of packet data area */ __u32 chunk_size; __u32 headroom; + __u32 flags; }; struct xdp_statistics { @@ -74,6 +91,11 @@ struct xdp_options { #define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL #define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL +/* Masks for unaligned chunks mode */ +#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48 +#define XSK_UNALIGNED_BUF_ADDR_MASK \ + ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) + /* Rx/Tx descriptor */ struct xdp_desc { __u64 addr; diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index f271f1ec50ae..1887a451c388 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -123,6 +123,9 @@ struct ovs_vport_stats { /* Allow datapath to associate multiple Netlink PIDs to each vport */ #define OVS_DP_F_VPORT_PIDS (1 << 1) +/* Allow tc offload recirc sharing */ +#define OVS_DP_F_TC_RECIRC_SHARING (1 << 2) + /* Fixed logical ports. */ #define OVSP_LOCAL ((__u32)0) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index b057aeeb6338..a6aa466fac9e 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -160,6 +160,8 @@ enum { TCA_POLICE_RESULT, TCA_POLICE_TM, TCA_POLICE_PAD, + TCA_POLICE_RATE64, + TCA_POLICE_PEAKRATE64, __TCA_POLICE_MAX #define TCA_POLICE_RESULT TCA_POLICE_RESULT }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 5fcc7a17eb5a..adb3adcebe3c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -195,8 +195,8 @@ i < btf_type_vlen(struct_type); \ i++, member++) -static DEFINE_IDR(btf_idr); -static DEFINE_SPINLOCK(btf_idr_lock); +DEFINE_IDR(btf_idr); +DEFINE_SPINLOCK(btf_idr_lock); struct btf { void *data; @@ -3376,6 +3376,15 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, btf_type_ops(t)->seq_show(btf, t, type_id, obj, 0, m); } +#ifdef CONFIG_PROC_FS +static void bpf_btf_show_fdinfo(struct seq_file *m, struct file *filp) +{ + const struct btf *btf = filp->private_data; + + seq_printf(m, "btf_id:\t%u\n", btf->id); +} +#endif + static int btf_release(struct inode *inode, struct file *filp) { btf_put(filp->private_data); @@ -3383,6 +3392,9 @@ static int btf_release(struct inode *inode, struct file *filp) } const struct file_operations btf_fops = { +#ifdef CONFIG_PROC_FS + .show_fdinfo = bpf_btf_show_fdinfo, +#endif .release = btf_release, }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 272071e9112f..82eabd4e38ad 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -683,8 +683,8 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd) } /* map_idr_lock should have been held */ -static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, - bool uref) +static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, + bool uref) { int refold; @@ -704,6 +704,16 @@ static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, return map; } +struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, bool uref) +{ + spin_lock_bh(&map_idr_lock); + map = __bpf_map_inc_not_zero(map, uref); + spin_unlock_bh(&map_idr_lock); + + 
return map; +} +EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); + int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) { return -ENOTSUPP; @@ -1619,6 +1629,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT | + BPF_F_TEST_STATE_FREQ | BPF_F_TEST_RND_HI32)) return -EINVAL; @@ -2183,7 +2194,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr) spin_lock_bh(&map_idr_lock); map = idr_find(&map_idr, id); if (map) - map = bpf_map_inc_not_zero(map, true); + map = __bpf_map_inc_not_zero(map, true); else map = ERR_PTR(-ENOENT); spin_unlock_bh(&map_idr_lock); @@ -2880,6 +2891,10 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz err = bpf_obj_get_next_id(&attr, uattr, &map_idr, &map_idr_lock); break; + case BPF_BTF_GET_NEXT_ID: + err = bpf_obj_get_next_id(&attr, uattr, + &btf_idr, &btf_idr_lock); + break; case BPF_PROG_GET_FD_BY_ID: err = bpf_prog_get_fd_by_id(&attr); break; diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c index 4659349fc795..7ae5dddd1fe6 100644 --- a/kernel/bpf/sysfs_btf.c +++ b/kernel/bpf/sysfs_btf.c @@ -30,17 +30,12 @@ static struct kobject *btf_kobj; static int __init btf_vmlinux_init(void) { - int err; - if (!_binary__btf_vmlinux_bin_start) return 0; btf_kobj = kobject_create_and_add("btf", kernel_kobj); - if (IS_ERR(btf_kobj)) { - err = PTR_ERR(btf_kobj); - btf_kobj = NULL; - return err; - } + if (!btf_kobj) + return -ENOMEM; bin_attr_btf_vmlinux.size = _binary__btf_vmlinux_bin_end - _binary__btf_vmlinux_bin_start; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 16d66bd7af09..3fb50757e812 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7223,7 +7223,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) struct bpf_verifier_state_list *sl, **pprev; struct bpf_verifier_state *cur = env->cur_state, *new; int i, j, err, states_cnt = 0; - bool add_new_state = false; + bool add_new_state = env->test_state_freq ? 
true : false; cur->last_insn_idx = env->prev_insn_idx; if (!env->insn_aux_data[insn_idx].prune_point) @@ -9263,6 +9263,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, env->allow_ptr_leaks = is_priv; + if (is_priv) + env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; + ret = replace_map_fd_with_map_ptr(env); if (ret < 0) goto skip_full_check; diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 9bb96ace9fa1..942c662e2eed 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -13,8 +13,71 @@ struct xsk_map { struct bpf_map map; struct xdp_sock **xsk_map; struct list_head __percpu *flush_list; + spinlock_t lock; /* Synchronize map updates */ }; +int xsk_map_inc(struct xsk_map *map) +{ + struct bpf_map *m = &map->map; + + m = bpf_map_inc(m, false); + return PTR_ERR_OR_ZERO(m); +} + +void xsk_map_put(struct xsk_map *map) +{ + bpf_map_put(&map->map); +} + +static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map, + struct xdp_sock **map_entry) +{ + struct xsk_map_node *node; + int err; + + node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN); + if (!node) + return NULL; + + err = xsk_map_inc(map); + if (err) { + kfree(node); + return ERR_PTR(err); + } + + node->map = map; + node->map_entry = map_entry; + return node; +} + +static void xsk_map_node_free(struct xsk_map_node *node) +{ + xsk_map_put(node->map); + kfree(node); +} + +static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node) +{ + spin_lock_bh(&xs->map_list_lock); + list_add_tail(&node->node, &xs->map_list); + spin_unlock_bh(&xs->map_list_lock); +} + +static void xsk_map_sock_delete(struct xdp_sock *xs, + struct xdp_sock **map_entry) +{ + struct xsk_map_node *n, *tmp; + + spin_lock_bh(&xs->map_list_lock); + list_for_each_entry_safe(n, tmp, &xs->map_list, node) { + if (map_entry == n->map_entry) { + list_del(&n->node); + xsk_map_node_free(n); + } + } + spin_unlock_bh(&xs->map_list_lock); +} + static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) { struct xsk_map *m; @@ -34,6 +97,7 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&m->map, attr); + spin_lock_init(&m->lock); cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *); cost += sizeof(struct list_head) * num_possible_cpus(); @@ -71,21 +135,9 @@ free_m: static void xsk_map_free(struct bpf_map *map) { struct xsk_map *m = container_of(map, struct xsk_map, map); - int i; bpf_clear_redirect_map(map); synchronize_net(); - - for (i = 0; i < map->max_entries; i++) { - struct xdp_sock *xs; - - xs = m->xsk_map[i]; - if (!xs) - continue; - - sock_put((struct sock *)xs); - } - free_percpu(m->flush_list); bpf_map_area_free(m->xsk_map); kfree(m); @@ -164,8 +216,9 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct xsk_map *m = container_of(map, struct xsk_map, map); + struct xdp_sock *xs, *old_xs, **map_entry; u32 i = *(u32 *)key, fd = *(u32 *)value; - struct xdp_sock *xs, *old_xs; + struct xsk_map_node *node; struct socket *sock; int err; @@ -173,8 +226,6 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, return -EINVAL; if (unlikely(i >= m->map.max_entries)) return -E2BIG; - if (unlikely(map_flags == BPF_NOEXIST)) - return -EEXIST; sock = sockfd_lookup(fd, &err); if (!sock) @@ -192,32 +243,70 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, return -EOPNOTSUPP; } - sock_hold(sock->sk); + map_entry = &m->xsk_map[i]; + node = 
xsk_map_node_alloc(m, map_entry); + if (IS_ERR(node)) { + sockfd_put(sock); + return PTR_ERR(node); + } - old_xs = xchg(&m->xsk_map[i], xs); + spin_lock_bh(&m->lock); + old_xs = READ_ONCE(*map_entry); + if (old_xs == xs) { + err = 0; + goto out; + } else if (old_xs && map_flags == BPF_NOEXIST) { + err = -EEXIST; + goto out; + } else if (!old_xs && map_flags == BPF_EXIST) { + err = -ENOENT; + goto out; + } + xsk_map_sock_add(xs, node); + WRITE_ONCE(*map_entry, xs); if (old_xs) - sock_put((struct sock *)old_xs); - + xsk_map_sock_delete(old_xs, map_entry); + spin_unlock_bh(&m->lock); sockfd_put(sock); return 0; + +out: + spin_unlock_bh(&m->lock); + sockfd_put(sock); + xsk_map_node_free(node); + return err; } static int xsk_map_delete_elem(struct bpf_map *map, void *key) { struct xsk_map *m = container_of(map, struct xsk_map, map); - struct xdp_sock *old_xs; + struct xdp_sock *old_xs, **map_entry; int k = *(u32 *)key; if (k >= map->max_entries) return -EINVAL; - old_xs = xchg(&m->xsk_map[k], NULL); + spin_lock_bh(&m->lock); + map_entry = &m->xsk_map[k]; + old_xs = xchg(map_entry, NULL); if (old_xs) - sock_put((struct sock *)old_xs); + xsk_map_sock_delete(old_xs, map_entry); + spin_unlock_bh(&m->lock); return 0; } +void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, + struct xdp_sock **map_entry) +{ + spin_lock_bh(&map->lock); + if (READ_ONCE(*map_entry) == xs) { + WRITE_ONCE(*map_entry, NULL); + xsk_map_sock_delete(xs, map_entry); + } + spin_unlock_bh(&map->lock); +} + const struct bpf_map_ops xsk_map_ops = { .map_alloc = xsk_map_alloc, .map_free = xsk_map_free, diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 98da8998c25c..b09d7b1ffffd 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -520,7 +520,8 @@ config BPF_EVENTS bool default y help - This allows the user to attach BPF programs to kprobe events. + This allows the user to attach BPF programs to kprobe, uprobe, and + tracepoint events. 
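For context on the XSKMAP rework above, a minimal XDP program sketch (not part of this series) that consumes map entries installed through xsk_map_update_elem(); the BTF-style map definition and SEC() macros assume a recent libbpf.

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* AF_XDP sockets are installed into this map from user space,
	 * keyed by Rx queue index (see xsk_map_update_elem() above).
	 */
	struct {
		__uint(type, BPF_MAP_TYPE_XSKMAP);
		__uint(max_entries, 64);
		__type(key, __u32);
		__type(value, __u32);
	} xsks_map SEC(".maps");

	SEC("xdp")
	int xdp_redirect_xsk(struct xdp_md *ctx)
	{
		/* XDP_REDIRECT to the socket bound to this Rx queue, if any */
		return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, 0);
	}

	char _license[] SEC("license") = "GPL";

User space typically fills xsks_map with bpf_map_update_elem(map_fd, &queue_id, &xsk_fd, 0), which with this rework takes m->lock and tracks the socket via an xsk_map_node on xs->map_list instead of holding a sock reference.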
config DYNAMIC_EVENTS def_bool n diff --git a/lib/test_bpf.c b/lib/test_bpf.c index c41705835cba..5ef3eccee27c 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -867,7 +867,7 @@ static struct bpf_test tests[] = { }, CLASSIC, { }, - { { 4, 10 ^ 300 }, { 20, 10 ^ 300 } }, + { { 4, 0xA ^ 300 }, { 20, 0xA ^ 300 } }, }, { "SPILL_FILL", diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index 4bb418313720..3286f9d527d3 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -180,8 +180,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc) static void in_cache_put(in_cache_entry *entry) { if (refcount_dec_and_test(&entry->use)) { - memset(entry, 0, sizeof(in_cache_entry)); - kfree(entry); + kzfree(entry); } } @@ -416,8 +415,7 @@ static eg_cache_entry *eg_cache_get_by_src_ip(__be32 ipaddr, static void eg_cache_put(eg_cache_entry *entry) { if (refcount_dec_and_test(&entry->use)) { - memset(entry, 0, sizeof(eg_cache_entry)); - kfree(entry); + kzfree(entry); } } diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index bd3da9af5ef6..45d8e1d5d033 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -216,9 +216,7 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb) pvcc->chan.mtu += LLC_LEN; break; } - pr_debug("Couldn't autodetect yet (skb: %02X %02X %02X %02X %02X %02X)\n", - skb->data[0], skb->data[1], skb->data[2], - skb->data[3], skb->data[4], skb->data[5]); + pr_debug("Couldn't autodetect yet (skb: %6ph)\n", skb->data); goto error; case e_vc: break; diff --git a/net/can/Kconfig b/net/can/Kconfig index d4319aa3e1b1..d77042752457 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -53,6 +53,8 @@ config CAN_GW They can be modified with AND/OR/XOR/SET operations as configured by the netlink configuration interface known e.g. from iptables. 
+source "net/can/j1939/Kconfig" + source "drivers/net/can/Kconfig" endif diff --git a/net/can/Makefile b/net/can/Makefile index 1242bbbfe57f..08bd217fc051 100644 --- a/net/can/Makefile +++ b/net/can/Makefile @@ -15,3 +15,5 @@ can-bcm-y := bcm.o obj-$(CONFIG_CAN_GW) += can-gw.o can-gw-y := gw.o + +obj-$(CONFIG_CAN_J1939) += j1939/ diff --git a/net/can/af_can.c b/net/can/af_can.c index 9a9a51847c7c..5518a7d9eed9 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -58,6 +58,7 @@ #include <linux/can.h> #include <linux/can/core.h> #include <linux/can/skb.h> +#include <linux/can/can-ml.h> #include <linux/ratelimit.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -198,7 +199,7 @@ int can_send(struct sk_buff *skb, int loop) { struct sk_buff *newskb = NULL; struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - struct s_stats *can_stats = dev_net(skb->dev)->can.can_stats; + struct can_pkg_stats *pkg_stats = dev_net(skb->dev)->can.pkg_stats; int err = -EINVAL; if (skb->len == CAN_MTU) { @@ -285,8 +286,8 @@ int can_send(struct sk_buff *skb, int loop) netif_rx_ni(newskb); /* update statistics */ - can_stats->tx_frames++; - can_stats->tx_frames_delta++; + pkg_stats->tx_frames++; + pkg_stats->tx_frames_delta++; return 0; @@ -298,13 +299,15 @@ EXPORT_SYMBOL(can_send); /* af_can rx path */ -static struct can_dev_rcv_lists *find_dev_rcv_lists(struct net *net, - struct net_device *dev) +static struct can_dev_rcv_lists *can_dev_rcv_lists_find(struct net *net, + struct net_device *dev) { - if (!dev) - return net->can.can_rx_alldev_list; - else - return (struct can_dev_rcv_lists *)dev->ml_priv; + if (dev) { + struct can_ml_priv *ml_priv = dev->ml_priv; + return &ml_priv->dev_rcv_lists; + } else { + return net->can.rx_alldev_list; + } } /** @@ -331,7 +334,7 @@ static unsigned int effhash(canid_t can_id) } /** - * find_rcv_list - determine optimal filterlist inside device filter struct + * can_rcv_list_find - determine optimal filterlist inside device filter struct * @can_id: pointer to CAN identifier of a given can_filter * @mask: pointer to CAN mask of a given can_filter * @d: pointer to the device filter struct @@ -357,8 +360,8 @@ static unsigned int effhash(canid_t can_id) * Constistency checked mask. * Reduced can_id to have a preprocessed filter compare value. 
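 *
 * Example: a filter with can_id 0x123 and mask (CAN_SFF_MASK |
 * CAN_EFF_RTR_FLAGS) selects the single-id list rx_sff[0x123], while
 * mask 0 selects rx[RX_ALL].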
*/ -static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, - struct can_dev_rcv_lists *d) +static struct hlist_head *can_rcv_list_find(canid_t *can_id, canid_t *mask, + struct can_dev_rcv_lists *dev_rcv_lists) { canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */ @@ -366,7 +369,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, if (*mask & CAN_ERR_FLAG) { /* clear CAN_ERR_FLAG in filter entry */ *mask &= CAN_ERR_MASK; - return &d->rx[RX_ERR]; + return &dev_rcv_lists->rx[RX_ERR]; } /* with cleared CAN_ERR_FLAG we have a simple mask/value filterpair */ @@ -382,26 +385,26 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, /* inverse can_id/can_mask filter */ if (inv) - return &d->rx[RX_INV]; + return &dev_rcv_lists->rx[RX_INV]; /* mask == 0 => no condition testing at receive time */ if (!(*mask)) - return &d->rx[RX_ALL]; + return &dev_rcv_lists->rx[RX_ALL]; /* extra filterlists for the subscription of a single non-RTR can_id */ if (((*mask & CAN_EFF_RTR_FLAGS) == CAN_EFF_RTR_FLAGS) && !(*can_id & CAN_RTR_FLAG)) { if (*can_id & CAN_EFF_FLAG) { if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) - return &d->rx_eff[effhash(*can_id)]; + return &dev_rcv_lists->rx_eff[effhash(*can_id)]; } else { if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS)) - return &d->rx_sff[*can_id]; + return &dev_rcv_lists->rx_sff[*can_id]; } } /* default: filter via can_id/can_mask */ - return &d->rx[RX_FIL]; + return &dev_rcv_lists->rx[RX_FIL]; } /** @@ -438,10 +441,10 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data, char *ident, struct sock *sk) { - struct receiver *r; - struct hlist_head *rl; - struct can_dev_rcv_lists *d; - struct s_pstats *can_pstats = net->can.can_pstats; + struct receiver *rcv; + struct hlist_head *rcv_list; + struct can_dev_rcv_lists *dev_rcv_lists; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; int err = 0; /* insert new receiver (dev,canid,mask) -> (func,data) */ @@ -452,36 +455,30 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, if (dev && !net_eq(net, dev_net(dev))) return -ENODEV; - r = kmem_cache_alloc(rcv_cache, GFP_KERNEL); - if (!r) + rcv = kmem_cache_alloc(rcv_cache, GFP_KERNEL); + if (!rcv) return -ENOMEM; - spin_lock(&net->can.can_rcvlists_lock); + spin_lock_bh(&net->can.rcvlists_lock); - d = find_dev_rcv_lists(net, dev); - if (d) { - rl = find_rcv_list(&can_id, &mask, d); + dev_rcv_lists = can_dev_rcv_lists_find(net, dev); + rcv_list = can_rcv_list_find(&can_id, &mask, dev_rcv_lists); - r->can_id = can_id; - r->mask = mask; - r->matches = 0; - r->func = func; - r->data = data; - r->ident = ident; - r->sk = sk; + rcv->can_id = can_id; + rcv->mask = mask; + rcv->matches = 0; + rcv->func = func; + rcv->data = data; + rcv->ident = ident; + rcv->sk = sk; - hlist_add_head_rcu(&r->list, rl); - d->entries++; - - can_pstats->rcv_entries++; - if (can_pstats->rcv_entries_max < can_pstats->rcv_entries) - can_pstats->rcv_entries_max = can_pstats->rcv_entries; - } else { - kmem_cache_free(rcv_cache, r); - err = -ENODEV; - } + hlist_add_head_rcu(&rcv->list, rcv_list); + dev_rcv_lists->entries++; - spin_unlock(&net->can.can_rcvlists_lock); + rcv_lists_stats->rcv_entries++; + rcv_lists_stats->rcv_entries_max = max(rcv_lists_stats->rcv_entries_max, + rcv_lists_stats->rcv_entries); + spin_unlock_bh(&net->can.rcvlists_lock); return err; } @@ -490,10 +487,10 @@ 
EXPORT_SYMBOL(can_rx_register); /* can_rx_delete_receiver - rcu callback for single receiver entry removal */ static void can_rx_delete_receiver(struct rcu_head *rp) { - struct receiver *r = container_of(rp, struct receiver, rcu); - struct sock *sk = r->sk; + struct receiver *rcv = container_of(rp, struct receiver, rcu); + struct sock *sk = rcv->sk; - kmem_cache_free(rcv_cache, r); + kmem_cache_free(rcv_cache, rcv); if (sk) sock_put(sk); } @@ -513,10 +510,10 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data) { - struct receiver *r = NULL; - struct hlist_head *rl; - struct s_pstats *can_pstats = net->can.can_pstats; - struct can_dev_rcv_lists *d; + struct receiver *rcv = NULL; + struct hlist_head *rcv_list; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; + struct can_dev_rcv_lists *dev_rcv_lists; if (dev && dev->type != ARPHRD_CAN) return; @@ -524,83 +521,69 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id, if (dev && !net_eq(net, dev_net(dev))) return; - spin_lock(&net->can.can_rcvlists_lock); - - d = find_dev_rcv_lists(net, dev); - if (!d) { - pr_err("BUG: receive list not found for dev %s, id %03X, mask %03X\n", - DNAME(dev), can_id, mask); - goto out; - } + spin_lock_bh(&net->can.rcvlists_lock); - rl = find_rcv_list(&can_id, &mask, d); + dev_rcv_lists = can_dev_rcv_lists_find(net, dev); + rcv_list = can_rcv_list_find(&can_id, &mask, dev_rcv_lists); /* Search the receiver list for the item to delete. This should * exist, since no receiver may be unregistered that hasn't * been registered before. */ - - hlist_for_each_entry_rcu(r, rl, list) { - if (r->can_id == can_id && r->mask == mask && - r->func == func && r->data == data) + hlist_for_each_entry_rcu(rcv, rcv_list, list) { + if (rcv->can_id == can_id && rcv->mask == mask && + rcv->func == func && rcv->data == data) break; } /* Check for bugs in CAN protocol implementations using af_can.c: - * 'r' will be NULL if no matching list item was found for removal. + * 'rcv' will be NULL if no matching list item was found for removal. 
*/ - - if (!r) { + if (!rcv) { WARN(1, "BUG: receive list entry not found for dev %s, id %03X, mask %03X\n", DNAME(dev), can_id, mask); goto out; } - hlist_del_rcu(&r->list); - d->entries--; + hlist_del_rcu(&rcv->list); + dev_rcv_lists->entries--; - if (can_pstats->rcv_entries > 0) - can_pstats->rcv_entries--; - - /* remove device structure requested by NETDEV_UNREGISTER */ - if (d->remove_on_zero_entries && !d->entries) { - kfree(d); - dev->ml_priv = NULL; - } + if (rcv_lists_stats->rcv_entries > 0) + rcv_lists_stats->rcv_entries--; out: - spin_unlock(&net->can.can_rcvlists_lock); + spin_unlock_bh(&net->can.rcvlists_lock); /* schedule the receiver item for deletion */ - if (r) { - if (r->sk) - sock_hold(r->sk); - call_rcu(&r->rcu, can_rx_delete_receiver); + if (rcv) { + if (rcv->sk) + sock_hold(rcv->sk); + call_rcu(&rcv->rcu, can_rx_delete_receiver); } } EXPORT_SYMBOL(can_rx_unregister); -static inline void deliver(struct sk_buff *skb, struct receiver *r) +static inline void deliver(struct sk_buff *skb, struct receiver *rcv) { - r->func(skb, r->data); - r->matches++; + rcv->func(skb, rcv->data); + rcv->matches++; } -static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb) +static int can_rcv_filter(struct can_dev_rcv_lists *dev_rcv_lists, struct sk_buff *skb) { - struct receiver *r; + struct receiver *rcv; int matches = 0; struct can_frame *cf = (struct can_frame *)skb->data; canid_t can_id = cf->can_id; - if (d->entries == 0) + if (dev_rcv_lists->entries == 0) return 0; if (can_id & CAN_ERR_FLAG) { /* check for error message frame entries only */ - hlist_for_each_entry_rcu(r, &d->rx[RX_ERR], list) { - if (can_id & r->mask) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_ERR], list) { + if (can_id & rcv->mask) { + deliver(skb, rcv); matches++; } } @@ -608,23 +591,23 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb) } /* check for unfiltered entries */ - hlist_for_each_entry_rcu(r, &d->rx[RX_ALL], list) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_ALL], list) { + deliver(skb, rcv); matches++; } /* check for can_id/mask entries */ - hlist_for_each_entry_rcu(r, &d->rx[RX_FIL], list) { - if ((can_id & r->mask) == r->can_id) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_FIL], list) { + if ((can_id & rcv->mask) == rcv->can_id) { + deliver(skb, rcv); matches++; } } /* check for inverted can_id/mask entries */ - hlist_for_each_entry_rcu(r, &d->rx[RX_INV], list) { - if ((can_id & r->mask) != r->can_id) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_INV], list) { + if ((can_id & rcv->mask) != rcv->can_id) { + deliver(skb, rcv); matches++; } } @@ -634,16 +617,16 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb) return matches; if (can_id & CAN_EFF_FLAG) { - hlist_for_each_entry_rcu(r, &d->rx_eff[effhash(can_id)], list) { - if (r->can_id == can_id) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx_eff[effhash(can_id)], list) { + if (rcv->can_id == can_id) { + deliver(skb, rcv); matches++; } } } else { can_id &= CAN_SFF_MASK; - hlist_for_each_entry_rcu(r, &d->rx_sff[can_id], list) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx_sff[can_id], list) { + deliver(skb, rcv); matches++; } } @@ -653,14 +636,14 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb) static void can_receive(struct sk_buff *skb, struct net_device *dev) { - struct 
can_dev_rcv_lists *d; + struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = dev_net(dev); - struct s_stats *can_stats = net->can.can_stats; + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; int matches; /* update statistics */ - can_stats->rx_frames++; - can_stats->rx_frames_delta++; + pkg_stats->rx_frames++; + pkg_stats->rx_frames_delta++; /* create non-zero unique skb identifier together with *skb */ while (!(can_skb_prv(skb)->skbcnt)) @@ -669,12 +652,11 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); /* deliver the packet to sockets listening on all devices */ - matches = can_rcv_filter(net->can.can_rx_alldev_list, skb); + matches = can_rcv_filter(net->can.rx_alldev_list, skb); /* find receive list for this device */ - d = find_dev_rcv_lists(net, dev); - if (d) - matches += can_rcv_filter(d, skb); + dev_rcv_lists = can_dev_rcv_lists_find(net, dev); + matches += can_rcv_filter(dev_rcv_lists, skb); rcu_read_unlock(); @@ -682,8 +664,8 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) consume_skb(skb); if (matches > 0) { - can_stats->matches++; - can_stats->matches_delta++; + pkg_stats->matches++; + pkg_stats->matches_delta++; } } @@ -789,41 +771,14 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct can_dev_rcv_lists *d; if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; switch (msg) { case NETDEV_REGISTER: - - /* create new dev_rcv_lists for this device */ - d = kzalloc(sizeof(*d), GFP_KERNEL); - if (!d) - return NOTIFY_DONE; - BUG_ON(dev->ml_priv); - dev->ml_priv = d; - - break; - - case NETDEV_UNREGISTER: - spin_lock(&dev_net(dev)->can.can_rcvlists_lock); - - d = dev->ml_priv; - if (d) { - if (d->entries) { - d->remove_on_zero_entries = 1; - } else { - kfree(d); - dev->ml_priv = NULL; - } - } else { - pr_err("can: notifier: receive list not found for dev %s\n", - dev->name); - } - - spin_unlock(&dev_net(dev)->can.can_rcvlists_lock); - + WARN(!dev->ml_priv, + "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n"); break; } @@ -832,66 +787,51 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, static int can_pernet_init(struct net *net) { - spin_lock_init(&net->can.can_rcvlists_lock); - net->can.can_rx_alldev_list = - kzalloc(sizeof(*net->can.can_rx_alldev_list), GFP_KERNEL); - if (!net->can.can_rx_alldev_list) + spin_lock_init(&net->can.rcvlists_lock); + net->can.rx_alldev_list = + kzalloc(sizeof(*net->can.rx_alldev_list), GFP_KERNEL); + if (!net->can.rx_alldev_list) goto out; - net->can.can_stats = kzalloc(sizeof(*net->can.can_stats), GFP_KERNEL); - if (!net->can.can_stats) - goto out_free_alldev_list; - net->can.can_pstats = kzalloc(sizeof(*net->can.can_pstats), GFP_KERNEL); - if (!net->can.can_pstats) - goto out_free_can_stats; + net->can.pkg_stats = kzalloc(sizeof(*net->can.pkg_stats), GFP_KERNEL); + if (!net->can.pkg_stats) + goto out_free_rx_alldev_list; + net->can.rcv_lists_stats = kzalloc(sizeof(*net->can.rcv_lists_stats), GFP_KERNEL); + if (!net->can.rcv_lists_stats) + goto out_free_pkg_stats; if (IS_ENABLED(CONFIG_PROC_FS)) { /* the statistics are updated every second (timer triggered) */ if (stats_timer) { - timer_setup(&net->can.can_stattimer, can_stat_update, + timer_setup(&net->can.stattimer, can_stat_update, 0); - mod_timer(&net->can.can_stattimer, + mod_timer(&net->can.stattimer, round_jiffies(jiffies + HZ)); } - net->can.can_stats->jiffies_init 
= jiffies; + net->can.pkg_stats->jiffies_init = jiffies; can_init_proc(net); } return 0; - out_free_can_stats: - kfree(net->can.can_stats); - out_free_alldev_list: - kfree(net->can.can_rx_alldev_list); + out_free_pkg_stats: + kfree(net->can.pkg_stats); + out_free_rx_alldev_list: + kfree(net->can.rx_alldev_list); out: return -ENOMEM; } static void can_pernet_exit(struct net *net) { - struct net_device *dev; - if (IS_ENABLED(CONFIG_PROC_FS)) { can_remove_proc(net); if (stats_timer) - del_timer_sync(&net->can.can_stattimer); + del_timer_sync(&net->can.stattimer); } - /* remove created dev_rcv_lists from still registered CAN devices */ - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv) { - struct can_dev_rcv_lists *d = dev->ml_priv; - - BUG_ON(d->entries); - kfree(d); - dev->ml_priv = NULL; - } - } - rcu_read_unlock(); - - kfree(net->can.can_rx_alldev_list); - kfree(net->can.can_stats); - kfree(net->can.can_pstats); + kfree(net->can.rx_alldev_list); + kfree(net->can.pkg_stats); + kfree(net->can.rcv_lists_stats); } /* af_can module init/exit functions */ diff --git a/net/can/af_can.h b/net/can/af_can.h index 9cdb79083623..7c2d9161e224 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -60,25 +60,10 @@ struct receiver { struct rcu_head rcu; }; -#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS) -#define CAN_EFF_RCV_HASH_BITS 10 -#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS) - -enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX }; - -/* per device receive filters linked at dev->ml_priv */ -struct can_dev_rcv_lists { - struct hlist_head rx[RX_MAX]; - struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ]; - struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ]; - int remove_on_zero_entries; - int entries; -}; - /* statistic structures */ /* can be reset e.g. by can_init_stats() */ -struct s_stats { +struct can_pkg_stats { unsigned long jiffies_init; unsigned long rx_frames; @@ -103,7 +88,7 @@ struct s_stats { }; /* persistent statistics */ -struct s_pstats { +struct can_rcv_lists_stats { unsigned long stats_reset; unsigned long user_reset; unsigned long rcv_entries; diff --git a/net/can/bcm.c b/net/can/bcm.c index 28fd1a1c8487..c96fa0f33db3 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1294,7 +1294,7 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) /* no bound device as default => check msg_name */ DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); - if (msg->msg_namelen < sizeof(*addr)) + if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex)) return -EINVAL; if (addr->can_family != AF_CAN) @@ -1536,7 +1536,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, struct net *net = sock_net(sk); int ret = 0; - if (len < sizeof(*addr)) + if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex)) return -EINVAL; lock_sock(sk); diff --git a/net/can/j1939/Kconfig b/net/can/j1939/Kconfig new file mode 100644 index 000000000000..2998298b71ec --- /dev/null +++ b/net/can/j1939/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# SAE J1939 network layer core configuration +# + +config CAN_J1939 + tristate "SAE J1939" + depends on CAN + help + SAE J1939 + Say Y to have in-kernel support for j1939 socket type. This + allows communication according to SAE j1939. + The relevant parts in kernel are + SAE j1939-21 (datalink & transport protocol) + & SAE j1939-81 (network management). 
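For context, a minimal user-space sketch (not part of this series) that exercises the new CAN_J1939 socket type and the UAPI added in include/uapi/linux/can/j1939.h above; the interface name, source address and filter values are placeholders.

	#include <unistd.h>
	#include <net/if.h>
	#include <sys/socket.h>
	#include <linux/can.h>
	#include <linux/can/j1939.h>

	int main(void)
	{
		struct j1939_filter filt = {
			.pgn = J1939_PGN_ADDRESS_CLAIMED,	/* 0x0ee00 only */
			.pgn_mask = J1939_PGN_MAX,
		};
		struct sockaddr_can addr = { 0 };
		int sock;

		sock = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);
		if (sock < 0)
			return 1;

		addr.can_family = AF_CAN;
		addr.can_ifindex = if_nametoindex("can0");	/* placeholder */
		addr.can_addr.j1939.name = J1939_NO_NAME;	/* no address claiming */
		addr.can_addr.j1939.addr = 0x20;		/* static source address */
		addr.can_addr.j1939.pgn = J1939_NO_PGN;		/* no RX PGN filter */

		if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0)
			return 1;

		/* optional: restrict reception to address claim PGs */
		setsockopt(sock, SOL_CAN_J1939, SO_J1939_FILTER,
			   &filt, sizeof(filt));

		/* recvfrom()/sendto() with a peer sockaddr_can would follow */
		close(sock);
		return 0;
	}

Binding with J1939_NO_NAME and a fixed address skips dynamic address claiming; sockets that pass a NAME instead rely on the address-claim handling added in address-claim.c below.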
diff --git a/net/can/j1939/Makefile b/net/can/j1939/Makefile new file mode 100644 index 000000000000..19181bdae173 --- /dev/null +++ b/net/can/j1939/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_CAN_J1939) += can-j1939.o + +can-j1939-objs := \ + address-claim.o \ + bus.o \ + main.o \ + socket.o \ + transport.o diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c new file mode 100644 index 000000000000..f33c47327927 --- /dev/null +++ b/net/can/j1939/address-claim.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <[email protected]> +// Copyright (c) 2010-2011 EIA Electronics, +// Pieter Beyens <[email protected]> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <[email protected]> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <[email protected]> + +/* J1939 Address Claiming. + * Address Claiming in the kernel + * - keeps track of the AC states of ECU's, + * - resolves NAME<=>SA taking into account the AC states of ECU's. + * + * All Address Claim msgs (including host-originated msg) are processed + * at the receive path (a sent msg is always received again via CAN echo). + * As such, the processing of AC msgs is done in the order on which msgs + * are sent on the bus. + * + * This module doesn't send msgs itself (e.g. replies on Address Claims), + * this is the responsibility of a user space application or daemon. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/netdevice.h> +#include <linux/skbuff.h> + +#include "j1939-priv.h" + +static inline name_t j1939_skb_to_name(const struct sk_buff *skb) +{ + return le64_to_cpup((__le64 *)skb->data); +} + +static inline bool j1939_ac_msg_is_request(struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + int req_pgn; + + if (skb->len < 3 || skcb->addr.pgn != J1939_PGN_REQUEST) + return false; + + req_pgn = skb->data[0] | (skb->data[1] << 8) | (skb->data[2] << 16); + + return req_pgn == J1939_PGN_ADDRESS_CLAIMED; +} + +static int j1939_ac_verify_outgoing(struct j1939_priv *priv, + struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + + if (skb->len != 8) { + netdev_notice(priv->ndev, "tx address claim with dlc %i\n", + skb->len); + return -EPROTO; + } + + if (skcb->addr.src_name != j1939_skb_to_name(skb)) { + netdev_notice(priv->ndev, "tx address claim with different name\n"); + return -EPROTO; + } + + if (skcb->addr.sa == J1939_NO_ADDR) { + netdev_notice(priv->ndev, "tx address claim with broadcast sa\n"); + return -EPROTO; + } + + /* ac must always be a broadcast */ + if (skcb->addr.dst_name || skcb->addr.da != J1939_NO_ADDR) { + netdev_notice(priv->ndev, "tx address claim with dest, not broadcast\n"); + return -EPROTO; + } + return 0; +} + +int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + int ret; + u8 addr; + + /* network mgmt: address claiming msgs */ + if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) { + struct j1939_ecu *ecu; + + ret = j1939_ac_verify_outgoing(priv, skb); + /* return both when failure & when successful */ + if (ret < 0) + return ret; + ecu = j1939_ecu_get_by_name(priv, skcb->addr.src_name); + if (!ecu) + return -ENODEV; + + if (ecu->addr != skcb->addr.sa) + /* hold further traffic for ecu, remove from parent */ + j1939_ecu_unmap(ecu); + j1939_ecu_put(ecu); + } else if (skcb->addr.src_name) { + /* assign source address */ + addr 
= j1939_name_to_addr(priv, skcb->addr.src_name); + if (!j1939_address_is_unicast(addr) && + !j1939_ac_msg_is_request(skb)) { + netdev_notice(priv->ndev, "tx drop: invalid sa for name 0x%016llx\n", + skcb->addr.src_name); + return -EADDRNOTAVAIL; + } + skcb->addr.sa = addr; + } + + /* assign destination address */ + if (skcb->addr.dst_name) { + addr = j1939_name_to_addr(priv, skcb->addr.dst_name); + if (!j1939_address_is_unicast(addr)) { + netdev_notice(priv->ndev, "tx drop: invalid da for name 0x%016llx\n", + skcb->addr.dst_name); + return -EADDRNOTAVAIL; + } + skcb->addr.da = addr; + } + return 0; +} + +static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_ecu *ecu, *prev; + name_t name; + + if (skb->len != 8) { + netdev_notice(priv->ndev, "rx address claim with wrong dlc %i\n", + skb->len); + return; + } + + name = j1939_skb_to_name(skb); + skcb->addr.src_name = name; + if (!name) { + netdev_notice(priv->ndev, "rx address claim without name\n"); + return; + } + + if (!j1939_address_is_valid(skcb->addr.sa)) { + netdev_notice(priv->ndev, "rx address claim with broadcast sa\n"); + return; + } + + write_lock_bh(&priv->lock); + + /* Few words on the ECU ref counting: + * + * First we get an ECU handle, either with + * j1939_ecu_get_by_name_locked() (increments the ref counter) + * or j1939_ecu_create_locked() (initializes an ECU object + * with a ref counter of 1). + * + * j1939_ecu_unmap_locked() will decrement the ref counter, + * but only if the ECU was mapped before. So "ecu" still + * belongs to us. + * + * j1939_ecu_timer_start() will increment the ref counter + * before it starts the timer, so we can put the ecu when + * leaving this function. + */ + ecu = j1939_ecu_get_by_name_locked(priv, name); + if (!ecu && j1939_address_is_unicast(skcb->addr.sa)) + ecu = j1939_ecu_create_locked(priv, name); + + if (IS_ERR_OR_NULL(ecu)) + goto out_unlock_bh; + + /* cancel pending (previous) address claim */ + j1939_ecu_timer_cancel(ecu); + + if (j1939_address_is_idle(skcb->addr.sa)) { + j1939_ecu_unmap_locked(ecu); + goto out_ecu_put; + } + + /* save new addr */ + if (ecu->addr != skcb->addr.sa) + j1939_ecu_unmap_locked(ecu); + ecu->addr = skcb->addr.sa; + + prev = j1939_ecu_get_by_addr_locked(priv, skcb->addr.sa); + if (prev) { + if (ecu->name > prev->name) { + j1939_ecu_unmap_locked(ecu); + j1939_ecu_put(prev); + goto out_ecu_put; + } else { + /* kick prev if less or equal */ + j1939_ecu_unmap_locked(prev); + j1939_ecu_put(prev); + } + } + + j1939_ecu_timer_start(ecu); + out_ecu_put: + j1939_ecu_put(ecu); + out_unlock_bh: + write_unlock_bh(&priv->lock); +} + +void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_ecu *ecu; + + /* network mgmt */ + if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) { + j1939_ac_process(priv, skb); + } else if (j1939_address_is_unicast(skcb->addr.sa)) { + /* assign source name */ + ecu = j1939_ecu_get_by_addr(priv, skcb->addr.sa); + if (ecu) { + skcb->addr.src_name = ecu->name; + j1939_ecu_put(ecu); + } + } + + /* assign destination name */ + ecu = j1939_ecu_get_by_addr(priv, skcb->addr.da); + if (ecu) { + skcb->addr.dst_name = ecu->name; + j1939_ecu_put(ecu); + } +} diff --git a/net/can/j1939/bus.c b/net/can/j1939/bus.c new file mode 100644 index 000000000000..486687901602 --- /dev/null +++ b/net/can/j1939/bus.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 
2010-2011 EIA Electronics, +// Kurt Van Dijck <[email protected]> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <[email protected]> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <[email protected]> + +/* bus for j1939 remote devices + * Since rtnetlink, no real bus is used. + */ + +#include <net/sock.h> + +#include "j1939-priv.h" + +static void __j1939_ecu_release(struct kref *kref) +{ + struct j1939_ecu *ecu = container_of(kref, struct j1939_ecu, kref); + struct j1939_priv *priv = ecu->priv; + + list_del(&ecu->list); + kfree(ecu); + j1939_priv_put(priv); +} + +void j1939_ecu_put(struct j1939_ecu *ecu) +{ + kref_put(&ecu->kref, __j1939_ecu_release); +} + +static void j1939_ecu_get(struct j1939_ecu *ecu) +{ + kref_get(&ecu->kref); +} + +static bool j1939_ecu_is_mapped_locked(struct j1939_ecu *ecu) +{ + struct j1939_priv *priv = ecu->priv; + + lockdep_assert_held(&priv->lock); + + return j1939_ecu_find_by_addr_locked(priv, ecu->addr) == ecu; +} + +/* ECU device interface */ +/* map ECU to a bus address space */ +static void j1939_ecu_map_locked(struct j1939_ecu *ecu) +{ + struct j1939_priv *priv = ecu->priv; + struct j1939_addr_ent *ent; + + lockdep_assert_held(&priv->lock); + + if (!j1939_address_is_unicast(ecu->addr)) + return; + + ent = &priv->ents[ecu->addr]; + + if (ent->ecu) { + netdev_warn(priv->ndev, "Trying to map already mapped ECU, addr: 0x%02x, name: 0x%016llx. Skip it.\n", + ecu->addr, ecu->name); + return; + } + + j1939_ecu_get(ecu); + ent->ecu = ecu; + ent->nusers += ecu->nusers; +} + +/* unmap ECU from a bus address space */ +void j1939_ecu_unmap_locked(struct j1939_ecu *ecu) +{ + struct j1939_priv *priv = ecu->priv; + struct j1939_addr_ent *ent; + + lockdep_assert_held(&priv->lock); + + if (!j1939_address_is_unicast(ecu->addr)) + return; + + if (!j1939_ecu_is_mapped_locked(ecu)) + return; + + ent = &priv->ents[ecu->addr]; + ent->ecu = NULL; + ent->nusers -= ecu->nusers; + j1939_ecu_put(ecu); +} + +void j1939_ecu_unmap(struct j1939_ecu *ecu) +{ + write_lock_bh(&ecu->priv->lock); + j1939_ecu_unmap_locked(ecu); + write_unlock_bh(&ecu->priv->lock); +} + +void j1939_ecu_unmap_all(struct j1939_priv *priv) +{ + int i; + + write_lock_bh(&priv->lock); + for (i = 0; i < ARRAY_SIZE(priv->ents); i++) + if (priv->ents[i].ecu) + j1939_ecu_unmap_locked(priv->ents[i].ecu); + write_unlock_bh(&priv->lock); +} + +void j1939_ecu_timer_start(struct j1939_ecu *ecu) +{ + /* The ECU is held here and released in the + * j1939_ecu_timer_handler() or j1939_ecu_timer_cancel(). + */ + j1939_ecu_get(ecu); + + /* Schedule timer in 250 msec to commit address change. */ + hrtimer_start(&ecu->ac_timer, ms_to_ktime(250), + HRTIMER_MODE_REL_SOFT); +} + +void j1939_ecu_timer_cancel(struct j1939_ecu *ecu) +{ + if (hrtimer_cancel(&ecu->ac_timer)) + j1939_ecu_put(ecu); +} + +static enum hrtimer_restart j1939_ecu_timer_handler(struct hrtimer *hrtimer) +{ + struct j1939_ecu *ecu = + container_of(hrtimer, struct j1939_ecu, ac_timer); + struct j1939_priv *priv = ecu->priv; + + write_lock_bh(&priv->lock); + /* TODO: can we test if ecu->addr is unicast before starting + * the timer? + */ + j1939_ecu_map_locked(ecu); + + /* The corresponding j1939_ecu_get() is in + * j1939_ecu_timer_start(). 
+ */ + j1939_ecu_put(ecu); + write_unlock_bh(&priv->lock); + + return HRTIMER_NORESTART; +} + +struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name) +{ + struct j1939_ecu *ecu; + + lockdep_assert_held(&priv->lock); + + ecu = kzalloc(sizeof(*ecu), gfp_any()); + if (!ecu) + return ERR_PTR(-ENOMEM); + kref_init(&ecu->kref); + ecu->addr = J1939_IDLE_ADDR; + ecu->name = name; + + hrtimer_init(&ecu->ac_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + ecu->ac_timer.function = j1939_ecu_timer_handler; + INIT_LIST_HEAD(&ecu->list); + + j1939_priv_get(priv); + ecu->priv = priv; + list_add_tail(&ecu->list, &priv->ecus); + + return ecu; +} + +struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv, + u8 addr) +{ + lockdep_assert_held(&priv->lock); + + return priv->ents[addr].ecu; +} + +struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv, u8 addr) +{ + struct j1939_ecu *ecu; + + lockdep_assert_held(&priv->lock); + + if (!j1939_address_is_unicast(addr)) + return NULL; + + ecu = j1939_ecu_find_by_addr_locked(priv, addr); + if (ecu) + j1939_ecu_get(ecu); + + return ecu; +} + +struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr) +{ + struct j1939_ecu *ecu; + + read_lock_bh(&priv->lock); + ecu = j1939_ecu_get_by_addr_locked(priv, addr); + read_unlock_bh(&priv->lock); + + return ecu; +} + +/* get pointer to ecu without increasing ref counter */ +static struct j1939_ecu *j1939_ecu_find_by_name_locked(struct j1939_priv *priv, + name_t name) +{ + struct j1939_ecu *ecu; + + lockdep_assert_held(&priv->lock); + + list_for_each_entry(ecu, &priv->ecus, list) { + if (ecu->name == name) + return ecu; + } + + return NULL; +} + +struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv, + name_t name) +{ + struct j1939_ecu *ecu; + + lockdep_assert_held(&priv->lock); + + if (!name) + return NULL; + + ecu = j1939_ecu_find_by_name_locked(priv, name); + if (ecu) + j1939_ecu_get(ecu); + + return ecu; +} + +struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name) +{ + struct j1939_ecu *ecu; + + read_lock_bh(&priv->lock); + ecu = j1939_ecu_get_by_name_locked(priv, name); + read_unlock_bh(&priv->lock); + + return ecu; +} + +u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name) +{ + struct j1939_ecu *ecu; + int addr = J1939_IDLE_ADDR; + + if (!name) + return J1939_NO_ADDR; + + read_lock_bh(&priv->lock); + ecu = j1939_ecu_find_by_name_locked(priv, name); + if (ecu && j1939_ecu_is_mapped_locked(ecu)) + /* ecu's SA is registered */ + addr = ecu->addr; + + read_unlock_bh(&priv->lock); + + return addr; +} + +/* TX addr/name accounting + * Transport protocol needs to know if a SA is local or not + * These functions originate from userspace manipulating sockets, + * so locking is straigforward + */ + +int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa) +{ + struct j1939_ecu *ecu; + int err = 0; + + write_lock_bh(&priv->lock); + + if (j1939_address_is_unicast(sa)) + priv->ents[sa].nusers++; + + if (!name) + goto done; + + ecu = j1939_ecu_get_by_name_locked(priv, name); + if (!ecu) + ecu = j1939_ecu_create_locked(priv, name); + err = PTR_ERR_OR_ZERO(ecu); + if (err) + goto done; + + ecu->nusers++; + /* TODO: do we care if ecu->addr != sa? 
*/ + if (j1939_ecu_is_mapped_locked(ecu)) + /* ecu's sa is active already */ + priv->ents[ecu->addr].nusers++; + + done: + write_unlock_bh(&priv->lock); + + return err; +} + +void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa) +{ + struct j1939_ecu *ecu; + + write_lock_bh(&priv->lock); + + if (j1939_address_is_unicast(sa)) + priv->ents[sa].nusers--; + + if (!name) + goto done; + + ecu = j1939_ecu_find_by_name_locked(priv, name); + if (WARN_ON_ONCE(!ecu)) + goto done; + + ecu->nusers--; + /* TODO: do we care if ecu->addr != sa? */ + if (j1939_ecu_is_mapped_locked(ecu)) + /* ecu's sa is active already */ + priv->ents[ecu->addr].nusers--; + j1939_ecu_put(ecu); + + done: + write_unlock_bh(&priv->lock); +} diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h new file mode 100644 index 000000000000..12369b604ce9 --- /dev/null +++ b/net/can/j1939/j1939-priv.h @@ -0,0 +1,338 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <[email protected]> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <[email protected]> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <[email protected]> + +#ifndef _J1939_PRIV_H_ +#define _J1939_PRIV_H_ + +#include <linux/can/j1939.h> +#include <net/sock.h> + +/* Timeout to receive the abort signal over loop back. In case CAN + * bus is open, the timeout should be triggered. + */ +#define J1939_XTP_ABORT_TIMEOUT_MS 500 +#define J1939_SIMPLE_ECHO_TIMEOUT_MS (10 * 1000) + +struct j1939_session; +enum j1939_sk_errqueue_type { + J1939_ERRQUEUE_ACK, + J1939_ERRQUEUE_SCHED, + J1939_ERRQUEUE_ABORT, +}; + +/* j1939 devices */ +struct j1939_ecu { + struct list_head list; + name_t name; + u8 addr; + + /* indicates that this ecu successfully claimed @sa as its address */ + struct hrtimer ac_timer; + struct kref kref; + struct j1939_priv *priv; + + /* count users, to help transport protocol decide for interaction */ + int nusers; +}; + +struct j1939_priv { + struct list_head ecus; + /* local list entry in priv + * These allow irq (& softirq) context lookups on j1939 devices + * This approach (separate lists) is done as the other 2 alternatives + * are not easier or even wrong + * 1) using the pure kobject methods involves mutexes, which are not + * allowed in irq context. + * 2) duplicating data structures would require a lot of synchronization + * code + * usage: + */ + + /* segments need a lock to protect the above list */ + rwlock_t lock; + + struct net_device *ndev; + + /* list of 256 ecu ptrs, that cache the claimed addresses. + * also protected by the above lock + */ + struct j1939_addr_ent { + struct j1939_ecu *ecu; + /* count users, to help transport protocol */ + int nusers; + } ents[256]; + + struct kref kref; + + /* List of active sessions to prevent start of conflicting + * one. + * + * Do not start two sessions of same type, addresses and + * direction. 
+ */ + struct list_head active_session_list; + + /* protects active_session_list */ + spinlock_t active_session_list_lock; + + unsigned int tp_max_packet_size; + + /* lock for j1939_socks list */ + spinlock_t j1939_socks_lock; + struct list_head j1939_socks; + + struct kref rx_kref; +}; + +void j1939_ecu_put(struct j1939_ecu *ecu); + +/* keep the cache of what is local */ +int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa); +void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa); + +static inline bool j1939_address_is_unicast(u8 addr) +{ + return addr <= J1939_MAX_UNICAST_ADDR; +} + +static inline bool j1939_address_is_idle(u8 addr) +{ + return addr == J1939_IDLE_ADDR; +} + +static inline bool j1939_address_is_valid(u8 addr) +{ + return addr != J1939_NO_ADDR; +} + +static inline bool j1939_pgn_is_pdu1(pgn_t pgn) +{ + /* ignore dp & res bits for this */ + return (pgn & 0xff00) < 0xf000; +} + +/* utility to correctly unmap an ECU */ +void j1939_ecu_unmap_locked(struct j1939_ecu *ecu); +void j1939_ecu_unmap(struct j1939_ecu *ecu); + +u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name); +struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv, + u8 addr); +struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr); +struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv, + u8 addr); +struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name); +struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv, + name_t name); + +enum j1939_transfer_type { + J1939_TP, + J1939_ETP, + J1939_SIMPLE, +}; + +struct j1939_addr { + name_t src_name; + name_t dst_name; + pgn_t pgn; + + u8 sa; + u8 da; + + u8 type; +}; + +/* control buffer of the sk_buff */ +struct j1939_sk_buff_cb { + /* Offset in bytes within one ETP session */ + u32 offset; + + /* for tx, MSG_SYN will be used to sync on sockets */ + u32 msg_flags; + u32 tskey; + + struct j1939_addr addr; + + /* Flags for quick lookups during skb processing. + * These are set in the receive path only. 
+ */ +#define J1939_ECU_LOCAL_SRC BIT(0) +#define J1939_ECU_LOCAL_DST BIT(1) + u8 flags; + + priority_t priority; +}; + +static inline +struct j1939_sk_buff_cb *j1939_skb_to_cb(const struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct j1939_sk_buff_cb) > sizeof(skb->cb)); + + return (struct j1939_sk_buff_cb *)skb->cb; +} + +int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb); +void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb); +bool j1939_sk_recv_match(struct j1939_priv *priv, + struct j1939_sk_buff_cb *skcb); +void j1939_sk_send_loop_abort(struct sock *sk, int err); +void j1939_sk_errqueue(struct j1939_session *session, + enum j1939_sk_errqueue_type type); +void j1939_sk_queue_activate_next(struct j1939_session *session); + +/* stack entries */ +struct j1939_session *j1939_tp_send(struct j1939_priv *priv, + struct sk_buff *skb, size_t size); +int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb); +int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb); +void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb); +void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb); + +/* network management */ +struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name); + +void j1939_ecu_timer_start(struct j1939_ecu *ecu); +void j1939_ecu_timer_cancel(struct j1939_ecu *ecu); +void j1939_ecu_unmap_all(struct j1939_priv *priv); + +struct j1939_priv *j1939_netdev_start(struct net_device *ndev); +void j1939_netdev_stop(struct j1939_priv *priv); + +void j1939_priv_put(struct j1939_priv *priv); +void j1939_priv_get(struct j1939_priv *priv); + +/* notify/alert all j1939 sockets bound to ifindex */ +void j1939_sk_netdev_event_netdown(struct j1939_priv *priv); +int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk); +void j1939_tp_init(struct j1939_priv *priv); + +/* decrement pending skb for a j1939 socket */ +void j1939_sock_pending_del(struct sock *sk); + +enum j1939_session_state { + J1939_SESSION_NEW, + J1939_SESSION_ACTIVE, + /* waiting for abort signal on the bus */ + J1939_SESSION_WAITING_ABORT, + J1939_SESSION_ACTIVE_MAX, + J1939_SESSION_DONE, +}; + +struct j1939_session { + struct j1939_priv *priv; + struct list_head active_session_list_entry; + struct list_head sk_session_queue_entry; + struct kref kref; + struct sock *sk; + + /* ifindex, src, dst, pgn define the session block + * the are _never_ modified after insertion in the list + * this decreases locking problems a _lot_ + */ + struct j1939_sk_buff_cb skcb; + struct sk_buff_head skb_queue; + + /* all tx related stuff (last_txcmd, pkt.tx) + * is protected (modified only) with the txtimer hrtimer + * 'total' & 'block' are never changed, + * last_cmd, last & block are protected by ->lock + * this means that the tx may run after cts is received that should + * have stopped tx, but this time discrepancy is never avoided anyhow + */ + u8 last_cmd, last_txcmd; + bool transmission; + bool extd; + /* Total message size, number of bytes */ + unsigned int total_message_size; + /* Total number of bytes queue from socket to the session */ + unsigned int total_queued_size; + unsigned int tx_retry; + + int err; + u32 tskey; + enum j1939_session_state state; + + /* Packets counters for a (extended) transfer session. The packet is + * maximal of 7 bytes. 
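+	 * For example, a maximum sized (non-extended) TP session of
+	 * 7 * 255 = 1785 bytes is tracked with pkt.total = 255.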
+ */ + struct { + /* total - total number of packets for this session */ + unsigned int total; + /* last - last packet of a transfer block after which + * responder should send ETP.CM_CTS and originator + * ETP.CM_DPO + */ + unsigned int last; + /* tx - number of packets send by originator node. + * this counter can be set back if responder node + * didn't received all packets send by originator. + */ + unsigned int tx; + unsigned int tx_acked; + /* rx - number of packets received */ + unsigned int rx; + /* block - amount of packets expected in one block */ + unsigned int block; + /* dpo - ETP.CM_DPO, Data Packet Offset */ + unsigned int dpo; + } pkt; + struct hrtimer txtimer, rxtimer; +}; + +struct j1939_sock { + struct sock sk; /* must be first to skip with memset */ + struct j1939_priv *priv; + struct list_head list; + +#define J1939_SOCK_BOUND BIT(0) +#define J1939_SOCK_CONNECTED BIT(1) +#define J1939_SOCK_PROMISC BIT(2) +#define J1939_SOCK_ERRQUEUE BIT(3) + int state; + + int ifindex; + struct j1939_addr addr; + struct j1939_filter *filters; + int nfilters; + pgn_t pgn_rx_filter; + + /* j1939 may emit equal PGN (!= equal CAN-id's) out of order + * when transport protocol comes in. + * To allow emitting in order, keep a 'pending' nr. of packets + */ + atomic_t skb_pending; + wait_queue_head_t waitq; + + /* lock for the sk_session_queue list */ + spinlock_t sk_session_queue_lock; + struct list_head sk_session_queue; +}; + +static inline struct j1939_sock *j1939_sk(const struct sock *sk) +{ + return container_of(sk, struct j1939_sock, sk); +} + +void j1939_session_get(struct j1939_session *session); +void j1939_session_put(struct j1939_session *session); +void j1939_session_skb_queue(struct j1939_session *session, + struct sk_buff *skb); +int j1939_session_activate(struct j1939_session *session); +void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec); +void j1939_session_timers_cancel(struct j1939_session *session); + +#define J1939_MAX_TP_PACKET_SIZE (7 * 0xff) +#define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff) + +#define J1939_REGULAR 0 +#define J1939_EXTENDED 1 + +/* CAN protocol */ +extern const struct can_proto j1939_can_proto; + +#endif /* _J1939_PRIV_H_ */ diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c new file mode 100644 index 000000000000..def2f813ffce --- /dev/null +++ b/net/can/j1939/main.c @@ -0,0 +1,403 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Pieter Beyens <[email protected]> +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <[email protected]> +// Copyright (c) 2018 Protonic, +// Robin van der Gracht <[email protected]> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <[email protected]> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <[email protected]> + +/* Core of can-j1939 that links j1939 to CAN. 
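+ *
+ * The 29 bit CAN identifier carries, from MSB to LSB, a 3 bit priority,
+ * the 18 bit PGN and the 8 bit source address; for PDU1 PGNs the PS byte
+ * holds the destination address. j1939_can_recv() decodes this layout and
+ * j1939_send_one() rebuilds it.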
*/ + +#include <linux/can/can-ml.h> +#include <linux/can/core.h> +#include <linux/can/skb.h> +#include <linux/if_arp.h> +#include <linux/module.h> + +#include "j1939-priv.h" + +MODULE_DESCRIPTION("PF_CAN SAE J1939"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("EIA Electronics (Kurt Van Dijck & Pieter Beyens)"); +MODULE_ALIAS("can-proto-" __stringify(CAN_J1939)); + +/* LOWLEVEL CAN interface */ + +/* CAN_HDR: #bytes before can_frame data part */ +#define J1939_CAN_HDR (offsetof(struct can_frame, data)) + +/* CAN_FTR: #bytes beyond data part */ +#define J1939_CAN_FTR (sizeof(struct can_frame) - J1939_CAN_HDR - \ + sizeof(((struct can_frame *)0)->data)) + +/* lowest layer */ +static void j1939_can_recv(struct sk_buff *iskb, void *data) +{ + struct j1939_priv *priv = data; + struct sk_buff *skb; + struct j1939_sk_buff_cb *skcb, *iskcb; + struct can_frame *cf; + + /* create a copy of the skb + * j1939 only delivers the real data bytes, + * the header goes into sockaddr. + * j1939 may not touch the incoming skb in such way + */ + skb = skb_clone(iskb, GFP_ATOMIC); + if (!skb) + return; + + can_skb_set_owner(skb, iskb->sk); + + /* get a pointer to the header of the skb + * the skb payload (pointer) is moved, so that the next skb_data + * returns the actual payload + */ + cf = (void *)skb->data; + skb_pull(skb, J1939_CAN_HDR); + + /* fix length, set to dlc, with 8 maximum */ + skb_trim(skb, min_t(uint8_t, cf->can_dlc, 8)); + + /* set addr */ + skcb = j1939_skb_to_cb(skb); + memset(skcb, 0, sizeof(*skcb)); + + iskcb = j1939_skb_to_cb(iskb); + skcb->tskey = iskcb->tskey; + skcb->priority = (cf->can_id >> 26) & 0x7; + skcb->addr.sa = cf->can_id; + skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX; + /* set default message type */ + skcb->addr.type = J1939_TP; + if (j1939_pgn_is_pdu1(skcb->addr.pgn)) { + /* Type 1: with destination address */ + skcb->addr.da = skcb->addr.pgn; + /* normalize pgn: strip dst address */ + skcb->addr.pgn &= 0x3ff00; + } else { + /* set broadcast address */ + skcb->addr.da = J1939_NO_ADDR; + } + + /* update localflags */ + read_lock_bh(&priv->lock); + if (j1939_address_is_unicast(skcb->addr.sa) && + priv->ents[skcb->addr.sa].nusers) + skcb->flags |= J1939_ECU_LOCAL_SRC; + if (j1939_address_is_unicast(skcb->addr.da) && + priv->ents[skcb->addr.da].nusers) + skcb->flags |= J1939_ECU_LOCAL_DST; + read_unlock_bh(&priv->lock); + + /* deliver into the j1939 stack ... 
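+	 * address claiming runs first so source/destination names are
+	 * resolved, the transport protocol may then consume the frame, and
+	 * only unconsumed frames reach the simple and socket receive paths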
*/ + j1939_ac_recv(priv, skb); + + if (j1939_tp_recv(priv, skb)) + /* this means the transport layer processed the message */ + goto done; + + j1939_simple_recv(priv, skb); + j1939_sk_recv(priv, skb); + done: + kfree_skb(skb); +} + +/* NETDEV MANAGEMENT */ + +/* values for can_rx_(un)register */ +#define J1939_CAN_ID CAN_EFF_FLAG +#define J1939_CAN_MASK (CAN_EFF_FLAG | CAN_RTR_FLAG) + +static DEFINE_SPINLOCK(j1939_netdev_lock); + +static struct j1939_priv *j1939_priv_create(struct net_device *ndev) +{ + struct j1939_priv *priv; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + rwlock_init(&priv->lock); + INIT_LIST_HEAD(&priv->ecus); + priv->ndev = ndev; + kref_init(&priv->kref); + kref_init(&priv->rx_kref); + dev_hold(ndev); + + netdev_dbg(priv->ndev, "%s : 0x%p\n", __func__, priv); + + return priv; +} + +static inline void j1939_priv_set(struct net_device *ndev, + struct j1939_priv *priv) +{ + struct can_ml_priv *can_ml_priv = ndev->ml_priv; + + can_ml_priv->j1939_priv = priv; +} + +static void __j1939_priv_release(struct kref *kref) +{ + struct j1939_priv *priv = container_of(kref, struct j1939_priv, kref); + struct net_device *ndev = priv->ndev; + + netdev_dbg(priv->ndev, "%s: 0x%p\n", __func__, priv); + + dev_put(ndev); + kfree(priv); +} + +void j1939_priv_put(struct j1939_priv *priv) +{ + kref_put(&priv->kref, __j1939_priv_release); +} + +void j1939_priv_get(struct j1939_priv *priv) +{ + kref_get(&priv->kref); +} + +static int j1939_can_rx_register(struct j1939_priv *priv) +{ + struct net_device *ndev = priv->ndev; + int ret; + + j1939_priv_get(priv); + ret = can_rx_register(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK, + j1939_can_recv, priv, "j1939", NULL); + if (ret < 0) { + j1939_priv_put(priv); + return ret; + } + + return 0; +} + +static void j1939_can_rx_unregister(struct j1939_priv *priv) +{ + struct net_device *ndev = priv->ndev; + + can_rx_unregister(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK, + j1939_can_recv, priv); + + j1939_priv_put(priv); +} + +static void __j1939_rx_release(struct kref *kref) + __releases(&j1939_netdev_lock) +{ + struct j1939_priv *priv = container_of(kref, struct j1939_priv, + rx_kref); + + j1939_can_rx_unregister(priv); + j1939_ecu_unmap_all(priv); + j1939_priv_set(priv->ndev, NULL); + spin_unlock(&j1939_netdev_lock); +} + +/* get pointer to priv without increasing ref counter */ +static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev) +{ + struct can_ml_priv *can_ml_priv = ndev->ml_priv; + + return can_ml_priv->j1939_priv; +} + +static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev) +{ + struct j1939_priv *priv; + + lockdep_assert_held(&j1939_netdev_lock); + + if (ndev->type != ARPHRD_CAN) + return NULL; + + priv = j1939_ndev_to_priv(ndev); + if (priv) + j1939_priv_get(priv); + + return priv; +} + +static struct j1939_priv *j1939_priv_get_by_ndev(struct net_device *ndev) +{ + struct j1939_priv *priv; + + spin_lock(&j1939_netdev_lock); + priv = j1939_priv_get_by_ndev_locked(ndev); + spin_unlock(&j1939_netdev_lock); + + return priv; +} + +struct j1939_priv *j1939_netdev_start(struct net_device *ndev) +{ + struct j1939_priv *priv, *priv_new; + int ret; + + priv = j1939_priv_get_by_ndev(ndev); + if (priv) { + kref_get(&priv->rx_kref); + return priv; + } + + priv = j1939_priv_create(ndev); + if (!priv) + return ERR_PTR(-ENOMEM); + + j1939_tp_init(priv); + spin_lock_init(&priv->j1939_socks_lock); + INIT_LIST_HEAD(&priv->j1939_socks); + + spin_lock(&j1939_netdev_lock); 
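+	/* recheck under j1939_netdev_lock: j1939_priv_create() ran unlocked,
+	 * so a concurrent j1939_netdev_start() may already have published a
+	 * priv for this ndev
+	 */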
+ priv_new = j1939_priv_get_by_ndev_locked(ndev); + if (priv_new) { + /* Someone was faster than us, use their priv and roll + * back our's. + */ + spin_unlock(&j1939_netdev_lock); + dev_put(ndev); + kfree(priv); + kref_get(&priv_new->rx_kref); + return priv_new; + } + j1939_priv_set(ndev, priv); + spin_unlock(&j1939_netdev_lock); + + ret = j1939_can_rx_register(priv); + if (ret < 0) + goto out_priv_put; + + return priv; + + out_priv_put: + j1939_priv_set(ndev, NULL); + dev_put(ndev); + kfree(priv); + + return ERR_PTR(ret); +} + +void j1939_netdev_stop(struct j1939_priv *priv) +{ + kref_put_lock(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock); + j1939_priv_put(priv); +} + +int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb) +{ + int ret, dlc; + canid_t canid; + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct can_frame *cf; + + /* apply sanity checks */ + if (j1939_pgn_is_pdu1(skcb->addr.pgn)) + skcb->addr.pgn &= J1939_PGN_PDU1_MAX; + else + skcb->addr.pgn &= J1939_PGN_MAX; + + if (skcb->priority > 7) + skcb->priority = 6; + + ret = j1939_ac_fixup(priv, skb); + if (unlikely(ret)) + goto failed; + dlc = skb->len; + + /* re-claim the CAN_HDR from the SKB */ + cf = skb_push(skb, J1939_CAN_HDR); + + /* make it a full can frame again */ + skb_put(skb, J1939_CAN_FTR + (8 - dlc)); + + canid = CAN_EFF_FLAG | + (skcb->priority << 26) | + (skcb->addr.pgn << 8) | + skcb->addr.sa; + if (j1939_pgn_is_pdu1(skcb->addr.pgn)) + canid |= skcb->addr.da << 8; + + cf->can_id = canid; + cf->can_dlc = dlc; + + return can_send(skb, 1); + + failed: + kfree_skb(skb); + return ret; +} + +static int j1939_netdev_notify(struct notifier_block *nb, + unsigned long msg, void *data) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(data); + struct j1939_priv *priv; + + priv = j1939_priv_get_by_ndev(ndev); + if (!priv) + goto notify_done; + + if (ndev->type != ARPHRD_CAN) + goto notify_put; + + switch (msg) { + case NETDEV_DOWN: + j1939_cancel_active_session(priv, NULL); + j1939_sk_netdev_event_netdown(priv); + j1939_ecu_unmap_all(priv); + break; + } + +notify_put: + j1939_priv_put(priv); + +notify_done: + return NOTIFY_DONE; +} + +static struct notifier_block j1939_netdev_notifier = { + .notifier_call = j1939_netdev_notify, +}; + +/* MODULE interface */ +static __init int j1939_module_init(void) +{ + int ret; + + pr_info("can: SAE J1939\n"); + + ret = register_netdevice_notifier(&j1939_netdev_notifier); + if (ret) + goto fail_notifier; + + ret = can_proto_register(&j1939_can_proto); + if (ret < 0) { + pr_err("can: registration of j1939 protocol failed\n"); + goto fail_sk; + } + + return 0; + + fail_sk: + unregister_netdevice_notifier(&j1939_netdev_notifier); + fail_notifier: + return ret; +} + +static __exit void j1939_module_exit(void) +{ + can_proto_unregister(&j1939_can_proto); + + unregister_netdevice_notifier(&j1939_netdev_notifier); +} + +module_init(j1939_module_init); +module_exit(j1939_module_exit); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c new file mode 100644 index 000000000000..37c1040bcb9c --- /dev/null +++ b/net/can/j1939/socket.c @@ -0,0 +1,1160 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Pieter Beyens <[email protected]> +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <[email protected]> +// Copyright (c) 2018 Protonic, +// Robin van der Gracht <[email protected]> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <[email protected]> +// Copyright (c) 2017-2019 
Pengutronix, +// Oleksij Rempel <[email protected]> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/can/core.h> +#include <linux/can/skb.h> +#include <linux/errqueue.h> +#include <linux/if_arp.h> + +#include "j1939-priv.h" + +#define J1939_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.j1939) + +/* conversion function between struct sock::sk_priority from linux and + * j1939 priority field + */ +static inline priority_t j1939_prio(u32 sk_priority) +{ + sk_priority = min(sk_priority, 7U); + + return 7 - sk_priority; +} + +static inline u32 j1939_to_sk_priority(priority_t prio) +{ + return 7 - prio; +} + +/* function to see if pgn is to be evaluated */ +static inline bool j1939_pgn_is_valid(pgn_t pgn) +{ + return pgn <= J1939_PGN_MAX; +} + +/* test function to avoid non-zero DA placeholder for pdu1 pgn's */ +static inline bool j1939_pgn_is_clean_pdu(pgn_t pgn) +{ + if (j1939_pgn_is_pdu1(pgn)) + return !(pgn & 0xff); + else + return true; +} + +static inline void j1939_sock_pending_add(struct sock *sk) +{ + struct j1939_sock *jsk = j1939_sk(sk); + + atomic_inc(&jsk->skb_pending); +} + +static int j1939_sock_pending_get(struct sock *sk) +{ + struct j1939_sock *jsk = j1939_sk(sk); + + return atomic_read(&jsk->skb_pending); +} + +void j1939_sock_pending_del(struct sock *sk) +{ + struct j1939_sock *jsk = j1939_sk(sk); + + /* atomic_dec_return returns the new value */ + if (!atomic_dec_return(&jsk->skb_pending)) + wake_up(&jsk->waitq); /* no pending SKB's */ +} + +static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk) +{ + jsk->state |= J1939_SOCK_BOUND; + j1939_priv_get(priv); + jsk->priv = priv; + + spin_lock_bh(&priv->j1939_socks_lock); + list_add_tail(&jsk->list, &priv->j1939_socks); + spin_unlock_bh(&priv->j1939_socks_lock); +} + +static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk) +{ + spin_lock_bh(&priv->j1939_socks_lock); + list_del_init(&jsk->list); + spin_unlock_bh(&priv->j1939_socks_lock); + + jsk->priv = NULL; + j1939_priv_put(priv); + jsk->state &= ~J1939_SOCK_BOUND; +} + +static bool j1939_sk_queue_session(struct j1939_session *session) +{ + struct j1939_sock *jsk = j1939_sk(session->sk); + bool empty; + + spin_lock_bh(&jsk->sk_session_queue_lock); + empty = list_empty(&jsk->sk_session_queue); + j1939_session_get(session); + list_add_tail(&session->sk_session_queue_entry, &jsk->sk_session_queue); + spin_unlock_bh(&jsk->sk_session_queue_lock); + j1939_sock_pending_add(&jsk->sk); + + return empty; +} + +static struct +j1939_session *j1939_sk_get_incomplete_session(struct j1939_sock *jsk) +{ + struct j1939_session *session = NULL; + + spin_lock_bh(&jsk->sk_session_queue_lock); + if (!list_empty(&jsk->sk_session_queue)) { + session = list_last_entry(&jsk->sk_session_queue, + struct j1939_session, + sk_session_queue_entry); + if (session->total_queued_size == session->total_message_size) + session = NULL; + else + j1939_session_get(session); + } + spin_unlock_bh(&jsk->sk_session_queue_lock); + + return session; +} + +static void j1939_sk_queue_drop_all(struct j1939_priv *priv, + struct j1939_sock *jsk, int err) +{ + struct j1939_session *session, *tmp; + + netdev_dbg(priv->ndev, "%s: err: %i\n", __func__, err); + spin_lock_bh(&jsk->sk_session_queue_lock); + list_for_each_entry_safe(session, tmp, &jsk->sk_session_queue, + sk_session_queue_entry) { + list_del_init(&session->sk_session_queue_entry); + session->err = err; + j1939_session_put(session); + } + spin_unlock_bh(&jsk->sk_session_queue_lock); +} + +static void 
j1939_sk_queue_activate_next_locked(struct j1939_session *session) +{ + struct j1939_sock *jsk; + struct j1939_session *first; + int err; + + /* RX-Session don't have a socket (yet) */ + if (!session->sk) + return; + + jsk = j1939_sk(session->sk); + lockdep_assert_held(&jsk->sk_session_queue_lock); + + err = session->err; + + first = list_first_entry_or_null(&jsk->sk_session_queue, + struct j1939_session, + sk_session_queue_entry); + + /* Some else has already activated the next session */ + if (first != session) + return; + +activate_next: + list_del_init(&first->sk_session_queue_entry); + j1939_session_put(first); + first = list_first_entry_or_null(&jsk->sk_session_queue, + struct j1939_session, + sk_session_queue_entry); + if (!first) + return; + + if (WARN_ON_ONCE(j1939_session_activate(first))) { + first->err = -EBUSY; + goto activate_next; + } else { + /* Give receiver some time (arbitrary chosen) to recover */ + int time_ms = 0; + + if (err) + time_ms = 10 + prandom_u32_max(16); + + j1939_tp_schedule_txtimer(first, time_ms); + } +} + +void j1939_sk_queue_activate_next(struct j1939_session *session) +{ + struct j1939_sock *jsk; + + if (!session->sk) + return; + + jsk = j1939_sk(session->sk); + + spin_lock_bh(&jsk->sk_session_queue_lock); + j1939_sk_queue_activate_next_locked(session); + spin_unlock_bh(&jsk->sk_session_queue_lock); +} + +static bool j1939_sk_match_dst(struct j1939_sock *jsk, + const struct j1939_sk_buff_cb *skcb) +{ + if ((jsk->state & J1939_SOCK_PROMISC)) + return true; + + /* Destination address filter */ + if (jsk->addr.src_name && skcb->addr.dst_name) { + if (jsk->addr.src_name != skcb->addr.dst_name) + return false; + } else { + /* receive (all sockets) if + * - all packages that match our bind() address + * - all broadcast on a socket if SO_BROADCAST + * is set + */ + if (j1939_address_is_unicast(skcb->addr.da)) { + if (jsk->addr.sa != skcb->addr.da) + return false; + } else if (!sock_flag(&jsk->sk, SOCK_BROADCAST)) { + /* receiving broadcast without SO_BROADCAST + * flag is not allowed + */ + return false; + } + } + + /* Source address filter */ + if (jsk->state & J1939_SOCK_CONNECTED) { + /* receive (all sockets) if + * - all packages that match our connect() name or address + */ + if (jsk->addr.dst_name && skcb->addr.src_name) { + if (jsk->addr.dst_name != skcb->addr.src_name) + return false; + } else { + if (jsk->addr.da != skcb->addr.sa) + return false; + } + } + + /* PGN filter */ + if (j1939_pgn_is_valid(jsk->pgn_rx_filter) && + jsk->pgn_rx_filter != skcb->addr.pgn) + return false; + + return true; +} + +/* matches skb control buffer (addr) with a j1939 filter */ +static bool j1939_sk_match_filter(struct j1939_sock *jsk, + const struct j1939_sk_buff_cb *skcb) +{ + const struct j1939_filter *f = jsk->filters; + int nfilter = jsk->nfilters; + + if (!nfilter) + /* receive all when no filters are assigned */ + return true; + + for (; nfilter; ++f, --nfilter) { + if ((skcb->addr.pgn & f->pgn_mask) != f->pgn) + continue; + if ((skcb->addr.sa & f->addr_mask) != f->addr) + continue; + if ((skcb->addr.src_name & f->name_mask) != f->name) + continue; + return true; + } + return false; +} + +static bool j1939_sk_recv_match_one(struct j1939_sock *jsk, + const struct j1939_sk_buff_cb *skcb) +{ + if (!(jsk->state & J1939_SOCK_BOUND)) + return false; + + if (!j1939_sk_match_dst(jsk, skcb)) + return false; + + if (!j1939_sk_match_filter(jsk, skcb)) + return false; + + return true; +} + +static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb) +{ + 
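As an aside on the filter matching above: j1939_sk_match_filter() accepts a packet as soon as one struct j1939_filter entry matches on all three masked fields (PGN, source address, source NAME). A hedged userspace sketch of installing a single filter that only lets PGN 0x0ee00 (Address Claimed) through this check, assuming the SOL_CAN_J1939/SO_J1939_FILTER option introduced by this patch and a bound socket "sock"; error handling omitted:

	struct j1939_filter filt = {
		.pgn      = 0x0ee00,	/* Address Claimed */
		.pgn_mask = 0x3ffff,	/* compare the full PGN */
		/* .addr/.addr_mask and .name/.name_mask left 0: don't care */
	};

	setsockopt(sock, SOL_CAN_J1939, SO_J1939_FILTER, &filt, sizeof(filt));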
const struct j1939_sk_buff_cb *oskcb = j1939_skb_to_cb(oskb); + struct j1939_sk_buff_cb *skcb; + struct sk_buff *skb; + + if (oskb->sk == &jsk->sk) + return; + + if (!j1939_sk_recv_match_one(jsk, oskcb)) + return; + + skb = skb_clone(oskb, GFP_ATOMIC); + if (!skb) { + pr_warn("skb clone failed\n"); + return; + } + can_skb_set_owner(skb, oskb->sk); + + skcb = j1939_skb_to_cb(skb); + skcb->msg_flags &= ~(MSG_DONTROUTE); + if (skb->sk) + skcb->msg_flags |= MSG_DONTROUTE; + + if (sock_queue_rcv_skb(&jsk->sk, skb) < 0) + kfree_skb(skb); +} + +bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb) +{ + struct j1939_sock *jsk; + bool match = false; + + spin_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + match = j1939_sk_recv_match_one(jsk, skcb); + if (match) + break; + } + spin_unlock_bh(&priv->j1939_socks_lock); + + return match; +} + +void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sock *jsk; + + spin_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + j1939_sk_recv_one(jsk, skb); + } + spin_unlock_bh(&priv->j1939_socks_lock); +} + +static int j1939_sk_init(struct sock *sk) +{ + struct j1939_sock *jsk = j1939_sk(sk); + + /* Ensure that "sk" is first member in "struct j1939_sock", so that we + * can skip it during memset(). + */ + BUILD_BUG_ON(offsetof(struct j1939_sock, sk) != 0); + memset((void *)jsk + sizeof(jsk->sk), 0x0, + sizeof(*jsk) - sizeof(jsk->sk)); + + INIT_LIST_HEAD(&jsk->list); + init_waitqueue_head(&jsk->waitq); + jsk->sk.sk_priority = j1939_to_sk_priority(6); + jsk->sk.sk_reuse = 1; /* per default */ + jsk->addr.sa = J1939_NO_ADDR; + jsk->addr.da = J1939_NO_ADDR; + jsk->addr.pgn = J1939_NO_PGN; + jsk->pgn_rx_filter = J1939_NO_PGN; + atomic_set(&jsk->skb_pending, 0); + spin_lock_init(&jsk->sk_session_queue_lock); + INIT_LIST_HEAD(&jsk->sk_session_queue); + + return 0; +} + +static int j1939_sk_sanity_check(struct sockaddr_can *addr, int len) +{ + if (!addr) + return -EDESTADDRREQ; + if (len < J1939_MIN_NAMELEN) + return -EINVAL; + if (addr->can_family != AF_CAN) + return -EINVAL; + if (!addr->can_ifindex) + return -ENODEV; + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) && + !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn)) + return -EINVAL; + + return 0; +} + +static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) +{ + struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; + struct j1939_sock *jsk = j1939_sk(sock->sk); + struct j1939_priv *priv = jsk->priv; + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + int ret = 0; + + ret = j1939_sk_sanity_check(addr, len); + if (ret) + return ret; + + lock_sock(sock->sk); + + /* Already bound to an interface? */ + if (jsk->state & J1939_SOCK_BOUND) { + /* A re-bind() to a different interface is not + * supported. 
+ */ + if (jsk->ifindex != addr->can_ifindex) { + ret = -EINVAL; + goto out_release_sock; + } + + /* drop old references */ + j1939_jsk_del(priv, jsk); + j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa); + } else { + struct net_device *ndev; + + ndev = dev_get_by_index(net, addr->can_ifindex); + if (!ndev) { + ret = -ENODEV; + goto out_release_sock; + } + + if (ndev->type != ARPHRD_CAN) { + dev_put(ndev); + ret = -ENODEV; + goto out_release_sock; + } + + priv = j1939_netdev_start(ndev); + dev_put(ndev); + if (IS_ERR(priv)) { + ret = PTR_ERR(priv); + goto out_release_sock; + } + + jsk->ifindex = addr->can_ifindex; + } + + /* set default transmit pgn */ + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn)) + jsk->pgn_rx_filter = addr->can_addr.j1939.pgn; + jsk->addr.src_name = addr->can_addr.j1939.name; + jsk->addr.sa = addr->can_addr.j1939.addr; + + /* get new references */ + ret = j1939_local_ecu_get(priv, jsk->addr.src_name, jsk->addr.sa); + if (ret) { + j1939_netdev_stop(priv); + goto out_release_sock; + } + + j1939_jsk_add(priv, jsk); + + out_release_sock: /* fall through */ + release_sock(sock->sk); + + return ret; +} + +static int j1939_sk_connect(struct socket *sock, struct sockaddr *uaddr, + int len, int flags) +{ + struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; + struct j1939_sock *jsk = j1939_sk(sock->sk); + int ret = 0; + + ret = j1939_sk_sanity_check(addr, len); + if (ret) + return ret; + + lock_sock(sock->sk); + + /* bind() before connect() is mandatory */ + if (!(jsk->state & J1939_SOCK_BOUND)) { + ret = -EINVAL; + goto out_release_sock; + } + + /* A connect() to a different interface is not supported. */ + if (jsk->ifindex != addr->can_ifindex) { + ret = -EINVAL; + goto out_release_sock; + } + + if (!addr->can_addr.j1939.name && + addr->can_addr.j1939.addr == J1939_NO_ADDR && + !sock_flag(&jsk->sk, SOCK_BROADCAST)) { + /* broadcast, but SO_BROADCAST not set */ + ret = -EACCES; + goto out_release_sock; + } + + jsk->addr.dst_name = addr->can_addr.j1939.name; + jsk->addr.da = addr->can_addr.j1939.addr; + + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn)) + jsk->addr.pgn = addr->can_addr.j1939.pgn; + + jsk->state |= J1939_SOCK_CONNECTED; + + out_release_sock: /* fall through */ + release_sock(sock->sk); + + return ret; +} + +static void j1939_sk_sock2sockaddr_can(struct sockaddr_can *addr, + const struct j1939_sock *jsk, int peer) +{ + addr->can_family = AF_CAN; + addr->can_ifindex = jsk->ifindex; + addr->can_addr.j1939.pgn = jsk->addr.pgn; + if (peer) { + addr->can_addr.j1939.name = jsk->addr.dst_name; + addr->can_addr.j1939.addr = jsk->addr.da; + } else { + addr->can_addr.j1939.name = jsk->addr.src_name; + addr->can_addr.j1939.addr = jsk->addr.sa; + } +} + +static int j1939_sk_getname(struct socket *sock, struct sockaddr *uaddr, + int peer) +{ + struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; + struct sock *sk = sock->sk; + struct j1939_sock *jsk = j1939_sk(sk); + int ret = 0; + + lock_sock(sk); + + if (peer && !(jsk->state & J1939_SOCK_CONNECTED)) { + ret = -EADDRNOTAVAIL; + goto failure; + } + + j1939_sk_sock2sockaddr_can(addr, jsk, peer); + ret = J1939_MIN_NAMELEN; + + failure: + release_sock(sk); + + return ret; +} + +static int j1939_sk_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct j1939_sock *jsk; + + if (!sk) + return 0; + + jsk = j1939_sk(sk); + lock_sock(sk); + + if (jsk->state & J1939_SOCK_BOUND) { + struct j1939_priv *priv = jsk->priv; + + if (wait_event_interruptible(jsk->waitq, + 
!j1939_sock_pending_get(&jsk->sk))) { + j1939_cancel_active_session(priv, sk); + j1939_sk_queue_drop_all(priv, jsk, ESHUTDOWN); + } + + j1939_jsk_del(priv, jsk); + + j1939_local_ecu_put(priv, jsk->addr.src_name, + jsk->addr.sa); + + j1939_netdev_stop(priv); + } + + sock_orphan(sk); + sock->sk = NULL; + + release_sock(sk); + sock_put(sk); + + return 0; +} + +static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, char __user *optval, + unsigned int optlen, int flag) +{ + int tmp; + + if (optlen != sizeof(tmp)) + return -EINVAL; + if (copy_from_user(&tmp, optval, optlen)) + return -EFAULT; + lock_sock(&jsk->sk); + if (tmp) + jsk->state |= flag; + else + jsk->state &= ~flag; + release_sock(&jsk->sk); + return tmp; +} + +static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct j1939_sock *jsk = j1939_sk(sk); + int tmp, count = 0, ret = 0; + struct j1939_filter *filters = NULL, *ofilters; + + if (level != SOL_CAN_J1939) + return -EINVAL; + + switch (optname) { + case SO_J1939_FILTER: + if (optval) { + struct j1939_filter *f; + int c; + + if (optlen % sizeof(*filters) != 0) + return -EINVAL; + + if (optlen > J1939_FILTER_MAX * + sizeof(struct j1939_filter)) + return -EINVAL; + + count = optlen / sizeof(*filters); + filters = memdup_user(optval, optlen); + if (IS_ERR(filters)) + return PTR_ERR(filters); + + for (f = filters, c = count; c; f++, c--) { + f->name &= f->name_mask; + f->pgn &= f->pgn_mask; + f->addr &= f->addr_mask; + } + } + + lock_sock(&jsk->sk); + ofilters = jsk->filters; + jsk->filters = filters; + jsk->nfilters = count; + release_sock(&jsk->sk); + kfree(ofilters); + return 0; + case SO_J1939_PROMISC: + return j1939_sk_setsockopt_flag(jsk, optval, optlen, + J1939_SOCK_PROMISC); + case SO_J1939_ERRQUEUE: + ret = j1939_sk_setsockopt_flag(jsk, optval, optlen, + J1939_SOCK_ERRQUEUE); + if (ret < 0) + return ret; + + if (!(jsk->state & J1939_SOCK_ERRQUEUE)) + skb_queue_purge(&sk->sk_error_queue); + return ret; + case SO_J1939_SEND_PRIO: + if (optlen != sizeof(tmp)) + return -EINVAL; + if (copy_from_user(&tmp, optval, optlen)) + return -EFAULT; + if (tmp < 0 || tmp > 7) + return -EDOM; + if (tmp < 2 && !capable(CAP_NET_ADMIN)) + return -EPERM; + lock_sock(&jsk->sk); + jsk->sk.sk_priority = j1939_to_sk_priority(tmp); + release_sock(&jsk->sk); + return 0; + default: + return -ENOPROTOOPT; + } +} + +static int j1939_sk_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct j1939_sock *jsk = j1939_sk(sk); + int ret, ulen; + /* set defaults for using 'int' properties */ + int tmp = 0; + int len = sizeof(tmp); + void *val = &tmp; + + if (level != SOL_CAN_J1939) + return -EINVAL; + if (get_user(ulen, optlen)) + return -EFAULT; + if (ulen < 0) + return -EINVAL; + + lock_sock(&jsk->sk); + switch (optname) { + case SO_J1939_PROMISC: + tmp = (jsk->state & J1939_SOCK_PROMISC) ? 1 : 0; + break; + case SO_J1939_ERRQUEUE: + tmp = (jsk->state & J1939_SOCK_ERRQUEUE) ? 
1 : 0; + break; + case SO_J1939_SEND_PRIO: + tmp = j1939_prio(jsk->sk.sk_priority); + break; + default: + ret = -ENOPROTOOPT; + goto no_copy; + } + + /* copy to user, based on 'len' & 'val' + * but most sockopt's are 'int' properties, and have 'len' & 'val' + * left unchanged, but instead modified 'tmp' + */ + if (len > ulen) + ret = -EFAULT; + else if (put_user(len, optlen)) + ret = -EFAULT; + else if (copy_to_user(optval, val, len)) + ret = -EFAULT; + else + ret = 0; + no_copy: + release_sock(&jsk->sk); + return ret; +} + +static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) +{ + struct sock *sk = sock->sk; + struct sk_buff *skb; + struct j1939_sk_buff_cb *skcb; + int ret = 0; + + if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE)) + return -EINVAL; + + if (flags & MSG_ERRQUEUE) + return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939, + SCM_J1939_ERRQUEUE); + + skb = skb_recv_datagram(sk, flags, 0, &ret); + if (!skb) + return ret; + + if (size < skb->len) + msg->msg_flags |= MSG_TRUNC; + else + size = skb->len; + + ret = memcpy_to_msg(msg, skb->data, size); + if (ret < 0) { + skb_free_datagram(sk, skb); + return ret; + } + + skcb = j1939_skb_to_cb(skb); + if (j1939_address_is_valid(skcb->addr.da)) + put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_ADDR, + sizeof(skcb->addr.da), &skcb->addr.da); + + if (skcb->addr.dst_name) + put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_NAME, + sizeof(skcb->addr.dst_name), &skcb->addr.dst_name); + + put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_PRIO, + sizeof(skcb->priority), &skcb->priority); + + if (msg->msg_name) { + struct sockaddr_can *paddr = msg->msg_name; + + msg->msg_namelen = J1939_MIN_NAMELEN; + memset(msg->msg_name, 0, msg->msg_namelen); + paddr->can_family = AF_CAN; + paddr->can_ifindex = skb->skb_iif; + paddr->can_addr.j1939.name = skcb->addr.src_name; + paddr->can_addr.j1939.addr = skcb->addr.sa; + paddr->can_addr.j1939.pgn = skcb->addr.pgn; + } + + sock_recv_ts_and_drops(msg, sk, skb); + msg->msg_flags |= skcb->msg_flags; + skb_free_datagram(sk, skb); + + return size; +} + +static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev, + struct sock *sk, + struct msghdr *msg, size_t size, + int *errcode) +{ + struct j1939_sock *jsk = j1939_sk(sk); + struct j1939_sk_buff_cb *skcb; + struct sk_buff *skb; + int ret; + + skb = sock_alloc_send_skb(sk, + size + + sizeof(struct can_frame) - + sizeof(((struct can_frame *)NULL)->data) + + sizeof(struct can_skb_priv), + msg->msg_flags & MSG_DONTWAIT, &ret); + if (!skb) + goto failure; + + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = ndev->ifindex; + can_skb_prv(skb)->skbcnt = 0; + skb_reserve(skb, offsetof(struct can_frame, data)); + + ret = memcpy_from_msg(skb_put(skb, size), msg, size); + if (ret < 0) + goto free_skb; + + skb->dev = ndev; + + skcb = j1939_skb_to_cb(skb); + memset(skcb, 0, sizeof(*skcb)); + skcb->addr = jsk->addr; + skcb->priority = j1939_prio(sk->sk_priority); + + if (msg->msg_name) { + struct sockaddr_can *addr = msg->msg_name; + + if (addr->can_addr.j1939.name || + addr->can_addr.j1939.addr != J1939_NO_ADDR) { + skcb->addr.dst_name = addr->can_addr.j1939.name; + skcb->addr.da = addr->can_addr.j1939.addr; + } + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn)) + skcb->addr.pgn = addr->can_addr.j1939.pgn; + } + + *errcode = ret; + return skb; + +free_skb: + kfree_skb(skb); +failure: + *errcode = ret; + return NULL; +} + +static size_t j1939_sk_opt_stats_get_size(void) +{ + return + nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */ + 
0; +} + +static struct sk_buff * +j1939_sk_get_timestamping_opt_stats(struct j1939_session *session) +{ + struct sk_buff *stats; + u32 size; + + stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC); + if (!stats) + return NULL; + + if (session->skcb.addr.type == J1939_SIMPLE) + size = session->total_message_size; + else + size = min(session->pkt.tx_acked * 7, + session->total_message_size); + + nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size); + + return stats; +} + +void j1939_sk_errqueue(struct j1939_session *session, + enum j1939_sk_errqueue_type type) +{ + struct j1939_priv *priv = session->priv; + struct sock *sk = session->sk; + struct j1939_sock *jsk; + struct sock_exterr_skb *serr; + struct sk_buff *skb; + char *state = "UNK"; + int err; + + /* currently we have no sk for the RX session */ + if (!sk) + return; + + jsk = j1939_sk(sk); + + if (!(jsk->state & J1939_SOCK_ERRQUEUE)) + return; + + skb = j1939_sk_get_timestamping_opt_stats(session); + if (!skb) + return; + + skb->tstamp = ktime_get_real(); + + BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb)); + + serr = SKB_EXT_ERR(skb); + memset(serr, 0, sizeof(*serr)); + switch (type) { + case J1939_ERRQUEUE_ACK: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) + return; + + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + serr->ee.ee_info = SCM_TSTAMP_ACK; + state = "ACK"; + break; + case J1939_ERRQUEUE_SCHED: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) + return; + + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + serr->ee.ee_info = SCM_TSTAMP_SCHED; + state = "SCH"; + break; + case J1939_ERRQUEUE_ABORT: + serr->ee.ee_errno = session->err; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_info = J1939_EE_INFO_TX_ABORT; + state = "ABT"; + break; + default: + netdev_err(priv->ndev, "Unknown errqueue type %i\n", type); + } + + serr->opt_stats = true; + if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + serr->ee.ee_data = session->tskey; + + netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n", + __func__, session, session->tskey, state); + err = sock_queue_err_skb(sk, skb); + + if (err) + kfree_skb(skb); +}; + +void j1939_sk_send_loop_abort(struct sock *sk, int err) +{ + sk->sk_err = err; + + sk->sk_error_report(sk); +} + +static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk, + struct msghdr *msg, size_t size) + +{ + struct j1939_sock *jsk = j1939_sk(sk); + struct j1939_session *session = j1939_sk_get_incomplete_session(jsk); + struct sk_buff *skb; + size_t segment_size, todo_size; + int ret = 0; + + if (session && + session->total_message_size != session->total_queued_size + size) { + j1939_session_put(session); + return -EIO; + } + + todo_size = size; + + while (todo_size) { + struct j1939_sk_buff_cb *skcb; + + segment_size = min_t(size_t, J1939_MAX_TP_PACKET_SIZE, + todo_size); + + /* Allocate skb for one segment */ + skb = j1939_sk_alloc_skb(priv->ndev, sk, msg, segment_size, + &ret); + if (ret) + break; + + skcb = j1939_skb_to_cb(skb); + + if (!session) { + /* at this point the size should be full size + * of the session + */ + skcb->offset = 0; + session = j1939_tp_send(priv, skb, size); + if (IS_ERR(session)) { + ret = PTR_ERR(session); + goto kfree_skb; + } + if (j1939_sk_queue_session(session)) { + /* try to activate session if we a + * fist in the queue + */ + if (!j1939_session_activate(session)) { + j1939_tp_schedule_txtimer(session, 0); + } else { + ret = -EBUSY; + session->err = ret; + 
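A note on the error-queue path above: j1939_sk_errqueue() only queues ACK/SCHED notifications when SO_J1939_ERRQUEUE is enabled and the matching SOF_TIMESTAMPING_TX_* flags are set on the socket (abort notifications need only SO_J1939_ERRQUEUE). A hedged userspace sketch of enabling both and draining the queue, assuming a bound J1939 socket "sock" and an msghdr "msg" prepared with ancillary-data space; the flag combination is illustrative and error handling is omitted:

	int on = 1;
	int ts = SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_TX_SCHED |
		 SOF_TIMESTAMPING_OPT_ID;

	setsockopt(sock, SOL_CAN_J1939, SO_J1939_ERRQUEUE, &on, sizeof(on));
	setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &ts, sizeof(ts));

	/* later: the queued notifications come back via MSG_ERRQUEUE */
	recvmsg(sock, &msg, MSG_ERRQUEUE | MSG_DONTWAIT);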
j1939_sk_queue_drop_all(priv, jsk, + EBUSY); + break; + } + } + } else { + skcb->offset = session->total_queued_size; + j1939_session_skb_queue(session, skb); + } + + todo_size -= segment_size; + session->total_queued_size += segment_size; + } + + switch (ret) { + case 0: /* OK */ + if (todo_size) + netdev_warn(priv->ndev, + "no error found and not completely queued?! %zu\n", + todo_size); + ret = size; + break; + case -ERESTARTSYS: + ret = -EINTR; + /* fall through */ + case -EAGAIN: /* OK */ + if (todo_size != size) + ret = size - todo_size; + break; + default: /* ERROR */ + break; + } + + if (session) + j1939_session_put(session); + + return ret; + + kfree_skb: + kfree_skb(skb); + return ret; +} + +static int j1939_sk_sendmsg(struct socket *sock, struct msghdr *msg, + size_t size) +{ + struct sock *sk = sock->sk; + struct j1939_sock *jsk = j1939_sk(sk); + struct j1939_priv *priv = jsk->priv; + int ifindex; + int ret; + + /* various socket state tests */ + if (!(jsk->state & J1939_SOCK_BOUND)) + return -EBADFD; + + ifindex = jsk->ifindex; + + if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR) + /* no source address assigned yet */ + return -EBADFD; + + /* deal with provided destination address info */ + if (msg->msg_name) { + struct sockaddr_can *addr = msg->msg_name; + + if (msg->msg_namelen < J1939_MIN_NAMELEN) + return -EINVAL; + + if (addr->can_family != AF_CAN) + return -EINVAL; + + if (addr->can_ifindex && addr->can_ifindex != ifindex) + return -EBADFD; + + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) && + !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn)) + return -EINVAL; + + if (!addr->can_addr.j1939.name && + addr->can_addr.j1939.addr == J1939_NO_ADDR && + !sock_flag(sk, SOCK_BROADCAST)) + /* broadcast, but SO_BROADCAST not set */ + return -EACCES; + } else { + if (!jsk->addr.dst_name && jsk->addr.da == J1939_NO_ADDR && + !sock_flag(sk, SOCK_BROADCAST)) + /* broadcast, but SO_BROADCAST not set */ + return -EACCES; + } + + ret = j1939_sk_send_loop(priv, sk, msg, size); + + return ret; +} + +void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) +{ + struct j1939_sock *jsk; + int error_code = ENETDOWN; + + spin_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + jsk->sk.sk_err = error_code; + if (!sock_flag(&jsk->sk, SOCK_DEAD)) + jsk->sk.sk_error_report(&jsk->sk); + + j1939_sk_queue_drop_all(priv, jsk, error_code); + } + spin_unlock_bh(&priv->j1939_socks_lock); +} + +static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + /* no ioctls for socket layer -> hand it down to NIC layer */ + return -ENOIOCTLCMD; +} + +static const struct proto_ops j1939_ops = { + .family = PF_CAN, + .release = j1939_sk_release, + .bind = j1939_sk_bind, + .connect = j1939_sk_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = j1939_sk_getname, + .poll = datagram_poll, + .ioctl = j1939_sk_no_ioctlcmd, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = j1939_sk_setsockopt, + .getsockopt = j1939_sk_getsockopt, + .sendmsg = j1939_sk_sendmsg, + .recvmsg = j1939_sk_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static struct proto j1939_proto __read_mostly = { + .name = "CAN_J1939", + .owner = THIS_MODULE, + .obj_size = sizeof(struct j1939_sock), + .init = j1939_sk_init, +}; + +const struct can_proto j1939_can_proto = { + .type = SOCK_DGRAM, + .protocol = CAN_J1939, + .ops = &j1939_ops, + .prot = &j1939_proto, +}; diff --git 
a/net/can/j1939/transport.c b/net/can/j1939/transport.c new file mode 100644 index 000000000000..fe000ea757ea --- /dev/null +++ b/net/can/j1939/transport.c @@ -0,0 +1,2027 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <[email protected]> +// Copyright (c) 2018 Protonic, +// Robin van der Gracht <[email protected]> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <[email protected]> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <[email protected]> + +#include <linux/can/skb.h> + +#include "j1939-priv.h" + +#define J1939_XTP_TX_RETRY_LIMIT 100 + +#define J1939_ETP_PGN_CTL 0xc800 +#define J1939_ETP_PGN_DAT 0xc700 +#define J1939_TP_PGN_CTL 0xec00 +#define J1939_TP_PGN_DAT 0xeb00 + +#define J1939_TP_CMD_RTS 0x10 +#define J1939_TP_CMD_CTS 0x11 +#define J1939_TP_CMD_EOMA 0x13 +#define J1939_TP_CMD_BAM 0x20 +#define J1939_TP_CMD_ABORT 0xff + +#define J1939_ETP_CMD_RTS 0x14 +#define J1939_ETP_CMD_CTS 0x15 +#define J1939_ETP_CMD_DPO 0x16 +#define J1939_ETP_CMD_EOMA 0x17 +#define J1939_ETP_CMD_ABORT 0xff + +enum j1939_xtp_abort { + J1939_XTP_NO_ABORT = 0, + J1939_XTP_ABORT_BUSY = 1, + /* Already in one or more connection managed sessions and + * cannot support another. + * + * EALREADY: + * Operation already in progress + */ + + J1939_XTP_ABORT_RESOURCE = 2, + /* System resources were needed for another task so this + * connection managed session was terminated. + * + * EMSGSIZE: + * The socket type requires that message be sent atomically, + * and the size of the message to be sent made this + * impossible. + */ + + J1939_XTP_ABORT_TIMEOUT = 3, + /* A timeout occurred and this is the connection abort to + * close the session. + * + * EHOSTUNREACH: + * The destination host cannot be reached (probably because + * the host is down or a remote router cannot reach it). + */ + + J1939_XTP_ABORT_GENERIC = 4, + /* CTS messages received when data transfer is in progress + * + * EBADMSG: + * Not a data message + */ + + J1939_XTP_ABORT_FAULT = 5, + /* Maximal retransmit request limit reached + * + * ENOTRECOVERABLE: + * State not recoverable + */ + + J1939_XTP_ABORT_UNEXPECTED_DATA = 6, + /* Unexpected data transfer packet + * + * ENOTCONN: + * Transport endpoint is not connected + */ + + J1939_XTP_ABORT_BAD_SEQ = 7, + /* Bad sequence number (and software is not able to recover) + * + * EILSEQ: + * Illegal byte sequence + */ + + J1939_XTP_ABORT_DUP_SEQ = 8, + /* Duplicate sequence number (and software is not able to + * recover) + */ + + J1939_XTP_ABORT_EDPO_UNEXPECTED = 9, + /* Unexpected EDPO packet (ETP) or Message size > 1785 bytes + * (TP) + */ + + J1939_XTP_ABORT_BAD_EDPO_PGN = 10, + /* Unexpected EDPO PGN (PGN in EDPO is bad) */ + + J1939_XTP_ABORT_EDPO_OUTOF_CTS = 11, + /* EDPO number of packets is greater than CTS */ + + J1939_XTP_ABORT_BAD_EDPO_OFFSET = 12, + /* Bad EDPO offset */ + + J1939_XTP_ABORT_OTHER_DEPRECATED = 13, + /* Deprecated. 
Use 250 instead (Any other reason) */ + + J1939_XTP_ABORT_ECTS_UNXPECTED_PGN = 14, + /* Unexpected ECTS PGN (PGN in ECTS is bad) */ + + J1939_XTP_ABORT_ECTS_TOO_BIG = 15, + /* ECTS requested packets exceeds message size */ + + J1939_XTP_ABORT_OTHER = 250, + /* Any other reason (if a Connection Abort reason is + * identified that is not listed in the table use code 250) + */ +}; + +static unsigned int j1939_tp_block = 255; +static unsigned int j1939_tp_packet_delay; +static unsigned int j1939_tp_padding = 1; + +/* helpers */ +static const char *j1939_xtp_abort_to_str(enum j1939_xtp_abort abort) +{ + switch (abort) { + case J1939_XTP_ABORT_BUSY: + return "Already in one or more connection managed sessions and cannot support another."; + case J1939_XTP_ABORT_RESOURCE: + return "System resources were needed for another task so this connection managed session was terminated."; + case J1939_XTP_ABORT_TIMEOUT: + return "A timeout occurred and this is the connection abort to close the session."; + case J1939_XTP_ABORT_GENERIC: + return "CTS messages received when data transfer is in progress"; + case J1939_XTP_ABORT_FAULT: + return "Maximal retransmit request limit reached"; + case J1939_XTP_ABORT_UNEXPECTED_DATA: + return "Unexpected data transfer packet"; + case J1939_XTP_ABORT_BAD_SEQ: + return "Bad sequence number (and software is not able to recover)"; + case J1939_XTP_ABORT_DUP_SEQ: + return "Duplicate sequence number (and software is not able to recover)"; + case J1939_XTP_ABORT_EDPO_UNEXPECTED: + return "Unexpected EDPO packet (ETP) or Message size > 1785 bytes (TP)"; + case J1939_XTP_ABORT_BAD_EDPO_PGN: + return "Unexpected EDPO PGN (PGN in EDPO is bad)"; + case J1939_XTP_ABORT_EDPO_OUTOF_CTS: + return "EDPO number of packets is greater than CTS"; + case J1939_XTP_ABORT_BAD_EDPO_OFFSET: + return "Bad EDPO offset"; + case J1939_XTP_ABORT_OTHER_DEPRECATED: + return "Deprecated. 
Use 250 instead (Any other reason)"; + case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN: + return "Unexpected ECTS PGN (PGN in ECTS is bad)"; + case J1939_XTP_ABORT_ECTS_TOO_BIG: + return "ECTS requested packets exceeds message size"; + case J1939_XTP_ABORT_OTHER: + return "Any other reason (if a Connection Abort reason is identified that is not listed in the table use code 250)"; + default: + return "<unknown>"; + } +} + +static int j1939_xtp_abort_to_errno(struct j1939_priv *priv, + enum j1939_xtp_abort abort) +{ + int err; + + switch (abort) { + case J1939_XTP_NO_ABORT: + WARN_ON_ONCE(abort == J1939_XTP_NO_ABORT); + err = 0; + break; + case J1939_XTP_ABORT_BUSY: + err = EALREADY; + break; + case J1939_XTP_ABORT_RESOURCE: + err = EMSGSIZE; + break; + case J1939_XTP_ABORT_TIMEOUT: + err = EHOSTUNREACH; + break; + case J1939_XTP_ABORT_GENERIC: + err = EBADMSG; + break; + case J1939_XTP_ABORT_FAULT: + err = ENOTRECOVERABLE; + break; + case J1939_XTP_ABORT_UNEXPECTED_DATA: + err = ENOTCONN; + break; + case J1939_XTP_ABORT_BAD_SEQ: + err = EILSEQ; + break; + case J1939_XTP_ABORT_DUP_SEQ: + err = EPROTO; + break; + case J1939_XTP_ABORT_EDPO_UNEXPECTED: + err = EPROTO; + break; + case J1939_XTP_ABORT_BAD_EDPO_PGN: + err = EPROTO; + break; + case J1939_XTP_ABORT_EDPO_OUTOF_CTS: + err = EPROTO; + break; + case J1939_XTP_ABORT_BAD_EDPO_OFFSET: + err = EPROTO; + break; + case J1939_XTP_ABORT_OTHER_DEPRECATED: + err = EPROTO; + break; + case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN: + err = EPROTO; + break; + case J1939_XTP_ABORT_ECTS_TOO_BIG: + err = EPROTO; + break; + case J1939_XTP_ABORT_OTHER: + err = EPROTO; + break; + default: + netdev_warn(priv->ndev, "Unknown abort code %i", abort); + err = EPROTO; + } + + return err; +} + +static inline void j1939_session_list_lock(struct j1939_priv *priv) +{ + spin_lock_bh(&priv->active_session_list_lock); +} + +static inline void j1939_session_list_unlock(struct j1939_priv *priv) +{ + spin_unlock_bh(&priv->active_session_list_lock); +} + +void j1939_session_get(struct j1939_session *session) +{ + kref_get(&session->kref); +} + +/* session completion functions */ +static void __j1939_session_drop(struct j1939_session *session) +{ + if (!session->transmission) + return; + + j1939_sock_pending_del(session->sk); +} + +static void j1939_session_destroy(struct j1939_session *session) +{ + if (session->err) + j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT); + else + j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK); + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + skb_queue_purge(&session->skb_queue); + __j1939_session_drop(session); + j1939_priv_put(session->priv); + kfree(session); +} + +static void __j1939_session_release(struct kref *kref) +{ + struct j1939_session *session = container_of(kref, struct j1939_session, + kref); + + j1939_session_destroy(session); +} + +void j1939_session_put(struct j1939_session *session) +{ + kref_put(&session->kref, __j1939_session_release); +} + +static void j1939_session_txtimer_cancel(struct j1939_session *session) +{ + if (hrtimer_cancel(&session->txtimer)) + j1939_session_put(session); +} + +static void j1939_session_rxtimer_cancel(struct j1939_session *session) +{ + if (hrtimer_cancel(&session->rxtimer)) + j1939_session_put(session); +} + +void j1939_session_timers_cancel(struct j1939_session *session) +{ + j1939_session_txtimer_cancel(session); + j1939_session_rxtimer_cancel(session); +} + +static inline bool j1939_cb_is_broadcast(const struct j1939_sk_buff_cb *skcb) +{ + return (!skcb->addr.dst_name && 
(skcb->addr.da == 0xff)); +} + +static void j1939_session_skb_drop_old(struct j1939_session *session) +{ + struct sk_buff *do_skb; + struct j1939_sk_buff_cb *do_skcb; + unsigned int offset_start; + unsigned long flags; + + if (skb_queue_len(&session->skb_queue) < 2) + return; + + offset_start = session->pkt.tx_acked * 7; + + spin_lock_irqsave(&session->skb_queue.lock, flags); + do_skb = skb_peek(&session->skb_queue); + do_skcb = j1939_skb_to_cb(do_skb); + + if ((do_skcb->offset + do_skb->len) < offset_start) { + __skb_unlink(do_skb, &session->skb_queue); + kfree_skb(do_skb); + } + spin_unlock_irqrestore(&session->skb_queue.lock, flags); +} + +void j1939_session_skb_queue(struct j1939_session *session, + struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_priv *priv = session->priv; + + j1939_ac_fixup(priv, skb); + + if (j1939_address_is_unicast(skcb->addr.da) && + priv->ents[skcb->addr.da].nusers) + skcb->flags |= J1939_ECU_LOCAL_DST; + + skcb->flags |= J1939_ECU_LOCAL_SRC; + + skb_queue_tail(&session->skb_queue, skb); +} + +static struct sk_buff *j1939_session_skb_find(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + struct sk_buff *skb = NULL; + struct sk_buff *do_skb; + struct j1939_sk_buff_cb *do_skcb; + unsigned int offset_start; + unsigned long flags; + + offset_start = session->pkt.dpo * 7; + + spin_lock_irqsave(&session->skb_queue.lock, flags); + skb_queue_walk(&session->skb_queue, do_skb) { + do_skcb = j1939_skb_to_cb(do_skb); + + if (offset_start >= do_skcb->offset && + offset_start < (do_skcb->offset + do_skb->len)) { + skb = do_skb; + } + } + spin_unlock_irqrestore(&session->skb_queue.lock, flags); + + if (!skb) + netdev_dbg(priv->ndev, "%s: 0x%p: no skb found for start: %i, queue size: %i\n", + __func__, session, offset_start, + skb_queue_len(&session->skb_queue)); + + return skb; +} + +/* see if we are receiver + * returns 0 for broadcasts, although we will receive them + */ +static inline int j1939_tp_im_receiver(const struct j1939_sk_buff_cb *skcb) +{ + return skcb->flags & J1939_ECU_LOCAL_DST; +} + +/* see if we are sender */ +static inline int j1939_tp_im_transmitter(const struct j1939_sk_buff_cb *skcb) +{ + return skcb->flags & J1939_ECU_LOCAL_SRC; +} + +/* see if we are involved as either receiver or transmitter */ +static int j1939_tp_im_involved(const struct j1939_sk_buff_cb *skcb, bool swap) +{ + if (swap) + return j1939_tp_im_receiver(skcb); + else + return j1939_tp_im_transmitter(skcb); +} + +static int j1939_tp_im_involved_anydir(struct j1939_sk_buff_cb *skcb) +{ + return skcb->flags & (J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST); +} + +/* extract pgn from flow-ctl message */ +static inline pgn_t j1939_xtp_ctl_to_pgn(const u8 *dat) +{ + pgn_t pgn; + + pgn = (dat[7] << 16) | (dat[6] << 8) | (dat[5] << 0); + if (j1939_pgn_is_pdu1(pgn)) + pgn &= 0xffff00; + return pgn; +} + +static inline unsigned int j1939_tp_ctl_to_size(const u8 *dat) +{ + return (dat[2] << 8) + (dat[1] << 0); +} + +static inline unsigned int j1939_etp_ctl_to_packet(const u8 *dat) +{ + return (dat[4] << 16) | (dat[3] << 8) | (dat[2] << 0); +} + +static inline unsigned int j1939_etp_ctl_to_size(const u8 *dat) +{ + return (dat[4] << 24) | (dat[3] << 16) | + (dat[2] << 8) | (dat[1] << 0); +} + +/* find existing session: + * reverse: swap cb's src & dst + * there is no problem with matching broadcasts, since + * broadcasts (no dst, no da) would never call this + * with reverse == true + */ +static bool j1939_session_match(struct 
j1939_addr *se_addr, + struct j1939_addr *sk_addr, bool reverse) +{ + if (se_addr->type != sk_addr->type) + return false; + + if (reverse) { + if (se_addr->src_name) { + if (se_addr->src_name != sk_addr->dst_name) + return false; + } else if (se_addr->sa != sk_addr->da) { + return false; + } + + if (se_addr->dst_name) { + if (se_addr->dst_name != sk_addr->src_name) + return false; + } else if (se_addr->da != sk_addr->sa) { + return false; + } + } else { + if (se_addr->src_name) { + if (se_addr->src_name != sk_addr->src_name) + return false; + } else if (se_addr->sa != sk_addr->sa) { + return false; + } + + if (se_addr->dst_name) { + if (se_addr->dst_name != sk_addr->dst_name) + return false; + } else if (se_addr->da != sk_addr->da) { + return false; + } + } + + return true; +} + +static struct +j1939_session *j1939_session_get_by_addr_locked(struct j1939_priv *priv, + struct list_head *root, + struct j1939_addr *addr, + bool reverse, bool transmitter) +{ + struct j1939_session *session; + + lockdep_assert_held(&priv->active_session_list_lock); + + list_for_each_entry(session, root, active_session_list_entry) { + j1939_session_get(session); + if (j1939_session_match(&session->skcb.addr, addr, reverse) && + session->transmission == transmitter) + return session; + j1939_session_put(session); + } + + return NULL; +} + +static struct +j1939_session *j1939_session_get_simple(struct j1939_priv *priv, + struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + + lockdep_assert_held(&priv->active_session_list_lock); + + list_for_each_entry(session, &priv->active_session_list, + active_session_list_entry) { + j1939_session_get(session); + if (session->skcb.addr.type == J1939_SIMPLE && + session->tskey == skcb->tskey && session->sk == skb->sk) + return session; + j1939_session_put(session); + } + + return NULL; +} + +static struct +j1939_session *j1939_session_get_by_addr(struct j1939_priv *priv, + struct j1939_addr *addr, + bool reverse, bool transmitter) +{ + struct j1939_session *session; + + j1939_session_list_lock(priv); + session = j1939_session_get_by_addr_locked(priv, + &priv->active_session_list, + addr, reverse, transmitter); + j1939_session_list_unlock(priv); + + return session; +} + +static void j1939_skbcb_swap(struct j1939_sk_buff_cb *skcb) +{ + u8 tmp = 0; + + swap(skcb->addr.dst_name, skcb->addr.src_name); + swap(skcb->addr.da, skcb->addr.sa); + + /* swap SRC and DST flags, leave other untouched */ + if (skcb->flags & J1939_ECU_LOCAL_SRC) + tmp |= J1939_ECU_LOCAL_DST; + if (skcb->flags & J1939_ECU_LOCAL_DST) + tmp |= J1939_ECU_LOCAL_SRC; + skcb->flags &= ~(J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST); + skcb->flags |= tmp; +} + +static struct +sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv, + const struct j1939_sk_buff_cb *re_skcb, + bool ctl, + bool swap_src_dst) +{ + struct sk_buff *skb; + struct j1939_sk_buff_cb *skcb; + + skb = alloc_skb(sizeof(struct can_frame) + sizeof(struct can_skb_priv), + GFP_ATOMIC); + if (unlikely(!skb)) + return ERR_PTR(-ENOMEM); + + skb->dev = priv->ndev; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = priv->ndev->ifindex; + /* reserve CAN header */ + skb_reserve(skb, offsetof(struct can_frame, data)); + + memcpy(skb->cb, re_skcb, sizeof(skb->cb)); + skcb = j1939_skb_to_cb(skb); + if (swap_src_dst) + j1939_skbcb_swap(skcb); + + if (ctl) { + if (skcb->addr.type == J1939_ETP) + skcb->addr.pgn = J1939_ETP_PGN_CTL; + else + skcb->addr.pgn = J1939_TP_PGN_CTL; + } else { + if (skcb->addr.type 
== J1939_ETP) + skcb->addr.pgn = J1939_ETP_PGN_DAT; + else + skcb->addr.pgn = J1939_TP_PGN_DAT; + } + + return skb; +} + +/* TP transmit packet functions */ +static int j1939_tp_tx_dat(struct j1939_session *session, + const u8 *dat, int len) +{ + struct j1939_priv *priv = session->priv; + struct sk_buff *skb; + + skb = j1939_tp_tx_dat_new(priv, &session->skcb, + false, false); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + skb_put_data(skb, dat, len); + if (j1939_tp_padding && len < 8) + memset(skb_put(skb, 8 - len), 0xff, 8 - len); + + return j1939_send_one(priv, skb); +} + +static int j1939_xtp_do_tx_ctl(struct j1939_priv *priv, + const struct j1939_sk_buff_cb *re_skcb, + bool swap_src_dst, pgn_t pgn, const u8 *dat) +{ + struct sk_buff *skb; + u8 *skdat; + + if (!j1939_tp_im_involved(re_skcb, swap_src_dst)) + return 0; + + skb = j1939_tp_tx_dat_new(priv, re_skcb, true, swap_src_dst); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + skdat = skb_put(skb, 8); + memcpy(skdat, dat, 5); + skdat[5] = (pgn >> 0); + skdat[6] = (pgn >> 8); + skdat[7] = (pgn >> 16); + + return j1939_send_one(priv, skb); +} + +static inline int j1939_tp_tx_ctl(struct j1939_session *session, + bool swap_src_dst, const u8 *dat) +{ + struct j1939_priv *priv = session->priv; + + return j1939_xtp_do_tx_ctl(priv, &session->skcb, + swap_src_dst, + session->skcb.addr.pgn, dat); +} + +static int j1939_xtp_tx_abort(struct j1939_priv *priv, + const struct j1939_sk_buff_cb *re_skcb, + bool swap_src_dst, + enum j1939_xtp_abort err, + pgn_t pgn) +{ + u8 dat[5]; + + if (!j1939_tp_im_involved(re_skcb, swap_src_dst)) + return 0; + + memset(dat, 0xff, sizeof(dat)); + dat[0] = J1939_TP_CMD_ABORT; + dat[1] = err; + return j1939_xtp_do_tx_ctl(priv, re_skcb, swap_src_dst, pgn, dat); +} + +void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec) +{ + j1939_session_get(session); + hrtimer_start(&session->txtimer, ms_to_ktime(msec), + HRTIMER_MODE_REL_SOFT); +} + +static inline void j1939_tp_set_rxtimeout(struct j1939_session *session, + int msec) +{ + j1939_session_rxtimer_cancel(session); + j1939_session_get(session); + hrtimer_start(&session->rxtimer, ms_to_ktime(msec), + HRTIMER_MODE_REL_SOFT); +} + +static int j1939_session_tx_rts(struct j1939_session *session) +{ + u8 dat[8]; + int ret; + + memset(dat, 0xff, sizeof(dat)); + + dat[1] = (session->total_message_size >> 0); + dat[2] = (session->total_message_size >> 8); + dat[3] = session->pkt.total; + + if (session->skcb.addr.type == J1939_ETP) { + dat[0] = J1939_ETP_CMD_RTS; + dat[1] = (session->total_message_size >> 0); + dat[2] = (session->total_message_size >> 8); + dat[3] = (session->total_message_size >> 16); + dat[4] = (session->total_message_size >> 24); + } else if (j1939_cb_is_broadcast(&session->skcb)) { + dat[0] = J1939_TP_CMD_BAM; + /* fake cts for broadcast */ + session->pkt.tx = 0; + } else { + dat[0] = J1939_TP_CMD_RTS; + dat[4] = dat[3]; + } + + if (dat[0] == session->last_txcmd) + /* done already */ + return 0; + + ret = j1939_tp_tx_ctl(session, false, dat); + if (ret < 0) + return ret; + + session->last_txcmd = dat[0]; + if (dat[0] == J1939_TP_CMD_BAM) + j1939_tp_schedule_txtimer(session, 50); + + j1939_tp_set_rxtimeout(session, 1250); + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static int j1939_session_tx_dpo(struct j1939_session *session) +{ + unsigned int pkt; + u8 dat[8]; + int ret; + + memset(dat, 0xff, sizeof(dat)); + + dat[0] = J1939_ETP_CMD_DPO; + session->pkt.dpo = session->pkt.tx_acked; + pkt = 
session->pkt.dpo; + dat[1] = session->pkt.last - session->pkt.tx_acked; + dat[2] = (pkt >> 0); + dat[3] = (pkt >> 8); + dat[4] = (pkt >> 16); + + ret = j1939_tp_tx_ctl(session, false, dat); + if (ret < 0) + return ret; + + session->last_txcmd = dat[0]; + j1939_tp_set_rxtimeout(session, 1250); + session->pkt.tx = session->pkt.tx_acked; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static int j1939_session_tx_dat(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + struct j1939_sk_buff_cb *skcb; + int offset, pkt_done, pkt_end; + unsigned int len, pdelay; + struct sk_buff *se_skb; + const u8 *tpdat; + int ret = 0; + u8 dat[8]; + + se_skb = j1939_session_skb_find(session); + if (!se_skb) + return -ENOBUFS; + + skcb = j1939_skb_to_cb(se_skb); + tpdat = se_skb->data; + ret = 0; + pkt_done = 0; + if (session->skcb.addr.type != J1939_ETP && + j1939_cb_is_broadcast(&session->skcb)) + pkt_end = session->pkt.total; + else + pkt_end = session->pkt.last; + + while (session->pkt.tx < pkt_end) { + dat[0] = session->pkt.tx - session->pkt.dpo + 1; + offset = (session->pkt.tx * 7) - skcb->offset; + len = se_skb->len - offset; + if (len > 7) + len = 7; + + memcpy(&dat[1], &tpdat[offset], len); + ret = j1939_tp_tx_dat(session, dat, len + 1); + if (ret < 0) { + /* ENOBUS == CAN interface TX queue is full */ + if (ret != -ENOBUFS) + netdev_alert(priv->ndev, + "%s: 0x%p: queue data error: %i\n", + __func__, session, ret); + break; + } + + session->last_txcmd = 0xff; + pkt_done++; + session->pkt.tx++; + pdelay = j1939_cb_is_broadcast(&session->skcb) ? 50 : + j1939_tp_packet_delay; + + if (session->pkt.tx < session->pkt.total && pdelay) { + j1939_tp_schedule_txtimer(session, pdelay); + break; + } + } + + if (pkt_done) + j1939_tp_set_rxtimeout(session, 250); + + return ret; +} + +static int j1939_xtp_txnext_transmiter(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + int ret = 0; + + if (!j1939_tp_im_transmitter(&session->skcb)) { + netdev_alert(priv->ndev, "%s: 0x%p: called by not transmitter!\n", + __func__, session); + return -EINVAL; + } + + switch (session->last_cmd) { + case 0: + ret = j1939_session_tx_rts(session); + break; + + case J1939_ETP_CMD_CTS: + if (session->last_txcmd != J1939_ETP_CMD_DPO) { + ret = j1939_session_tx_dpo(session); + if (ret) + return ret; + } + + /* fall through */ + case J1939_TP_CMD_CTS: + case 0xff: /* did some data */ + case J1939_ETP_CMD_DPO: + case J1939_TP_CMD_BAM: + ret = j1939_session_tx_dat(session); + + break; + default: + netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n", + __func__, session, session->last_cmd); + } + + return ret; +} + +static int j1939_session_tx_cts(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + unsigned int pkt, len; + int ret; + u8 dat[8]; + + if (!j1939_sk_recv_match(priv, &session->skcb)) + return -ENOENT; + + len = session->pkt.total - session->pkt.rx; + len = min3(len, session->pkt.block, j1939_tp_block ?: 255); + memset(dat, 0xff, sizeof(dat)); + + if (session->skcb.addr.type == J1939_ETP) { + pkt = session->pkt.rx + 1; + dat[0] = J1939_ETP_CMD_CTS; + dat[1] = len; + dat[2] = (pkt >> 0); + dat[3] = (pkt >> 8); + dat[4] = (pkt >> 16); + } else { + dat[0] = J1939_TP_CMD_CTS; + dat[1] = len; + dat[2] = session->pkt.rx + 1; + } + + if (dat[0] == session->last_txcmd) + /* done already */ + return 0; + + ret = j1939_tp_tx_ctl(session, true, dat); + if (ret < 0) + return ret; + + if (len) + /* only mark cts 
done when len is set */ + session->last_txcmd = dat[0]; + j1939_tp_set_rxtimeout(session, 1250); + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static int j1939_session_tx_eoma(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + u8 dat[8]; + int ret; + + if (!j1939_sk_recv_match(priv, &session->skcb)) + return -ENOENT; + + memset(dat, 0xff, sizeof(dat)); + + if (session->skcb.addr.type == J1939_ETP) { + dat[0] = J1939_ETP_CMD_EOMA; + dat[1] = session->total_message_size >> 0; + dat[2] = session->total_message_size >> 8; + dat[3] = session->total_message_size >> 16; + dat[4] = session->total_message_size >> 24; + } else { + dat[0] = J1939_TP_CMD_EOMA; + dat[1] = session->total_message_size; + dat[2] = session->total_message_size >> 8; + dat[3] = session->pkt.total; + } + + if (dat[0] == session->last_txcmd) + /* done already */ + return 0; + + ret = j1939_tp_tx_ctl(session, true, dat); + if (ret < 0) + return ret; + + session->last_txcmd = dat[0]; + + /* wait for the EOMA packet to come in */ + j1939_tp_set_rxtimeout(session, 1250); + + netdev_dbg(session->priv->ndev, "%p: 0x%p\n", __func__, session); + + return 0; +} + +static int j1939_xtp_txnext_receiver(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + int ret = 0; + + if (!j1939_tp_im_receiver(&session->skcb)) { + netdev_alert(priv->ndev, "%s: 0x%p: called by not receiver!\n", + __func__, session); + return -EINVAL; + } + + switch (session->last_cmd) { + case J1939_TP_CMD_RTS: + case J1939_ETP_CMD_RTS: + ret = j1939_session_tx_cts(session); + break; + + case J1939_ETP_CMD_CTS: + case J1939_TP_CMD_CTS: + case 0xff: /* did some data */ + case J1939_ETP_CMD_DPO: + if ((session->skcb.addr.type == J1939_TP && + j1939_cb_is_broadcast(&session->skcb))) + break; + + if (session->pkt.rx >= session->pkt.total) { + ret = j1939_session_tx_eoma(session); + } else if (session->pkt.rx >= session->pkt.last) { + session->last_txcmd = 0; + ret = j1939_session_tx_cts(session); + } + break; + default: + netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n", + __func__, session, session->last_cmd); + } + + return ret; +} + +static int j1939_simple_txnext(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + struct sk_buff *se_skb = j1939_session_skb_find(session); + struct sk_buff *skb; + int ret; + + if (!se_skb) + return 0; + + skb = skb_clone(se_skb, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + can_skb_set_owner(skb, se_skb->sk); + + j1939_tp_set_rxtimeout(session, J1939_SIMPLE_ECHO_TIMEOUT_MS); + + ret = j1939_send_one(priv, skb); + if (ret) + return ret; + + j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED); + j1939_sk_queue_activate_next(session); + + return 0; +} + +static bool j1939_session_deactivate_locked(struct j1939_session *session) +{ + bool active = false; + + lockdep_assert_held(&session->priv->active_session_list_lock); + + if (session->state >= J1939_SESSION_ACTIVE && + session->state < J1939_SESSION_ACTIVE_MAX) { + active = true; + + list_del_init(&session->active_session_list_entry); + session->state = J1939_SESSION_DONE; + j1939_session_put(session); + } + + return active; +} + +static bool j1939_session_deactivate(struct j1939_session *session) +{ + bool active; + + j1939_session_list_lock(session->priv); + active = j1939_session_deactivate_locked(session); + j1939_session_list_unlock(session->priv); + + return active; +} + +static void +j1939_session_deactivate_activate_next(struct j1939_session 
*session) +{ + if (j1939_session_deactivate(session)) + j1939_sk_queue_activate_next(session); +} + +static void j1939_session_cancel(struct j1939_session *session, + enum j1939_xtp_abort err) +{ + struct j1939_priv *priv = session->priv; + + WARN_ON_ONCE(!err); + + session->err = j1939_xtp_abort_to_errno(priv, err); + /* do not send aborts on incoming broadcasts */ + if (!j1939_cb_is_broadcast(&session->skcb)) { + session->state = J1939_SESSION_WAITING_ABORT; + j1939_xtp_tx_abort(priv, &session->skcb, + !session->transmission, + err, session->skcb.addr.pgn); + } + + if (session->sk) + j1939_sk_send_loop_abort(session->sk, session->err); +} + +static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer) +{ + struct j1939_session *session = + container_of(hrtimer, struct j1939_session, txtimer); + struct j1939_priv *priv = session->priv; + int ret = 0; + + if (session->skcb.addr.type == J1939_SIMPLE) { + ret = j1939_simple_txnext(session); + } else { + if (session->transmission) + ret = j1939_xtp_txnext_transmiter(session); + else + ret = j1939_xtp_txnext_receiver(session); + } + + switch (ret) { + case -ENOBUFS: + /* Retry limit is currently arbitrary chosen */ + if (session->tx_retry < J1939_XTP_TX_RETRY_LIMIT) { + session->tx_retry++; + j1939_tp_schedule_txtimer(session, + 10 + prandom_u32_max(16)); + } else { + netdev_alert(priv->ndev, "%s: 0x%p: tx retry count reached\n", + __func__, session); + session->err = -ENETUNREACH; + j1939_session_rxtimer_cancel(session); + j1939_session_deactivate_activate_next(session); + } + break; + case -ENETDOWN: + /* In this case we should get a netdev_event(), all active + * sessions will be cleared by + * j1939_cancel_all_active_sessions(). So handle this as an + * error, but let j1939_cancel_all_active_sessions() do the + * cleanup including propagation of the error to user space. + */ + break; + case 0: + session->tx_retry = 0; + break; + default: + netdev_alert(priv->ndev, "%s: 0x%p: tx aborted with unknown reason: %i\n", + __func__, session, ret); + if (session->skcb.addr.type != J1939_SIMPLE) { + j1939_tp_set_rxtimeout(session, + J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, J1939_XTP_ABORT_OTHER); + } else { + session->err = ret; + j1939_session_rxtimer_cancel(session); + j1939_session_deactivate_activate_next(session); + } + } + + j1939_session_put(session); + + return HRTIMER_NORESTART; +} + +static void j1939_session_completed(struct j1939_session *session) +{ + struct sk_buff *skb; + + if (!session->transmission) { + skb = j1939_session_skb_find(session); + /* distribute among j1939 receivers */ + j1939_sk_recv(session->priv, skb); + } + + j1939_session_deactivate_activate_next(session); +} + +static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer) +{ + struct j1939_session *session = container_of(hrtimer, + struct j1939_session, + rxtimer); + struct j1939_priv *priv = session->priv; + + if (session->state == J1939_SESSION_WAITING_ABORT) { + netdev_alert(priv->ndev, "%s: 0x%p: abort rx timeout. Force session deactivation\n", + __func__, session); + + j1939_session_deactivate_activate_next(session); + + } else if (session->skcb.addr.type == J1939_SIMPLE) { + netdev_alert(priv->ndev, "%s: 0x%p: Timeout. Failed to send simple message.\n", + __func__, session); + + /* The message is probably stuck in the CAN controller and can + * be send as soon as CAN bus is in working state again. 
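+ * Mark the session with -ETIME and drop it from the active list;
+ * no abort frame is sent for a single-frame (J1939_SIMPLE) session,
+ * as there is no transport-protocol peer to notify.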
+ */ + session->err = -ETIME; + j1939_session_deactivate(session); + } else { + netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n", + __func__, session); + + j1939_session_list_lock(session->priv); + if (session->state >= J1939_SESSION_ACTIVE && + session->state < J1939_SESSION_ACTIVE_MAX) { + j1939_session_get(session); + hrtimer_start(&session->rxtimer, + ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS), + HRTIMER_MODE_REL_SOFT); + j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT); + } + j1939_session_list_unlock(session->priv); + } + + j1939_session_put(session); + + return HRTIMER_NORESTART; +} + +static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session, + const struct sk_buff *skb) +{ + const struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + pgn_t pgn = j1939_xtp_ctl_to_pgn(skb->data); + struct j1939_priv *priv = session->priv; + enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT; + u8 cmd = skb->data[0]; + + if (session->skcb.addr.pgn == pgn) + return false; + + switch (cmd) { + case J1939_TP_CMD_BAM: + abort = J1939_XTP_NO_ABORT; + break; + + case J1939_ETP_CMD_RTS: + case J1939_TP_CMD_RTS: /* fall through */ + abort = J1939_XTP_ABORT_BUSY; + break; + + case J1939_ETP_CMD_CTS: + case J1939_TP_CMD_CTS: /* fall through */ + abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN; + break; + + case J1939_ETP_CMD_DPO: + abort = J1939_XTP_ABORT_BAD_EDPO_PGN; + break; + + case J1939_ETP_CMD_EOMA: + case J1939_TP_CMD_EOMA: /* fall through */ + abort = J1939_XTP_ABORT_OTHER; + break; + + case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */ + abort = J1939_XTP_NO_ABORT; + break; + + default: + WARN_ON_ONCE(1); + break; + } + + netdev_warn(priv->ndev, "%s: 0x%p: CMD 0x%02x with PGN 0x%05x for running session with different PGN 0x%05x.\n", + __func__, session, cmd, pgn, session->skcb.addr.pgn); + if (abort != J1939_XTP_NO_ABORT) + j1939_xtp_tx_abort(priv, skcb, true, abort, pgn); + + return true; +} + +static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb, + bool reverse, bool transmitter) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + u8 abort = skb->data[1]; + + session = j1939_session_get_by_addr(priv, &skcb->addr, reverse, + transmitter); + if (!session) + return; + + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + goto abort_put; + + netdev_info(priv->ndev, "%s: 0x%p: 0x%05x: (%u) %s\n", __func__, + session, j1939_xtp_ctl_to_pgn(skb->data), abort, + j1939_xtp_abort_to_str(abort)); + + j1939_session_timers_cancel(session); + session->err = j1939_xtp_abort_to_errno(priv, abort); + if (session->sk) + j1939_sk_send_loop_abort(session->sk, session->err); + j1939_session_deactivate_activate_next(session); + +abort_put: + j1939_session_put(session); +} + +/* abort packets may come in 2 directions */ +static void +j1939_xtp_rx_abort(struct j1939_priv *priv, struct sk_buff *skb, + bool transmitter) +{ + j1939_xtp_rx_abort_one(priv, skb, false, transmitter); + j1939_xtp_rx_abort_one(priv, skb, true, transmitter); +} + +static void +j1939_xtp_rx_eoma_one(struct j1939_session *session, struct sk_buff *skb) +{ + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + return; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + session->pkt.tx_acked = session->pkt.total; + j1939_session_timers_cancel(session); + /* transmitted without problems */ + j1939_session_completed(session); +} + +static void +j1939_xtp_rx_eoma(struct j1939_priv *priv, struct sk_buff *skb, + bool transmitter) +{ + struct j1939_sk_buff_cb 
*skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + + session = j1939_session_get_by_addr(priv, &skcb->addr, true, + transmitter); + if (!session) + return; + + j1939_xtp_rx_eoma_one(session, skb); + j1939_session_put(session); +} + +static void +j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb) +{ + enum j1939_xtp_abort err = J1939_XTP_ABORT_FAULT; + unsigned int pkt; + const u8 *dat; + + dat = skb->data; + + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + return; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + if (session->last_cmd == dat[0]) { + err = J1939_XTP_ABORT_DUP_SEQ; + goto out_session_cancel; + } + + if (session->skcb.addr.type == J1939_ETP) + pkt = j1939_etp_ctl_to_packet(dat); + else + pkt = dat[2]; + + if (!pkt) + goto out_session_cancel; + else if (dat[1] > session->pkt.block /* 0xff for etp */) + goto out_session_cancel; + + /* set packet counters only when not CTS(0) */ + session->pkt.tx_acked = pkt - 1; + j1939_session_skb_drop_old(session); + session->pkt.last = session->pkt.tx_acked + dat[1]; + if (session->pkt.last > session->pkt.total) + /* safety measure */ + session->pkt.last = session->pkt.total; + /* TODO: do not set tx here, do it in txtimer */ + session->pkt.tx = session->pkt.tx_acked; + + session->last_cmd = dat[0]; + if (dat[1]) { + j1939_tp_set_rxtimeout(session, 1250); + if (session->transmission) { + if (session->pkt.tx_acked) + j1939_sk_errqueue(session, + J1939_ERRQUEUE_SCHED); + j1939_session_txtimer_cancel(session); + j1939_tp_schedule_txtimer(session, 0); + } + } else { + /* CTS(0) */ + j1939_tp_set_rxtimeout(session, 550); + } + return; + + out_session_cancel: + j1939_session_timers_cancel(session); + j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, err); +} + +static void +j1939_xtp_rx_cts(struct j1939_priv *priv, struct sk_buff *skb, bool transmitter) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + + session = j1939_session_get_by_addr(priv, &skcb->addr, true, + transmitter); + if (!session) + return; + j1939_xtp_rx_cts_one(session, skb); + j1939_session_put(session); +} + +static struct j1939_session *j1939_session_new(struct j1939_priv *priv, + struct sk_buff *skb, size_t size) +{ + struct j1939_session *session; + struct j1939_sk_buff_cb *skcb; + + session = kzalloc(sizeof(*session), gfp_any()); + if (!session) + return NULL; + + INIT_LIST_HEAD(&session->active_session_list_entry); + INIT_LIST_HEAD(&session->sk_session_queue_entry); + kref_init(&session->kref); + + j1939_priv_get(priv); + session->priv = priv; + session->total_message_size = size; + session->state = J1939_SESSION_NEW; + + skb_queue_head_init(&session->skb_queue); + skb_queue_tail(&session->skb_queue, skb); + + skcb = j1939_skb_to_cb(skb); + memcpy(&session->skcb, skcb, sizeof(session->skcb)); + + hrtimer_init(&session->txtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); + session->txtimer.function = j1939_tp_txtimer; + hrtimer_init(&session->rxtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); + session->rxtimer.function = j1939_tp_rxtimer; + + netdev_dbg(priv->ndev, "%s: 0x%p: sa: %02x, da: %02x\n", + __func__, session, skcb->addr.sa, skcb->addr.da); + + return session; +} + +static struct +j1939_session *j1939_session_fresh_new(struct j1939_priv *priv, + int size, + const struct j1939_sk_buff_cb *rel_skcb) +{ + struct sk_buff *skb; + struct j1939_sk_buff_cb *skcb; + struct j1939_session *session; + + skb = alloc_skb(size + 
sizeof(struct can_skb_priv), GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + skb->dev = priv->ndev; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = priv->ndev->ifindex; + skcb = j1939_skb_to_cb(skb); + memcpy(skcb, rel_skcb, sizeof(*skcb)); + + session = j1939_session_new(priv, skb, skb->len); + if (!session) { + kfree_skb(skb); + return NULL; + } + + /* alloc data area */ + skb_put(skb, size); + /* skb is recounted in j1939_session_new() */ + return session; +} + +int j1939_session_activate(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + struct j1939_session *active = NULL; + int ret = 0; + + j1939_session_list_lock(priv); + if (session->skcb.addr.type != J1939_SIMPLE) + active = j1939_session_get_by_addr_locked(priv, + &priv->active_session_list, + &session->skcb.addr, false, + session->transmission); + if (active) { + j1939_session_put(active); + ret = -EAGAIN; + } else { + WARN_ON_ONCE(session->state != J1939_SESSION_NEW); + list_add_tail(&session->active_session_list_entry, + &priv->active_session_list); + j1939_session_get(session); + session->state = J1939_SESSION_ACTIVE; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", + __func__, session); + } + j1939_session_list_unlock(priv); + + return ret; +} + +static struct +j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, + struct sk_buff *skb) +{ + enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT; + struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb); + struct j1939_session *session; + const u8 *dat; + pgn_t pgn; + int len; + + netdev_dbg(priv->ndev, "%s\n", __func__); + + dat = skb->data; + pgn = j1939_xtp_ctl_to_pgn(dat); + skcb.addr.pgn = pgn; + + if (!j1939_sk_recv_match(priv, &skcb)) + return NULL; + + if (skcb.addr.type == J1939_ETP) { + len = j1939_etp_ctl_to_size(dat); + if (len > J1939_MAX_ETP_PACKET_SIZE) + abort = J1939_XTP_ABORT_FAULT; + else if (len > priv->tp_max_packet_size) + abort = J1939_XTP_ABORT_RESOURCE; + else if (len <= J1939_MAX_TP_PACKET_SIZE) + abort = J1939_XTP_ABORT_FAULT; + } else { + len = j1939_tp_ctl_to_size(dat); + if (len > J1939_MAX_TP_PACKET_SIZE) + abort = J1939_XTP_ABORT_FAULT; + else if (len > priv->tp_max_packet_size) + abort = J1939_XTP_ABORT_RESOURCE; + } + + if (abort != J1939_XTP_NO_ABORT) { + j1939_xtp_tx_abort(priv, &skcb, true, abort, pgn); + return NULL; + } + + session = j1939_session_fresh_new(priv, len, &skcb); + if (!session) { + j1939_xtp_tx_abort(priv, &skcb, true, + J1939_XTP_ABORT_RESOURCE, pgn); + return NULL; + } + + /* initialize the control buffer: plain copy */ + session->pkt.total = (len + 6) / 7; + session->pkt.block = 0xff; + if (skcb.addr.type != J1939_ETP) { + if (dat[3] != session->pkt.total) + netdev_alert(priv->ndev, "%s: 0x%p: strange total, %u != %u\n", + __func__, session, session->pkt.total, + dat[3]); + session->pkt.total = dat[3]; + session->pkt.block = min(dat[3], dat[4]); + } + + session->pkt.rx = 0; + session->pkt.tx = 0; + + WARN_ON_ONCE(j1939_session_activate(session)); + + return session; +} + +static int j1939_xtp_rx_rts_session_active(struct j1939_session *session, + struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_priv *priv = session->priv; + + if (!session->transmission) { + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + return -EBUSY; + + /* RTS on active session */ + j1939_session_timers_cancel(session); + j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, J1939_XTP_ABORT_BUSY); + } + + if 
(session->last_cmd != 0) { + /* we received a second rts on the same connection */ + netdev_alert(priv->ndev, "%s: 0x%p: connection exists (%02x %02x). last cmd: %x\n", + __func__, session, skcb->addr.sa, skcb->addr.da, + session->last_cmd); + + j1939_session_timers_cancel(session); + j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, J1939_XTP_ABORT_BUSY); + + return -EBUSY; + } + + if (session->skcb.addr.sa != skcb->addr.sa || + session->skcb.addr.da != skcb->addr.da) + netdev_warn(priv->ndev, "%s: 0x%p: session->skcb.addr.sa=0x%02x skcb->addr.sa=0x%02x session->skcb.addr.da=0x%02x skcb->addr.da=0x%02x\n", + __func__, session, + session->skcb.addr.sa, skcb->addr.sa, + session->skcb.addr.da, skcb->addr.da); + /* make sure 'sa' & 'da' are correct ! + * They may be 'not filled in yet' for sending + * skb's, since they did not pass the Address Claim ever. + */ + session->skcb.addr.sa = skcb->addr.sa; + session->skcb.addr.da = skcb->addr.da; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static void j1939_xtp_rx_rts(struct j1939_priv *priv, struct sk_buff *skb, + bool transmitter) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + u8 cmd = skb->data[0]; + + session = j1939_session_get_by_addr(priv, &skcb->addr, false, + transmitter); + + if (!session) { + if (transmitter) { + /* If we're the transmitter and this function is called, + * we received our own RTS. A session has already been + * created. + * + * For some reasons however it might have been destroyed + * already. So don't create a new one here (using + * "j1939_xtp_rx_rts_session_new()") as this will be a + * receiver session. + * + * The reasons the session is already destroyed might + * be: + * - user space closed socket was and the session was + * aborted + * - session was aborted due to external abort message + */ + return; + } + session = j1939_xtp_rx_rts_session_new(priv, skb); + if (!session) + return; + } else { + if (j1939_xtp_rx_rts_session_active(session, skb)) { + j1939_session_put(session); + return; + } + } + session->last_cmd = cmd; + + j1939_tp_set_rxtimeout(session, 1250); + + if (cmd != J1939_TP_CMD_BAM && !session->transmission) { + j1939_session_txtimer_cancel(session); + j1939_tp_schedule_txtimer(session, 0); + } + + j1939_session_put(session); +} + +static void j1939_xtp_rx_dpo_one(struct j1939_session *session, + struct sk_buff *skb) +{ + const u8 *dat = skb->data; + + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + return; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + /* transmitted without problems */ + session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data); + session->last_cmd = dat[0]; + j1939_tp_set_rxtimeout(session, 750); +} + +static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb, + bool transmitter) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + + session = j1939_session_get_by_addr(priv, &skcb->addr, false, + transmitter); + if (!session) { + netdev_info(priv->ndev, + "%s: no connection found\n", __func__); + return; + } + + j1939_xtp_rx_dpo_one(session, skb); + j1939_session_put(session); +} + +static void j1939_xtp_rx_dat_one(struct j1939_session *session, + struct sk_buff *skb) +{ + struct j1939_priv *priv = session->priv; + struct j1939_sk_buff_cb *skcb; + struct sk_buff *se_skb; + const u8 *dat; + u8 *tpdat; + int offset; + int nbytes; + bool final = false; + bool do_cts_eoma = 
false; + int packet; + + skcb = j1939_skb_to_cb(skb); + dat = skb->data; + if (skb->len <= 1) + /* makes no sense */ + goto out_session_cancel; + + switch (session->last_cmd) { + case 0xff: + break; + case J1939_ETP_CMD_DPO: + if (skcb->addr.type == J1939_ETP) + break; + /* fall through */ + case J1939_TP_CMD_BAM: /* fall through */ + case J1939_TP_CMD_CTS: /* fall through */ + if (skcb->addr.type != J1939_ETP) + break; + /* fall through */ + default: + netdev_info(priv->ndev, "%s: 0x%p: last %02x\n", __func__, + session, session->last_cmd); + goto out_session_cancel; + } + + packet = (dat[0] - 1 + session->pkt.dpo); + if (packet > session->pkt.total || + (session->pkt.rx + 1) > session->pkt.total) { + netdev_info(priv->ndev, "%s: 0x%p: should have been completed\n", + __func__, session); + goto out_session_cancel; + } + se_skb = j1939_session_skb_find(session); + if (!se_skb) { + netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__, + session); + goto out_session_cancel; + } + + skcb = j1939_skb_to_cb(se_skb); + offset = packet * 7 - skcb->offset; + nbytes = se_skb->len - offset; + if (nbytes > 7) + nbytes = 7; + if (nbytes <= 0 || (nbytes + 1) > skb->len) { + netdev_info(priv->ndev, "%s: 0x%p: nbytes %i, len %i\n", + __func__, session, nbytes, skb->len); + goto out_session_cancel; + } + + tpdat = se_skb->data; + memcpy(&tpdat[offset], &dat[1], nbytes); + if (packet == session->pkt.rx) + session->pkt.rx++; + + if (skcb->addr.type != J1939_ETP && + j1939_cb_is_broadcast(&session->skcb)) { + if (session->pkt.rx >= session->pkt.total) + final = true; + } else { + /* never final, an EOMA must follow */ + if (session->pkt.rx >= session->pkt.last) + do_cts_eoma = true; + } + + if (final) { + j1939_session_completed(session); + } else if (do_cts_eoma) { + j1939_tp_set_rxtimeout(session, 1250); + if (!session->transmission) + j1939_tp_schedule_txtimer(session, 0); + } else { + j1939_tp_set_rxtimeout(session, 250); + } + session->last_cmd = 0xff; + j1939_session_put(session); + + return; + + out_session_cancel: + j1939_session_timers_cancel(session); + j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, J1939_XTP_ABORT_FAULT); + j1939_session_put(session); +} + +static void j1939_xtp_rx_dat(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb; + struct j1939_session *session; + + skcb = j1939_skb_to_cb(skb); + + if (j1939_tp_im_transmitter(skcb)) { + session = j1939_session_get_by_addr(priv, &skcb->addr, false, + true); + if (!session) + netdev_info(priv->ndev, "%s: no tx connection found\n", + __func__); + else + j1939_xtp_rx_dat_one(session, skb); + } + + if (j1939_tp_im_receiver(skcb)) { + session = j1939_session_get_by_addr(priv, &skcb->addr, false, + false); + if (!session) + netdev_info(priv->ndev, "%s: no rx connection found\n", + __func__); + else + j1939_xtp_rx_dat_one(session, skb); + } +} + +/* j1939 main intf */ +struct j1939_session *j1939_tp_send(struct j1939_priv *priv, + struct sk_buff *skb, size_t size) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + int ret; + + if (skcb->addr.pgn == J1939_TP_PGN_DAT || + skcb->addr.pgn == J1939_TP_PGN_CTL || + skcb->addr.pgn == J1939_ETP_PGN_DAT || + skcb->addr.pgn == J1939_ETP_PGN_CTL) + /* avoid conflict */ + return ERR_PTR(-EDOM); + + if (size > priv->tp_max_packet_size) + return ERR_PTR(-EMSGSIZE); + + if (size <= 8) + skcb->addr.type = J1939_SIMPLE; + else if (size > J1939_MAX_TP_PACKET_SIZE) + skcb->addr.type = 
J1939_ETP; + else + skcb->addr.type = J1939_TP; + + if (skcb->addr.type == J1939_ETP && + j1939_cb_is_broadcast(skcb)) + return ERR_PTR(-EDESTADDRREQ); + + /* fill in addresses from names */ + ret = j1939_ac_fixup(priv, skb); + if (unlikely(ret)) + return ERR_PTR(ret); + + /* fix DST flags, it may be used there soon */ + if (j1939_address_is_unicast(skcb->addr.da) && + priv->ents[skcb->addr.da].nusers) + skcb->flags |= J1939_ECU_LOCAL_DST; + + /* src is always local, I'm sending ... */ + skcb->flags |= J1939_ECU_LOCAL_SRC; + + /* prepare new session */ + session = j1939_session_new(priv, skb, size); + if (!session) + return ERR_PTR(-ENOMEM); + + /* skb is recounted in j1939_session_new() */ + session->sk = skb->sk; + session->transmission = true; + session->pkt.total = (size + 6) / 7; + session->pkt.block = skcb->addr.type == J1939_ETP ? 255 : + min(j1939_tp_block ?: 255, session->pkt.total); + + if (j1939_cb_is_broadcast(&session->skcb)) + /* set the end-packet for broadcast */ + session->pkt.last = session->pkt.total; + + skcb->tskey = session->sk->sk_tskey++; + session->tskey = skcb->tskey; + + return session; +} + +static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + int extd = J1939_TP; + u8 cmd = skb->data[0]; + + switch (cmd) { + case J1939_ETP_CMD_RTS: + extd = J1939_ETP; + /* fall through */ + case J1939_TP_CMD_BAM: /* fall through */ + case J1939_TP_CMD_RTS: /* fall through */ + if (skcb->addr.type != extd) + return; + + if (cmd == J1939_TP_CMD_RTS && j1939_cb_is_broadcast(skcb)) { + netdev_alert(priv->ndev, "%s: rts without destination (%02x)\n", + __func__, skcb->addr.sa); + return; + } + + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_rts(priv, skb, true); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_rts(priv, skb, false); + + break; + + case J1939_ETP_CMD_CTS: + extd = J1939_ETP; + /* fall through */ + case J1939_TP_CMD_CTS: + if (skcb->addr.type != extd) + return; + + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_cts(priv, skb, false); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_cts(priv, skb, true); + + break; + + case J1939_ETP_CMD_DPO: + if (skcb->addr.type != J1939_ETP) + return; + + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_dpo(priv, skb, true); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_dpo(priv, skb, false); + + break; + + case J1939_ETP_CMD_EOMA: + extd = J1939_ETP; + /* fall through */ + case J1939_TP_CMD_EOMA: + if (skcb->addr.type != extd) + return; + + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_eoma(priv, skb, false); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_eoma(priv, skb, true); + + break; + + case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */ + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_abort(priv, skb, true); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_abort(priv, skb, false); + + break; + default: + return; + } +} + +int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + + if (!j1939_tp_im_involved_anydir(skcb)) + return 0; + + switch (skcb->addr.pgn) { + case J1939_ETP_PGN_DAT: + skcb->addr.type = J1939_ETP; + /* fall through */ + case J1939_TP_PGN_DAT: + j1939_xtp_rx_dat(priv, skb); + break; + + case J1939_ETP_PGN_CTL: + skcb->addr.type = J1939_ETP; + /* fall through */ + case J1939_TP_PGN_CTL: + if (skb->len < 8) + return 0; /* Don't care. 
Nothing to extract here */ + + j1939_tp_cmd_recv(priv, skb); + break; + default: + return 0; /* no problem */ + } + return 1; /* "I processed the message" */ +} + +void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_session *session; + + if (!skb->sk) + return; + + j1939_session_list_lock(priv); + session = j1939_session_get_simple(priv, skb); + j1939_session_list_unlock(priv); + if (!session) { + netdev_warn(priv->ndev, + "%s: Received already invalidated message\n", + __func__); + return; + } + + j1939_session_timers_cancel(session); + j1939_session_deactivate(session); + j1939_session_put(session); +} + +int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk) +{ + struct j1939_session *session, *saved; + + netdev_dbg(priv->ndev, "%s, sk: %p\n", __func__, sk); + j1939_session_list_lock(priv); + list_for_each_entry_safe(session, saved, + &priv->active_session_list, + active_session_list_entry) { + if (!sk || sk == session->sk) { + j1939_session_timers_cancel(session); + session->err = ESHUTDOWN; + j1939_session_deactivate_locked(session); + } + } + j1939_session_list_unlock(priv); + return NOTIFY_DONE; +} + +void j1939_tp_init(struct j1939_priv *priv) +{ + spin_lock_init(&priv->active_session_list_lock); + INIT_LIST_HEAD(&priv->active_session_list); + priv->tp_max_packet_size = J1939_MAX_ETP_PACKET_SIZE; +} diff --git a/net/can/proc.c b/net/can/proc.c index edb822c31902..e6881bfc3ed1 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -45,6 +45,7 @@ #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/if_arp.h> +#include <linux/can/can-ml.h> #include <linux/can/core.h> #include "af_can.h" @@ -78,21 +79,21 @@ static const char rx_list_name[][8] = { static void can_init_stats(struct net *net) { - struct s_stats *can_stats = net->can.can_stats; - struct s_pstats *can_pstats = net->can.can_pstats; + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; /* * This memset function is called from a timer context (when * can_stattimer is active which is the default) OR in a process * context (reading the proc_fs when can_stattimer is disabled). 
*/ - memset(can_stats, 0, sizeof(struct s_stats)); - can_stats->jiffies_init = jiffies; + memset(pkg_stats, 0, sizeof(struct can_pkg_stats)); + pkg_stats->jiffies_init = jiffies; - can_pstats->stats_reset++; + rcv_lists_stats->stats_reset++; if (user_reset) { user_reset = 0; - can_pstats->user_reset++; + rcv_lists_stats->user_reset++; } } @@ -118,8 +119,8 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif, void can_stat_update(struct timer_list *t) { - struct net *net = from_timer(net, t, can.can_stattimer); - struct s_stats *can_stats = net->can.can_stats; + struct net *net = from_timer(net, t, can.stattimer); + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; unsigned long j = jiffies; /* snapshot */ /* restart counting in timer context on user request */ @@ -127,57 +128,57 @@ void can_stat_update(struct timer_list *t) can_init_stats(net); /* restart counting on jiffies overflow */ - if (j < can_stats->jiffies_init) + if (j < pkg_stats->jiffies_init) can_init_stats(net); /* prevent overflow in calc_rate() */ - if (can_stats->rx_frames > (ULONG_MAX / HZ)) + if (pkg_stats->rx_frames > (ULONG_MAX / HZ)) can_init_stats(net); /* prevent overflow in calc_rate() */ - if (can_stats->tx_frames > (ULONG_MAX / HZ)) + if (pkg_stats->tx_frames > (ULONG_MAX / HZ)) can_init_stats(net); /* matches overflow - very improbable */ - if (can_stats->matches > (ULONG_MAX / 100)) + if (pkg_stats->matches > (ULONG_MAX / 100)) can_init_stats(net); /* calc total values */ - if (can_stats->rx_frames) - can_stats->total_rx_match_ratio = (can_stats->matches * 100) / - can_stats->rx_frames; + if (pkg_stats->rx_frames) + pkg_stats->total_rx_match_ratio = (pkg_stats->matches * 100) / + pkg_stats->rx_frames; - can_stats->total_tx_rate = calc_rate(can_stats->jiffies_init, j, - can_stats->tx_frames); - can_stats->total_rx_rate = calc_rate(can_stats->jiffies_init, j, - can_stats->rx_frames); + pkg_stats->total_tx_rate = calc_rate(pkg_stats->jiffies_init, j, + pkg_stats->tx_frames); + pkg_stats->total_rx_rate = calc_rate(pkg_stats->jiffies_init, j, + pkg_stats->rx_frames); /* calc current values */ - if (can_stats->rx_frames_delta) - can_stats->current_rx_match_ratio = - (can_stats->matches_delta * 100) / - can_stats->rx_frames_delta; + if (pkg_stats->rx_frames_delta) + pkg_stats->current_rx_match_ratio = + (pkg_stats->matches_delta * 100) / + pkg_stats->rx_frames_delta; - can_stats->current_tx_rate = calc_rate(0, HZ, can_stats->tx_frames_delta); - can_stats->current_rx_rate = calc_rate(0, HZ, can_stats->rx_frames_delta); + pkg_stats->current_tx_rate = calc_rate(0, HZ, pkg_stats->tx_frames_delta); + pkg_stats->current_rx_rate = calc_rate(0, HZ, pkg_stats->rx_frames_delta); /* check / update maximum values */ - if (can_stats->max_tx_rate < can_stats->current_tx_rate) - can_stats->max_tx_rate = can_stats->current_tx_rate; + if (pkg_stats->max_tx_rate < pkg_stats->current_tx_rate) + pkg_stats->max_tx_rate = pkg_stats->current_tx_rate; - if (can_stats->max_rx_rate < can_stats->current_rx_rate) - can_stats->max_rx_rate = can_stats->current_rx_rate; + if (pkg_stats->max_rx_rate < pkg_stats->current_rx_rate) + pkg_stats->max_rx_rate = pkg_stats->current_rx_rate; - if (can_stats->max_rx_match_ratio < can_stats->current_rx_match_ratio) - can_stats->max_rx_match_ratio = can_stats->current_rx_match_ratio; + if (pkg_stats->max_rx_match_ratio < pkg_stats->current_rx_match_ratio) + pkg_stats->max_rx_match_ratio = pkg_stats->current_rx_match_ratio; /* clear values for 'current rate' calculation */ - 
can_stats->tx_frames_delta = 0; - can_stats->rx_frames_delta = 0; - can_stats->matches_delta = 0; + pkg_stats->tx_frames_delta = 0; + pkg_stats->rx_frames_delta = 0; + pkg_stats->matches_delta = 0; /* restart timer (one second) */ - mod_timer(&net->can.can_stattimer, round_jiffies(jiffies + HZ)); + mod_timer(&net->can.stattimer, round_jiffies(jiffies + HZ)); } /* @@ -212,60 +213,60 @@ static void can_print_recv_banner(struct seq_file *m) static int can_stats_proc_show(struct seq_file *m, void *v) { struct net *net = m->private; - struct s_stats *can_stats = net->can.can_stats; - struct s_pstats *can_pstats = net->can.can_pstats; + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; seq_putc(m, '\n'); - seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats->tx_frames); - seq_printf(m, " %8ld received frames (RXF)\n", can_stats->rx_frames); - seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats->matches); + seq_printf(m, " %8ld transmitted frames (TXF)\n", pkg_stats->tx_frames); + seq_printf(m, " %8ld received frames (RXF)\n", pkg_stats->rx_frames); + seq_printf(m, " %8ld matched frames (RXMF)\n", pkg_stats->matches); seq_putc(m, '\n'); - if (net->can.can_stattimer.function == can_stat_update) { + if (net->can.stattimer.function == can_stat_update) { seq_printf(m, " %8ld %% total match ratio (RXMR)\n", - can_stats->total_rx_match_ratio); + pkg_stats->total_rx_match_ratio); seq_printf(m, " %8ld frames/s total tx rate (TXR)\n", - can_stats->total_tx_rate); + pkg_stats->total_tx_rate); seq_printf(m, " %8ld frames/s total rx rate (RXR)\n", - can_stats->total_rx_rate); + pkg_stats->total_rx_rate); seq_putc(m, '\n'); seq_printf(m, " %8ld %% current match ratio (CRXMR)\n", - can_stats->current_rx_match_ratio); + pkg_stats->current_rx_match_ratio); seq_printf(m, " %8ld frames/s current tx rate (CTXR)\n", - can_stats->current_tx_rate); + pkg_stats->current_tx_rate); seq_printf(m, " %8ld frames/s current rx rate (CRXR)\n", - can_stats->current_rx_rate); + pkg_stats->current_rx_rate); seq_putc(m, '\n'); seq_printf(m, " %8ld %% max match ratio (MRXMR)\n", - can_stats->max_rx_match_ratio); + pkg_stats->max_rx_match_ratio); seq_printf(m, " %8ld frames/s max tx rate (MTXR)\n", - can_stats->max_tx_rate); + pkg_stats->max_tx_rate); seq_printf(m, " %8ld frames/s max rx rate (MRXR)\n", - can_stats->max_rx_rate); + pkg_stats->max_rx_rate); seq_putc(m, '\n'); } seq_printf(m, " %8ld current receive list entries (CRCV)\n", - can_pstats->rcv_entries); + rcv_lists_stats->rcv_entries); seq_printf(m, " %8ld maximum receive list entries (MRCV)\n", - can_pstats->rcv_entries_max); + rcv_lists_stats->rcv_entries_max); - if (can_pstats->stats_reset) + if (rcv_lists_stats->stats_reset) seq_printf(m, "\n %8ld statistic resets (STR)\n", - can_pstats->stats_reset); + rcv_lists_stats->stats_reset); - if (can_pstats->user_reset) + if (rcv_lists_stats->user_reset) seq_printf(m, " %8ld user statistic resets (USTR)\n", - can_pstats->user_reset); + rcv_lists_stats->user_reset); seq_putc(m, '\n'); return 0; @@ -274,20 +275,20 @@ static int can_stats_proc_show(struct seq_file *m, void *v) static int can_reset_stats_proc_show(struct seq_file *m, void *v) { struct net *net = m->private; - struct s_pstats *can_pstats = net->can.can_pstats; - struct s_stats *can_stats = net->can.can_stats; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; user_reset = 1; - if 
(net->can.can_stattimer.function == can_stat_update) { + if (net->can.stattimer.function == can_stat_update) { seq_printf(m, "Scheduled statistic reset #%ld.\n", - can_pstats->stats_reset + 1); + rcv_lists_stats->stats_reset + 1); } else { - if (can_stats->jiffies_init != jiffies) + if (pkg_stats->jiffies_init != jiffies) can_init_stats(net); seq_printf(m, "Performed statistic reset #%ld.\n", - can_pstats->stats_reset); + rcv_lists_stats->stats_reset); } return 0; } @@ -300,11 +301,11 @@ static int can_version_proc_show(struct seq_file *m, void *v) static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx, struct net_device *dev, - struct can_dev_rcv_lists *d) + struct can_dev_rcv_lists *dev_rcv_lists) { - if (!hlist_empty(&d->rx[idx])) { + if (!hlist_empty(&dev_rcv_lists->rx[idx])) { can_print_recv_banner(m); - can_print_rcvlist(m, &d->rx[idx], dev); + can_print_rcvlist(m, &dev_rcv_lists->rx[idx], dev); } else seq_printf(m, " (%s: no entry)\n", DNAME(dev)); @@ -315,7 +316,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v) /* double cast to prevent GCC warning */ int idx = (int)(long)PDE_DATA(m->file->f_inode); struct net_device *dev; - struct can_dev_rcv_lists *d; + struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = m->private; seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]); @@ -323,8 +324,8 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* receive list for 'all' CAN devices (dev == NULL) */ - d = net->can.can_rx_alldev_list; - can_rcvlist_proc_show_one(m, idx, NULL, d); + dev_rcv_lists = net->can.rx_alldev_list; + can_rcvlist_proc_show_one(m, idx, NULL, dev_rcv_lists); /* receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { @@ -366,7 +367,7 @@ static inline void can_rcvlist_proc_show_array(struct seq_file *m, static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; - struct can_dev_rcv_lists *d; + struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = m->private; /* RX_SFF */ @@ -375,15 +376,16 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* sff receive list for 'all' CAN devices (dev == NULL) */ - d = net->can.can_rx_alldev_list; - can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff)); + dev_rcv_lists = net->can.rx_alldev_list; + can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_sff, + ARRAY_SIZE(dev_rcv_lists->rx_sff)); /* sff receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) { - d = dev->ml_priv; - can_rcvlist_proc_show_array(m, dev, d->rx_sff, - ARRAY_SIZE(d->rx_sff)); + dev_rcv_lists = dev->ml_priv; + can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_sff, + ARRAY_SIZE(dev_rcv_lists->rx_sff)); } } @@ -396,7 +398,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; - struct can_dev_rcv_lists *d; + struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = m->private; /* RX_EFF */ @@ -405,15 +407,16 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* eff receive list for 'all' CAN devices (dev == NULL) */ - d = net->can.can_rx_alldev_list; - can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff)); + dev_rcv_lists = net->can.rx_alldev_list; + can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_eff, + ARRAY_SIZE(dev_rcv_lists->rx_eff)); /* eff 
receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) { - d = dev->ml_priv; - can_rcvlist_proc_show_array(m, dev, d->rx_eff, - ARRAY_SIZE(d->rx_eff)); + dev_rcv_lists = dev->ml_priv; + can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_eff, + ARRAY_SIZE(dev_rcv_lists->rx_eff)); } } diff --git a/net/can/raw.c b/net/can/raw.c index fdbc36140e9b..59c039d73c6d 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -396,7 +396,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) int err = 0; int notify_enetdown = 0; - if (len < sizeof(*addr)) + if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex)) return -EINVAL; if (addr->can_family != AF_CAN) return -EINVAL; @@ -733,7 +733,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); - if (msg->msg_namelen < sizeof(*addr)) + if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex)) return -EINVAL; if (addr->can_family != AF_CAN) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 94c7f77ecb6b..da5639a5bd3b 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -12,6 +12,9 @@ static atomic_t cache_idx; +#define SK_STORAGE_CREATE_FLAG_MASK \ + (BPF_F_NO_PREALLOC | BPF_F_CLONE) + struct bucket { struct hlist_head list; raw_spinlock_t lock; @@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem) kfree_rcu(sk_storage, rcu); } -/* sk_storage->lock must be held and sk_storage->list cannot be empty */ static void __selem_link_sk(struct bpf_sk_storage *sk_storage, struct bpf_sk_storage_elem *selem) { @@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map) return 0; } -/* Called by __sk_destruct() */ +/* Called by __sk_destruct() & bpf_sk_storage_clone() */ void bpf_sk_storage_free(struct sock *sk) { struct bpf_sk_storage_elem *selem; @@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) smap = (struct bpf_sk_storage_map *)map; + /* Note that this map might be concurrently cloned from + * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone + * RCU read section to finish before proceeding. New RCU + * read sections should be prevented via bpf_map_inc_not_zero. 
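+ * (bpf_sk_storage_clone() only copies from maps whose refcount it
+ * could still take via bpf_map_inc_not_zero(), so no new clone can
+ * start referencing this map once it is being freed.)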
+ */ synchronize_rcu(); /* bpf prog and the userspace can no longer access this map @@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr) { - if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries || + if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK || + !(attr->map_flags & BPF_F_NO_PREALLOC) || + attr->max_entries || attr->key_size != sizeof(int) || !attr->value_size || /* Enforce BTF for userspace sk dumping */ !attr->btf_key_type_id || !attr->btf_value_type_id) @@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) return err; } +static struct bpf_sk_storage_elem * +bpf_sk_storage_clone_elem(struct sock *newsk, + struct bpf_sk_storage_map *smap, + struct bpf_sk_storage_elem *selem) +{ + struct bpf_sk_storage_elem *copy_selem; + + copy_selem = selem_alloc(smap, newsk, NULL, true); + if (!copy_selem) + return NULL; + + if (map_value_has_spin_lock(&smap->map)) + copy_map_value_locked(&smap->map, SDATA(copy_selem)->data, + SDATA(selem)->data, true); + else + copy_map_value(&smap->map, SDATA(copy_selem)->data, + SDATA(selem)->data); + + return copy_selem; +} + +int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) +{ + struct bpf_sk_storage *new_sk_storage = NULL; + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_elem *selem; + int ret = 0; + + RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); + + rcu_read_lock(); + sk_storage = rcu_dereference(sk->sk_bpf_storage); + + if (!sk_storage || hlist_empty(&sk_storage->list)) + goto out; + + hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { + struct bpf_sk_storage_elem *copy_selem; + struct bpf_sk_storage_map *smap; + struct bpf_map *map; + + smap = rcu_dereference(SDATA(selem)->smap); + if (!(smap->map.map_flags & BPF_F_CLONE)) + continue; + + /* Note that for lockless listeners adding new element + * here can race with cleanup in bpf_sk_storage_map_free. + * Try to grab map refcnt to make sure that it's still + * alive and prevent concurrent removal. + */ + map = bpf_map_inc_not_zero(&smap->map, false); + if (IS_ERR(map)) + continue; + + copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem); + if (!copy_selem) { + ret = -ENOMEM; + bpf_map_put(map); + goto out; + } + + if (new_sk_storage) { + selem_link_map(smap, copy_selem); + __selem_link_sk(new_sk_storage, copy_selem); + } else { + ret = sk_storage_alloc(newsk, smap, copy_selem); + if (ret) { + kfree(copy_selem); + atomic_sub(smap->elem_size, + &newsk->sk_omem_alloc); + bpf_map_put(map); + goto out; + } + + new_sk_storage = rcu_dereference(copy_selem->sk_storage); + } + bpf_map_put(map); + } + +out: + rcu_read_unlock(); + + /* In case of an error, don't free anything explicitly here, the + * caller is responsible to call bpf_sk_storage_free. 
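+ * (sk_clone_lock() takes care of that via sk_free_unlock_clone()
+ * when this function returns an error; see the net/core/sock.c
+ * hunk below.)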
+ */ + + return ret; +} + BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, void *, value, u64, flags) { diff --git a/net/core/dev.c b/net/core/dev.c index 49589ed2018d..b1afafee3e2a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8126,12 +8126,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, bpf_chk = generic_xdp_install; if (fd >= 0) { + u32 prog_id; + if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) { NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time"); return -EEXIST; } - if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && - __dev_xdp_query(dev, bpf_op, query)) { + + prog_id = __dev_xdp_query(dev, bpf_op, query); + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) { NL_SET_ERR_MSG(extack, "XDP program already attached"); return -EBUSY; } @@ -8146,6 +8149,14 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, bpf_prog_put(prog); return -EINVAL; } + + if (prog->aux->id == prog_id) { + bpf_prog_put(prog); + return 0; + } + } else { + if (!__dev_xdp_query(dev, bpf_op, query)) + return 0; } err = dev_xdp_install(dev, bpf_op, extack, flags, prog); diff --git a/net/core/filter.c b/net/core/filter.c index b91988f8b94e..ed6563622ce3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5903,7 +5903,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, default: return -EPROTONOSUPPORT; } - if (mss <= 0) + if (mss == 0) return -ENOENT; return cookie | ((u64)mss << 32); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ea8e8d332d85..2b40b5a9425b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4087,6 +4087,9 @@ static const u8 skb_ext_type_len[] = { #ifdef CONFIG_XFRM [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path), #endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext), +#endif }; static __always_inline unsigned int skb_ext_total_length(void) @@ -4098,6 +4101,9 @@ static __always_inline unsigned int skb_ext_total_length(void) #ifdef CONFIG_XFRM skb_ext_type_len[SKB_EXT_SEC_PATH] + #endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + skb_ext_type_len[TC_SKB_EXT] + +#endif 0; } diff --git a/net/core/sock.c b/net/core/sock.c index 545fac19a711..07863edbe6fc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) goto out; } RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); -#ifdef CONFIG_BPF_SYSCALL - RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); -#endif + + if (bpf_sk_storage_clone(sk, newsk)) { + sk_free_unlock_clone(newsk); + newsk = NULL; + goto out; + } newsk->sk_err = 0; newsk->sk_err_soft = 0; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 65122bbccd27..dde9d762edee 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1545,10 +1545,34 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in dp->user_features = 0; } -static void ovs_dp_change(struct datapath *dp, struct nlattr *a[]) +DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support); + +static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) { - if (a[OVS_DP_ATTR_USER_FEATURES]) - dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); + u32 user_features = 0; + + if (a[OVS_DP_ATTR_USER_FEATURES]) { + user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); + + if (user_features & ~(OVS_DP_F_VPORT_PIDS | + OVS_DP_F_UNALIGNED 
| + OVS_DP_F_TC_RECIRC_SHARING)) + return -EOPNOTSUPP; + +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (user_features & OVS_DP_F_TC_RECIRC_SHARING) + return -EOPNOTSUPP; +#endif + } + + dp->user_features = user_features; + + if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) + static_branch_enable(&tc_recirc_sharing_support); + else + static_branch_disable(&tc_recirc_sharing_support); + + return 0; } static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) @@ -1610,7 +1634,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.port_no = OVSP_LOCAL; parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; - ovs_dp_change(dp, a); + err = ovs_dp_change(dp, a); + if (err) + goto err_destroy_meters; /* So far only local changes have been made, now need the lock. */ ovs_lock(); @@ -1736,7 +1762,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) goto err_unlock_free; - ovs_dp_change(dp, info->attrs); + err = ovs_dp_change(dp, info->attrs); + if (err) + goto err_unlock_free; err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_SET); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 751d34accdf9..81e85dde8217 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -218,6 +218,8 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex) extern struct notifier_block ovs_dp_device_notifier; extern struct genl_family dp_vport_genl_family; +DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support); + void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key); void ovs_dp_detach_port(struct vport *); int ovs_dp_upcall(struct datapath *, struct sk_buff *, diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9d81d2c7bf82..38147e6a20f5 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -842,6 +842,9 @@ static int key_extract_mac_proto(struct sk_buff *skb) int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) { +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + struct tc_skb_ext *tc_ext; +#endif int res, err; /* Extract metadata from packet. */ @@ -874,7 +877,17 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, if (res < 0) return res; key->mac_proto = res; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (static_branch_unlikely(&tc_recirc_sharing_support)) { + tc_ext = skb_ext_find(skb, TC_SKB_EXT); + key->recirc_id = tc_ext ? 
tc_ext->chain : 0; + } else { + key->recirc_id = 0; + } +#else key->recirc_id = 0; +#endif err = key_extract(skb, key); if (!err) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 2977137c28eb..1a5bf3fa4578 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -559,7 +559,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, ret = -EDESTADDRREQ; break; } - if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) || + if (ipv4_is_multicast(sin->sin_addr.s_addr) || sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) { ret = -EINVAL; break; @@ -593,7 +593,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, addr4 = sin6->sin6_addr.s6_addr32[3]; if (addr4 == htonl(INADDR_ANY) || addr4 == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(addr4))) { + ipv4_is_multicast(addr4)) { ret = -EPROTOTYPE; break; } diff --git a/net/rds/bind.c b/net/rds/bind.c index 0f4398e7f2a7..6dbb763bc1fd 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -181,7 +181,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in) || sin->sin_addr.s_addr == htonl(INADDR_ANY) || sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) + ipv4_is_multicast(sin->sin_addr.s_addr)) return -EINVAL; ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr); binding_addr = &v6addr; @@ -206,7 +206,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) addr4 = sin6->sin6_addr.s6_addr32[3]; if (addr4 == htonl(INADDR_ANY) || addr4 == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(addr4))) + ipv4_is_multicast(addr4)) return -EINVAL; } /* The scope ID must be specified for link local address. */ diff --git a/net/rds/send.c b/net/rds/send.c index 9ce552abf9e9..82dcd8b84fe7 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1144,7 +1144,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) case AF_INET: if (usin->sin_addr.s_addr == htonl(INADDR_ANY) || usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) { + ipv4_is_multicast(usin->sin_addr.s_addr)) { ret = -EINVAL; goto out; } @@ -1175,7 +1175,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) addr4 = sin6->sin6_addr.s6_addr32[3]; if (addr4 == htonl(INADDR_ANY) || addr4 == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(addr4))) { + ipv4_is_multicast(addr4)) { ret = -EINVAL; goto out; } diff --git a/net/sched/Kconfig b/net/sched/Kconfig index afd2ba157a13..b3faafeafab9 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -963,6 +963,19 @@ config NET_IFE_SKBTCINDEX tristate "Support to encoding decoding skb tcindex on IFE action" depends on NET_ACT_IFE +config NET_TC_SKB_EXT + bool "TC recirculation support" + depends on NET_CLS_ACT + default y if NET_CLS_ACT + select SKB_EXTENSIONS + + help + Say Y here to allow tc chain misses to continue in OvS datapath in + the correct recirc_id, and hardware chain misses to continue in + the correct chain in tc software datapath. + + Say N here if you won't be using tc<->ovs offload or tc chains offload. 
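+ Note that the openvswitch module only accepts the
+ OVS_DP_F_TC_RECIRC_SHARING datapath feature when this option is
+ enabled.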
+ endif # NET_SCHED config NET_SCH_FIFO diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 6315e0f8d26e..89c04c52af3d 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -40,6 +40,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { [TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE }, [TCA_POLICE_AVRATE] = { .type = NLA_U32 }, [TCA_POLICE_RESULT] = { .type = NLA_U32 }, + [TCA_POLICE_RATE64] = { .type = NLA_U64 }, + [TCA_POLICE_PEAKRATE64] = { .type = NLA_U64 }, }; static int tcf_police_init(struct net *net, struct nlattr *nla, @@ -58,6 +60,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, struct tcf_police_params *new; bool exists = false; u32 index; + u64 rate64, prate64; if (nla == NULL) return -EINVAL; @@ -155,14 +158,18 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, } if (R_tab) { new->rate_present = true; - psched_ratecfg_precompute(&new->rate, &R_tab->rate, 0); + rate64 = tb[TCA_POLICE_RATE64] ? + nla_get_u64(tb[TCA_POLICE_RATE64]) : 0; + psched_ratecfg_precompute(&new->rate, &R_tab->rate, rate64); qdisc_put_rtab(R_tab); } else { new->rate_present = false; } if (P_tab) { new->peak_present = true; - psched_ratecfg_precompute(&new->peak, &P_tab->rate, 0); + prate64 = tb[TCA_POLICE_PEAKRATE64] ? + nla_get_u64(tb[TCA_POLICE_PEAKRATE64]) : 0; + psched_ratecfg_precompute(&new->peak, &P_tab->rate, prate64); qdisc_put_rtab(P_tab); } else { new->peak_present = false; @@ -313,10 +320,22 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, lockdep_is_held(&police->tcf_lock)); opt.mtu = p->tcfp_mtu; opt.burst = PSCHED_NS2TICKS(p->tcfp_burst); - if (p->rate_present) + if (p->rate_present) { psched_ratecfg_getrate(&opt.rate, &p->rate); - if (p->peak_present) + if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64_64bit(skb, TCA_POLICE_RATE64, + police->params->rate.rate_bytes_ps, + TCA_POLICE_PAD)) + goto nla_put_failure; + } + if (p->peak_present) { psched_ratecfg_getrate(&opt.peakrate, &p->peak); + if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64, + police->params->peak.rate_bytes_ps, + TCA_POLICE_PAD)) + goto nla_put_failure; + } if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) goto nla_put_failure; if (p->tcfp_result && diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 671ca905dbb5..05c4fe1c3ca2 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1514,6 +1514,18 @@ reclassify: goto reset; } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { first_tp = res->goto_tp; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + { + struct tc_skb_ext *ext; + + ext = skb_ext_add(skb, TC_SKB_EXT); + if (WARN_ON_ONCE(!ext)) + return TC_ACT_SHOT; + + ext->chain = err & TC_ACT_EXT_VAL_MASK; + } +#endif goto reset; } #endif diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 810645b5c086..93b58fde99b7 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -299,7 +299,7 @@ static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q) { struct ethtool_link_ksettings ecmd; int speed = SPEED_10; - int port_rate = -1; + int port_rate; int err; err = __ethtool_get_link_ksettings(dev, &ecmd); diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 550fdf18d3b3..3b7f721c023b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -228,14 +228,11 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, 
ret = 0; err_free_raw: - memset(rawkey, 0, keybytes); - kfree(rawkey); + kzfree(rawkey); err_free_out: - memset(outblockdata, 0, blocksize); - kfree(outblockdata); + kzfree(outblockdata); err_free_in: - memset(inblockdata, 0, blocksize); - kfree(inblockdata); + kzfree(inblockdata); err_free_cipher: crypto_free_sync_skcipher(cipher); err_return: diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index e188139f0464..41c106e45f01 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -159,12 +159,8 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq) spin_lock_irqsave(&ctx->lock, flags); info = ctx->retransmit_hint; - if (info && !before(acked_seq, info->end_seq)) { + if (info && !before(acked_seq, info->end_seq)) ctx->retransmit_hint = NULL; - list_del(&info->list); - destroy_record(info); - deleted_records++; - } list_for_each_entry_safe(info, temp, &ctx->records_list, list) { if (before(acked_seq, info->end_seq)) @@ -838,22 +834,18 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) struct net_device *netdev; char *iv, *rec_seq; struct sk_buff *skb; - int rc = -EINVAL; __be64 rcd_sn; + int rc; if (!ctx) - goto out; + return -EINVAL; - if (ctx->priv_ctx_tx) { - rc = -EEXIST; - goto out; - } + if (ctx->priv_ctx_tx) + return -EEXIST; start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL); - if (!start_marker_record) { - rc = -ENOMEM; - goto out; - } + if (!start_marker_record) + return -ENOMEM; offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL); if (!offload_ctx) { @@ -939,17 +931,11 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) if (skb) TCP_SKB_CB(skb)->eor = 1; - /* We support starting offload on multiple sockets - * concurrently, so we only need a read lock here. - * This lock must precede get_netdev_for_sock to prevent races between - * NETDEV_DOWN and setsockopt. - */ - down_read(&device_offload_lock); netdev = get_netdev_for_sock(sk); if (!netdev) { pr_err_ratelimited("%s: netdev not found\n", __func__); rc = -EINVAL; - goto release_lock; + goto disable_cad; } if (!(netdev->features & NETIF_F_HW_TLS_TX)) { @@ -960,10 +946,15 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) /* Avoid offloading if the device is down * We don't want to offload new flows after * the NETDEV_DOWN event + * + * device_offload_lock is taken in tls_devices's NETDEV_DOWN + * handler thus protecting from the device going down before + * ctx was added to tls_device_list. 
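+ * (The read lock is therefore taken only after get_netdev_for_sock()
+ * succeeds and is released right after tls_device_attach() has added
+ * the context to the device list.)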
*/ + down_read(&device_offload_lock); if (!(netdev->flags & IFF_UP)) { rc = -EINVAL; - goto release_netdev; + goto release_lock; } ctx->priv_ctx_tx = offload_ctx; @@ -971,9 +962,10 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) &ctx->crypto_send.info, tcp_sk(sk)->write_seq); if (rc) - goto release_netdev; + goto release_lock; tls_device_attach(ctx, sk, netdev); + up_read(&device_offload_lock); /* following this assignment tls_is_sk_tx_device_offloaded * will return true and the context might be accessed @@ -981,13 +973,14 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) */ smp_store_release(&sk->sk_validate_xmit_skb, tls_validate_xmit_skb); dev_put(netdev); - up_read(&device_offload_lock); - goto out; -release_netdev: - dev_put(netdev); + return 0; + release_lock: up_read(&device_offload_lock); +release_netdev: + dev_put(netdev); +disable_cad: clean_acked_data_disable(inet_csk(sk)); crypto_free_aead(offload_ctx->aead_send); free_rec_seq: @@ -999,7 +992,6 @@ free_offload_ctx: ctx->priv_ctx_tx = NULL; free_marker_record: kfree(start_marker_record); -out: return rc; } @@ -1012,17 +1004,10 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) if (ctx->crypto_recv.info.version != TLS_1_2_VERSION) return -EOPNOTSUPP; - /* We support starting offload on multiple sockets - * concurrently, so we only need a read lock here. - * This lock must precede get_netdev_for_sock to prevent races between - * NETDEV_DOWN and setsockopt. - */ - down_read(&device_offload_lock); netdev = get_netdev_for_sock(sk); if (!netdev) { pr_err_ratelimited("%s: netdev not found\n", __func__); - rc = -EINVAL; - goto release_lock; + return -EINVAL; } if (!(netdev->features & NETIF_F_HW_TLS_RX)) { @@ -1033,16 +1018,21 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) /* Avoid offloading if the device is down * We don't want to offload new flows after * the NETDEV_DOWN event + * + * device_offload_lock is taken in tls_devices's NETDEV_DOWN + * handler thus protecting from the device going down before + * ctx was added to tls_device_list. 
*/ + down_read(&device_offload_lock); if (!(netdev->flags & IFF_UP)) { rc = -EINVAL; - goto release_netdev; + goto release_lock; } context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL); if (!context) { rc = -ENOMEM; - goto release_netdev; + goto release_lock; } context->resync_nh_reset = 1; @@ -1058,7 +1048,11 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) goto free_sw_resources; tls_device_attach(ctx, sk, netdev); - goto release_netdev; + up_read(&device_offload_lock); + + dev_put(netdev); + + return 0; free_sw_resources: up_read(&device_offload_lock); @@ -1066,10 +1060,10 @@ free_sw_resources: down_read(&device_offload_lock); release_ctx: ctx->priv_ctx_rx = NULL; -release_netdev: - dev_put(netdev); release_lock: up_read(&device_offload_lock); +release_netdev: + dev_put(netdev); return rc; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 277f7c209fed..ac88877dcade 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -286,19 +286,14 @@ static void tls_sk_proto_cleanup(struct sock *sk, kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_release_resources_tx(sk); -#ifdef CONFIG_TLS_DEVICE } else if (ctx->tx_conf == TLS_HW) { tls_device_free_resources_tx(sk); -#endif } if (ctx->rx_conf == TLS_SW) tls_sw_release_resources_rx(sk); - -#ifdef CONFIG_TLS_DEVICE - if (ctx->rx_conf == TLS_HW) + else if (ctx->rx_conf == TLS_HW) tls_device_offload_cleanup_rx(sk); -#endif } static void tls_sk_proto_close(struct sock *sk, long timeout) @@ -331,7 +326,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) tls_sw_strparser_done(ctx); if (ctx->rx_conf == TLS_SW) tls_sw_free_ctx_rx(ctx); - ctx->sk_proto_close(sk, timeout); + ctx->sk_proto->close(sk, timeout); if (free_ctx) tls_ctx_free(sk, ctx); @@ -451,7 +446,8 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, struct tls_context *ctx = tls_get_ctx(sk); if (level != SOL_TLS) - return ctx->getsockopt(sk, level, optname, optval, optlen); + return ctx->sk_proto->getsockopt(sk, level, + optname, optval, optlen); return do_tls_getsockopt(sk, optname, optval, optlen); } @@ -536,26 +532,18 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, } if (tx) { -#ifdef CONFIG_TLS_DEVICE rc = tls_set_device_offload(sk, ctx); conf = TLS_HW; if (rc) { -#else - { -#endif rc = tls_set_sw_offload(sk, ctx, 1); if (rc) goto err_crypto_info; conf = TLS_SW; } } else { -#ifdef CONFIG_TLS_DEVICE rc = tls_set_device_offload_rx(sk, ctx); conf = TLS_HW; if (rc) { -#else - { -#endif rc = tls_set_sw_offload(sk, ctx, 0); if (rc) goto err_crypto_info; @@ -609,7 +597,8 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, struct tls_context *ctx = tls_get_ctx(sk); if (level != SOL_TLS) - return ctx->setsockopt(sk, level, optname, optval, optlen); + return ctx->sk_proto->setsockopt(sk, level, optname, optval, + optlen); return do_tls_setsockopt(sk, optname, optval, optlen); } @@ -624,10 +613,7 @@ static struct tls_context *create_ctx(struct sock *sk) return NULL; rcu_assign_pointer(icsk->icsk_ulp_data, ctx); - ctx->setsockopt = sk->sk_prot->setsockopt; - ctx->getsockopt = sk->sk_prot->getsockopt; - ctx->sk_proto_close = sk->sk_prot->close; - ctx->unhash = sk->sk_prot->unhash; + ctx->sk_proto = sk->sk_prot; return ctx; } @@ -683,9 +669,6 @@ static int tls_hw_prot(struct sock *sk) spin_unlock_bh(&device_spinlock); tls_build_proto(sk); - ctx->hash = sk->sk_prot->hash; - ctx->unhash = sk->sk_prot->unhash; - ctx->sk_proto_close = sk->sk_prot->close; ctx->sk_destruct = 
sk->sk_destruct; sk->sk_destruct = tls_hw_sk_destruct; ctx->rx_conf = TLS_HW_RECORD; @@ -717,7 +700,7 @@ static void tls_hw_unhash(struct sock *sk) } } spin_unlock_bh(&device_spinlock); - ctx->unhash(sk); + ctx->sk_proto->unhash(sk); } static int tls_hw_hash(struct sock *sk) @@ -726,7 +709,7 @@ static int tls_hw_hash(struct sock *sk) struct tls_device *dev; int err; - err = ctx->hash(sk); + err = ctx->sk_proto->hash(sk); spin_lock_bh(&device_spinlock); list_for_each_entry(dev, &device_list, dev_list) { if (dev->hash) { @@ -816,7 +799,6 @@ static int tls_init(struct sock *sk) ctx->tx_conf = TLS_BASE; ctx->rx_conf = TLS_BASE; - ctx->sk_proto = sk->sk_prot; update_sk_prot(sk, ctx); out: write_unlock_bh(&sk->sk_callback_lock); @@ -828,12 +810,10 @@ static void tls_update(struct sock *sk, struct proto *p) struct tls_context *ctx; ctx = tls_get_ctx(sk); - if (likely(ctx)) { - ctx->sk_proto_close = p->close; + if (likely(ctx)) ctx->sk_proto = p; - } else { + else sk->sk_prot = p; - } } static int tls_get_info(const struct sock *sk, struct sk_buff *skb) @@ -927,9 +907,7 @@ static int __init tls_register(void) tls_sw_proto_ops = inet_stream_ops; tls_sw_proto_ops.splice_read = tls_sw_splice_read; -#ifdef CONFIG_TLS_DEVICE tls_device_init(); -#endif tcp_register_ulp(&tcp_tls_ulp_ops); return 0; @@ -938,9 +916,7 @@ static int __init tls_register(void) static void __exit tls_unregister(void) { tcp_unregister_ulp(&tcp_tls_ulp_ops); -#ifdef CONFIG_TLS_DEVICE tls_device_cleanup(); -#endif } module_init(tls_register); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 91d21b048a9b..c2b5e0d2ba1a 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1489,13 +1489,12 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, int pad, err = 0; if (!ctx->decrypted) { -#ifdef CONFIG_TLS_DEVICE if (tls_ctx->rx_conf == TLS_HW) { err = tls_device_decrypted(sk, skb); if (err < 0) return err; } -#endif + /* Still not decrypted after tls_device */ if (!ctx->decrypted) { err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, @@ -2014,10 +2013,9 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) ret = -EINVAL; goto read_failure; } -#ifdef CONFIG_TLS_DEVICE + tls_device_rx_resync_new_rec(strp->sk, data_len + TLS_HEADER_SIZE, TCP_SKB_CB(skb)->seq + rxm->offset); -#endif return data_len + TLS_HEADER_SIZE; read_failure: diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 94cc0fa3e848..5bb70c692b1e 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -307,8 +307,13 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, spin_unlock_bh(&vvs->rx_lock); - /* We send a credit update only when the space available seen - * by the transmitter is less than VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + /* To reduce the number of credit update messages, + * don't update credits as long as lots of space is available. + * Note: the limit chosen here is arbitrary. Setting the limit + * too high causes extra messages. Too low causes transmitter + * stalls. As stalls are in theory more expensive than extra + * messages, we set the limit to a high value. TODO: experiment + * with different values. 
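+	 * Here, free_space is the space currently available as seen by the
+	 * transmitter, i.e. since the last credit update was sent.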
*/ if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { virtio_transport_send_credit_update(vsk, diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 0e0062127124..947b8ff0227e 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -14,7 +14,7 @@ #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/idr.h> -#include <linux/highmem.h> +#include <linux/vmalloc.h> #include "xdp_umem.h" #include "xsk_queue.h" @@ -106,14 +106,22 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, umem->dev = dev; umem->queue_id = queue_id; + if (flags & XDP_USE_NEED_WAKEUP) { + umem->flags |= XDP_UMEM_USES_NEED_WAKEUP; + /* Tx needs to be explicitly woken up the first time. + * Also for supporting drivers that do not implement this + * feature. They will always have to call sendto(). + */ + xsk_set_tx_need_wakeup(umem); + } + dev_hold(dev); if (force_copy) /* For copy-mode, we are done. */ return 0; - if (!dev->netdev_ops->ndo_bpf || - !dev->netdev_ops->ndo_xsk_async_xmit) { + if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_wakeup) { err = -EOPNOTSUPP; goto err_unreg_umem; } @@ -170,7 +178,30 @@ static void xdp_umem_unmap_pages(struct xdp_umem *umem) unsigned int i; for (i = 0; i < umem->npgs; i++) - kunmap(umem->pgs[i]); + if (PageHighMem(umem->pgs[i])) + vunmap(umem->pages[i].addr); +} + +static int xdp_umem_map_pages(struct xdp_umem *umem) +{ + unsigned int i; + void *addr; + + for (i = 0; i < umem->npgs; i++) { + if (PageHighMem(umem->pgs[i])) + addr = vmap(&umem->pgs[i], 1, VM_MAP, PAGE_KERNEL); + else + addr = page_address(umem->pgs[i]); + + if (!addr) { + xdp_umem_unmap_pages(umem); + return -ENOMEM; + } + + umem->pages[i].addr = addr; + } + + return 0; } static void xdp_umem_unpin_pages(struct xdp_umem *umem) @@ -309,10 +340,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { + bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; u32 chunk_size = mr->chunk_size, headroom = mr->headroom; unsigned int chunks, chunks_per_page; u64 addr = mr->addr, size = mr->len; - int size_chk, err, i; + int size_chk, err; if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { /* Strictly speaking we could support this, if: @@ -324,7 +356,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return -EINVAL; } - if (!is_power_of_2(chunk_size)) + if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG | + XDP_UMEM_USES_NEED_WAKEUP)) + return -EINVAL; + + if (!unaligned_chunks && !is_power_of_2(chunk_size)) return -EINVAL; if (!PAGE_ALIGNED(addr)) { @@ -341,9 +377,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (chunks == 0) return -EINVAL; - chunks_per_page = PAGE_SIZE / chunk_size; - if (chunks < chunks_per_page || chunks % chunks_per_page) - return -EINVAL; + if (!unaligned_chunks) { + chunks_per_page = PAGE_SIZE / chunk_size; + if (chunks < chunks_per_page || chunks % chunks_per_page) + return -EINVAL; + } headroom = ALIGN(headroom, 64); @@ -352,13 +390,15 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return -EINVAL; umem->address = (unsigned long)addr; - umem->chunk_mask = ~((u64)chunk_size - 1); + umem->chunk_mask = unaligned_chunks ? 
XSK_UNALIGNED_BUF_ADDR_MASK + : ~((u64)chunk_size - 1); umem->size = size; umem->headroom = headroom; umem->chunk_size_nohr = chunk_size - headroom; umem->npgs = size / PAGE_SIZE; umem->pgs = NULL; umem->user = NULL; + umem->flags = mr->flags; INIT_LIST_HEAD(&umem->xsk_list); spin_lock_init(&umem->xsk_list_lock); @@ -378,10 +418,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) goto out_pin; } - for (i = 0; i < umem->npgs; i++) - umem->pages[i].addr = kmap(umem->pgs[i]); + err = xdp_umem_map_pages(umem); + if (!err) + return 0; - return 0; + kfree(umem->pages); out_pin: xdp_umem_unpin_pages(umem); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 59b57d708697..c2f1af3b6a7c 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -45,7 +45,7 @@ EXPORT_SYMBOL(xsk_umem_has_addrs); u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) { - return xskq_peek_addr(umem->fq, addr); + return xskq_peek_addr(umem->fq, addr, umem); } EXPORT_SYMBOL(xsk_umem_peek_addr); @@ -55,21 +55,103 @@ void xsk_umem_discard_addr(struct xdp_umem *umem) } EXPORT_SYMBOL(xsk_umem_discard_addr); +void xsk_set_rx_need_wakeup(struct xdp_umem *umem) +{ + if (umem->need_wakeup & XDP_WAKEUP_RX) + return; + + umem->fq->ring->flags |= XDP_RING_NEED_WAKEUP; + umem->need_wakeup |= XDP_WAKEUP_RX; +} +EXPORT_SYMBOL(xsk_set_rx_need_wakeup); + +void xsk_set_tx_need_wakeup(struct xdp_umem *umem) +{ + struct xdp_sock *xs; + + if (umem->need_wakeup & XDP_WAKEUP_TX) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(xs, &umem->xsk_list, list) { + xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP; + } + rcu_read_unlock(); + + umem->need_wakeup |= XDP_WAKEUP_TX; +} +EXPORT_SYMBOL(xsk_set_tx_need_wakeup); + +void xsk_clear_rx_need_wakeup(struct xdp_umem *umem) +{ + if (!(umem->need_wakeup & XDP_WAKEUP_RX)) + return; + + umem->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP; + umem->need_wakeup &= ~XDP_WAKEUP_RX; +} +EXPORT_SYMBOL(xsk_clear_rx_need_wakeup); + +void xsk_clear_tx_need_wakeup(struct xdp_umem *umem) +{ + struct xdp_sock *xs; + + if (!(umem->need_wakeup & XDP_WAKEUP_TX)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(xs, &umem->xsk_list, list) { + xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP; + } + rcu_read_unlock(); + + umem->need_wakeup &= ~XDP_WAKEUP_TX; +} +EXPORT_SYMBOL(xsk_clear_tx_need_wakeup); + +bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem) +{ + return umem->flags & XDP_UMEM_USES_NEED_WAKEUP; +} +EXPORT_SYMBOL(xsk_umem_uses_need_wakeup); + +/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for + * each page. This is only required in copy mode. 
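+ * In zero-copy mode the NIC writes the frame into the umem directly,
+ * so no copy (and therefore no split) is needed.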
+ */ +static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf, + u32 len, u32 metalen) +{ + void *to_buf = xdp_umem_get_data(umem, addr); + + addr = xsk_umem_add_offset_to_addr(addr); + if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) { + void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr; + u64 page_start = addr & ~(PAGE_SIZE - 1); + u64 first_len = PAGE_SIZE - (addr - page_start); + + memcpy(to_buf, from_buf, first_len + metalen); + memcpy(next_pg_addr, from_buf + first_len, len - first_len); + + return; + } + + memcpy(to_buf, from_buf, len + metalen); +} + static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { - void *to_buf, *from_buf; + u64 offset = xs->umem->headroom; + u64 addr, memcpy_addr; + void *from_buf; u32 metalen; - u64 addr; int err; - if (!xskq_peek_addr(xs->umem->fq, &addr) || + if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) || len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) { xs->rx_dropped++; return -ENOSPC; } - addr += xs->umem->headroom; - if (unlikely(xdp_data_meta_unsupported(xdp))) { from_buf = xdp->data; metalen = 0; @@ -78,9 +160,11 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) metalen = xdp->data - xdp->data_meta; } - to_buf = xdp_umem_get_data(xs->umem, addr); - memcpy(to_buf, from_buf, len + metalen); - addr += metalen; + memcpy_addr = xsk_umem_adjust_offset(xs->umem, addr, offset); + __xsk_rcv_memcpy(xs->umem, memcpy_addr, from_buf, len, metalen); + + offset += metalen; + addr = xsk_umem_adjust_offset(xs->umem, addr, offset); err = xskq_produce_batch_desc(xs->rx, addr, len); if (!err) { xskq_discard_addr(xs->umem->fq); @@ -102,10 +186,23 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) return err; } +static bool xsk_is_bound(struct xdp_sock *xs) +{ + if (READ_ONCE(xs->state) == XSK_BOUND) { + /* Matches smp_wmb() in bind(). 
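+		 * It guarantees that the dev, umem and queue fields written
+		 * in bind() before the state was set to XSK_BOUND are
+		 * visible here.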
*/ + smp_rmb(); + return true; + } + return false; +} + int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { u32 len; + if (!xsk_is_bound(xs)) + return -EINVAL; + if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) return -EINVAL; @@ -125,6 +222,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { u32 metalen = xdp->data - xdp->data_meta; u32 len = xdp->data_end - xdp->data; + u64 offset = xs->umem->headroom; void *buffer; u64 addr; int err; @@ -136,17 +234,17 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) goto out_unlock; } - if (!xskq_peek_addr(xs->umem->fq, &addr) || + if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) || len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) { err = -ENOSPC; goto out_drop; } - addr += xs->umem->headroom; - + addr = xsk_umem_adjust_offset(xs->umem, addr, offset); buffer = xdp_umem_get_data(xs->umem, addr); memcpy(buffer, xdp->data_meta, len + metalen); - addr += metalen; + + addr = xsk_umem_adjust_offset(xs->umem, addr, metalen); err = xskq_produce_batch_desc(xs->rx, addr, len); if (err) goto out_drop; @@ -190,7 +288,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc) rcu_read_lock(); list_for_each_entry_rcu(xs, &umem->xsk_list, list) { - if (!xskq_peek_desc(xs->tx, desc)) + if (!xskq_peek_desc(xs->tx, desc, umem)) continue; if (xskq_produce_addr_lazy(umem->cq, desc->addr)) @@ -212,7 +310,8 @@ static int xsk_zc_xmit(struct sock *sk) struct xdp_sock *xs = xdp_sk(sk); struct net_device *dev = xs->dev; - return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id); + return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, + XDP_WAKEUP_TX); } static void xsk_destruct_skb(struct sk_buff *skb) @@ -243,7 +342,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, if (xs->queue_id >= xs->dev->real_num_tx_queues) goto out; - while (xskq_peek_desc(xs->tx, &desc)) { + while (xskq_peek_desc(xs->tx, &desc, xs->umem)) { char *buffer; u64 addr; u32 len; @@ -272,7 +371,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, skb->dev = xs->dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_shinfo(skb)->destructor_arg = (void *)(long)addr; + skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr; skb->destructor = xsk_destruct_skb; err = dev_direct_xmit(skb, xs->queue_id); @@ -301,7 +400,7 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); - if (unlikely(!xs->dev)) + if (unlikely(!xsk_is_bound(xs))) return -ENXIO; if (unlikely(!(xs->dev->flags & IFF_UP))) return -ENETDOWN; @@ -317,8 +416,19 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { unsigned int mask = datagram_poll(file, sock, wait); - struct sock *sk = sock->sk; - struct xdp_sock *xs = xdp_sk(sk); + struct xdp_sock *xs = xdp_sk(sock->sk); + struct net_device *dev; + struct xdp_umem *umem; + + if (unlikely(!xsk_is_bound(xs))) + return mask; + + dev = xs->dev; + umem = xs->umem; + + if (umem->need_wakeup) + dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, + umem->need_wakeup); if (xs->rx && !xskq_empty_desc(xs->rx)) mask |= POLLIN | POLLRDNORM; @@ -342,7 +452,7 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue, /* Make sure queue is ready before it can be seen by others */ smp_wmb(); - *queue = q; + WRITE_ONCE(*queue, q); return 0; } @@ -350,10 +460,9 @@ static void xsk_unbind_dev(struct xdp_sock *xs) { struct 
net_device *dev = xs->dev; - if (!dev || xs->state != XSK_BOUND) + if (xs->state != XSK_BOUND) return; - - xs->state = XSK_UNBOUND; + WRITE_ONCE(xs->state, XSK_UNBOUND); /* Wait for driver to stop using the xdp socket. */ xdp_del_sk_umem(xs->umem, xs); @@ -362,6 +471,52 @@ static void xsk_unbind_dev(struct xdp_sock *xs) dev_put(dev); } +static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs, + struct xdp_sock ***map_entry) +{ + struct xsk_map *map = NULL; + struct xsk_map_node *node; + + *map_entry = NULL; + + spin_lock_bh(&xs->map_list_lock); + node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node, + node); + if (node) { + WARN_ON(xsk_map_inc(node->map)); + map = node->map; + *map_entry = node->map_entry; + } + spin_unlock_bh(&xs->map_list_lock); + return map; +} + +static void xsk_delete_from_maps(struct xdp_sock *xs) +{ + /* This function removes the current XDP socket from all the + * maps it resides in. We need to take extra care here, due to + * the two locks involved. Each map has a lock synchronizing + * updates to the entries, and each socket has a lock that + * synchronizes access to the list of maps (map_list). For + * deadlock avoidance the locks need to be taken in the order + * "map lock"->"socket map list lock". We start off by + * accessing the socket map list, and take a reference to the + * map to guarantee existence between the + * xsk_get_map_list_entry() and xsk_map_try_sock_delete() + * calls. Then we ask the map to remove the socket, which + * tries to remove the socket from the map. Note that there + * might be updates to the map between + * xsk_get_map_list_entry() and xsk_map_try_sock_delete(). + */ + struct xdp_sock **map_entry = NULL; + struct xsk_map *map; + + while ((map = xsk_get_map_list_entry(xs, &map_entry))) { + xsk_map_try_sock_delete(map, xs, map_entry); + xsk_map_put(map); + } +} + static int xsk_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -381,7 +536,10 @@ static int xsk_release(struct socket *sock) sock_prot_inuse_add(net, sk->sk_prot, -1); local_bh_enable(); + xsk_delete_from_maps(xs); + mutex_lock(&xs->mutex); xsk_unbind_dev(xs); + mutex_unlock(&xs->mutex); xskq_destroy(xs->rx); xskq_destroy(xs->tx); @@ -412,6 +570,24 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd) return sock; } +/* Check if umem pages are contiguous. + * If zero-copy mode, use the DMA address to do the page contiguity check + * For all other modes we use addr (kernel virtual address) + * Store the result in the low bits of addr. + */ +static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags) +{ + struct xdp_umem_page *pgs = umem->pages; + int i, is_contig; + + for (i = 0; i < umem->npgs - 1; i++) { + is_contig = (flags & XDP_ZEROCOPY) ? 
+ (pgs[i].dma + PAGE_SIZE == pgs[i + 1].dma) : + (pgs[i].addr + PAGE_SIZE == pgs[i + 1].addr); + pgs[i].addr += is_contig << XSK_NEXT_PG_CONTIG_SHIFT; + } +} + static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr; @@ -427,7 +603,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) return -EINVAL; flags = sxdp->sxdp_flags; - if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY)) + if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY | + XDP_USE_NEED_WAKEUP)) return -EINVAL; rtnl_lock(); @@ -454,7 +631,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) struct xdp_sock *umem_xs; struct socket *sock; - if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) { + if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) || + (flags & XDP_USE_NEED_WAKEUP)) { /* Cannot specify flags for shared sockets. */ err = -EINVAL; goto out_unlock; @@ -473,19 +651,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) } umem_xs = xdp_sk(sock->sk); - if (!umem_xs->umem) { - /* No umem to inherit. */ + if (!xsk_is_bound(umem_xs)) { err = -EBADF; sockfd_put(sock); goto out_unlock; - } else if (umem_xs->dev != dev || umem_xs->queue_id != qid) { + } + if (umem_xs->dev != dev || umem_xs->queue_id != qid) { err = -EINVAL; sockfd_put(sock); goto out_unlock; } xdp_get_umem(umem_xs->umem); - xs->umem = umem_xs->umem; + WRITE_ONCE(xs->umem, umem_xs->umem); sockfd_put(sock); } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) { err = -EINVAL; @@ -500,6 +678,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) err = xdp_umem_assign_dev(xs->umem, dev, qid, flags); if (err) goto out_unlock; + + xsk_check_page_contiguity(xs->umem, flags); } xs->dev = dev; @@ -510,16 +690,28 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xdp_add_sk_umem(xs->umem, xs); out_unlock: - if (err) + if (err) { dev_put(dev); - else - xs->state = XSK_BOUND; + } else { + /* Matches smp_rmb() in bind() for shared umem + * sockets, and xsk_is_bound(). + */ + smp_wmb(); + WRITE_ONCE(xs->state, XSK_BOUND); + } out_release: mutex_unlock(&xs->mutex); rtnl_unlock(); return err; } +struct xdp_umem_reg_v1 { + __u64 addr; /* Start of packet data area */ + __u64 len; /* Length of packet data area */ + __u32 chunk_size; + __u32 headroom; +}; + static int xsk_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { @@ -549,15 +741,24 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, } q = (optname == XDP_TX_RING) ? 
&xs->tx : &xs->rx; err = xsk_init_queue(entries, q, false); + if (!err && optname == XDP_TX_RING) + /* Tx needs to be explicitly woken up the first time */ + xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP; mutex_unlock(&xs->mutex); return err; } case XDP_UMEM_REG: { - struct xdp_umem_reg mr; + size_t mr_size = sizeof(struct xdp_umem_reg); + struct xdp_umem_reg mr = {}; struct xdp_umem *umem; - if (copy_from_user(&mr, optval, sizeof(mr))) + if (optlen < sizeof(struct xdp_umem_reg_v1)) + return -EINVAL; + else if (optlen < sizeof(mr)) + mr_size = sizeof(struct xdp_umem_reg_v1); + + if (copy_from_user(&mr, optval, mr_size)) return -EFAULT; mutex_lock(&xs->mutex); @@ -574,7 +775,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, /* Make sure umem is ready before it can be seen by others */ smp_wmb(); - xs->umem = umem; + WRITE_ONCE(xs->umem, umem); mutex_unlock(&xs->mutex); return 0; } @@ -610,6 +811,20 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, return -ENOPROTOOPT; } +static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring) +{ + ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer); + ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer); + ring->desc = offsetof(struct xdp_rxtx_ring, desc); +} + +static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring) +{ + ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer); + ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer); + ring->desc = offsetof(struct xdp_umem_ring, desc); +} + static int xsk_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -649,26 +864,49 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, case XDP_MMAP_OFFSETS: { struct xdp_mmap_offsets off; + struct xdp_mmap_offsets_v1 off_v1; + bool flags_supported = true; + void *to_copy; - if (len < sizeof(off)) + if (len < sizeof(off_v1)) return -EINVAL; + else if (len < sizeof(off)) + flags_supported = false; + + if (flags_supported) { + /* xdp_ring_offset is identical to xdp_ring_offset_v1 + * except for the flags field added to the end. 
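+			 * The v1 helpers below therefore fill in the common
+			 * producer, consumer and desc offsets; only the flags
+			 * offsets are set separately.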
+ */ + xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *) + &off.rx); + xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *) + &off.tx); + xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *) + &off.fr); + xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *) + &off.cr); + off.rx.flags = offsetof(struct xdp_rxtx_ring, + ptrs.flags); + off.tx.flags = offsetof(struct xdp_rxtx_ring, + ptrs.flags); + off.fr.flags = offsetof(struct xdp_umem_ring, + ptrs.flags); + off.cr.flags = offsetof(struct xdp_umem_ring, + ptrs.flags); + + len = sizeof(off); + to_copy = &off; + } else { + xsk_enter_rxtx_offsets(&off_v1.rx); + xsk_enter_rxtx_offsets(&off_v1.tx); + xsk_enter_umem_offsets(&off_v1.fr); + xsk_enter_umem_offsets(&off_v1.cr); + + len = sizeof(off_v1); + to_copy = &off_v1; + } - off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer); - off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer); - off.rx.desc = offsetof(struct xdp_rxtx_ring, desc); - off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer); - off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer); - off.tx.desc = offsetof(struct xdp_rxtx_ring, desc); - - off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer); - off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer); - off.fr.desc = offsetof(struct xdp_umem_ring, desc); - off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer); - off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer); - off.cr.desc = offsetof(struct xdp_umem_ring, desc); - - len = sizeof(off); - if (copy_to_user(optval, &off, len)) + if (copy_to_user(optval, to_copy, len)) return -EFAULT; if (put_user(len, optlen)) return -EFAULT; @@ -713,7 +951,7 @@ static int xsk_mmap(struct file *file, struct socket *sock, unsigned long pfn; struct page *qpg; - if (xs->state != XSK_READY) + if (READ_ONCE(xs->state) != XSK_READY) return -EBUSY; if (offset == XDP_PGOFF_RX_RING) { @@ -855,6 +1093,9 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, spin_lock_init(&xs->rx_lock); spin_lock_init(&xs->tx_completion_lock); + INIT_LIST_HEAD(&xs->map_list); + spin_lock_init(&xs->map_list_lock); + mutex_lock(&net->xdp.lock); sk_add_node_rcu(sk, &net->xdp.list); mutex_unlock(&net->xdp.lock); diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h index ba8120610426..4cfd106bdb53 100644 --- a/net/xdp/xsk.h +++ b/net/xdp/xsk.h @@ -4,6 +4,19 @@ #ifndef XSK_H_ #define XSK_H_ +struct xdp_ring_offset_v1 { + __u64 producer; + __u64 consumer; + __u64 desc; +}; + +struct xdp_mmap_offsets_v1 { + struct xdp_ring_offset_v1 rx; + struct xdp_ring_offset_v1 tx; + struct xdp_ring_offset_v1 fr; + struct xdp_ring_offset_v1 cr; +}; + static inline struct xdp_sock *xdp_sk(struct sock *sk) { return (struct xdp_sock *)sk; diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index d5e06c8e0cbf..f59791ba43a0 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) du.id = umem->id; du.size = umem->size; du.num_pages = umem->npgs; - du.chunk_size = (__u32)(~umem->chunk_mask + 1); + du.chunk_size = umem->chunk_size_nohr + umem->headroom; du.headroom = umem->headroom; du.ifindex = umem->dev ? 
umem->dev->ifindex : 0; du.queue_id = umem->queue_id; @@ -97,6 +97,7 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, msg->xdiag_ino = sk_ino; sock_diag_save_cookie(sk, msg->xdiag_cookie); + mutex_lock(&xs->mutex); if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb)) goto out_nlmsg_trim; @@ -117,10 +118,12 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO)) goto out_nlmsg_trim; + mutex_unlock(&xs->mutex); nlmsg_end(nlskb, nlh); return 0; out_nlmsg_trim: + mutex_unlock(&xs->mutex); nlmsg_cancel(nlskb, nlh); return -EMSGSIZE; } diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 909c5168ed0f..eddae4688862 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -16,6 +16,7 @@ struct xdp_ring { u32 producer ____cacheline_aligned_in_smp; u32 consumer ____cacheline_aligned_in_smp; + u32 flags; }; /* Used for the RX and TX queues for packets */ @@ -133,6 +134,17 @@ static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt) /* UMEM queue */ +static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr, + u64 length) +{ + bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE; + bool next_pg_contig = + (unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr & + XSK_NEXT_PG_CONTIG_MASK; + + return cross_pg && !next_pg_contig; +} + static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr) { if (addr >= q->size) { @@ -143,23 +155,51 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr) return true; } -static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr) +static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr, + u64 length, + struct xdp_umem *umem) +{ + u64 base_addr = xsk_umem_extract_addr(addr); + + addr = xsk_umem_add_offset_to_addr(addr); + if (base_addr >= q->size || addr >= q->size || + xskq_crosses_non_contig_pg(umem, addr, length)) { + q->invalid_descs++; + return false; + } + + return true; +} + +static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr, + struct xdp_umem *umem) { while (q->cons_tail != q->cons_head) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; unsigned int idx = q->cons_tail & q->ring_mask; *addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask; + + if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { + if (xskq_is_valid_addr_unaligned(q, *addr, + umem->chunk_size_nohr, + umem)) + return addr; + goto out; + } + if (xskq_is_valid_addr(q, *addr)) return addr; +out: q->cons_tail++; } return NULL; } -static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr) +static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr, + struct xdp_umem *umem) { if (q->cons_tail == q->cons_head) { smp_mb(); /* D, matches A */ @@ -170,7 +210,7 @@ static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr) smp_rmb(); } - return xskq_validate_addr(q, addr); + return xskq_validate_addr(q, addr, umem); } static inline void xskq_discard_addr(struct xsk_queue *q) @@ -229,8 +269,21 @@ static inline int xskq_reserve_addr(struct xsk_queue *q) /* Rx/Tx queue */ -static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d) +static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d, + struct xdp_umem *umem) { + if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { + if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem)) + return false; + + if (d->len > umem->chunk_size_nohr || d->options) { + q->invalid_descs++; + 
return false; + } + + return true; + } + if (!xskq_is_valid_addr(q, d->addr)) return false; @@ -244,14 +297,15 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d) } static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q, - struct xdp_desc *desc) + struct xdp_desc *desc, + struct xdp_umem *umem) { while (q->cons_tail != q->cons_head) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; unsigned int idx = q->cons_tail & q->ring_mask; *desc = READ_ONCE(ring->desc[idx]); - if (xskq_is_valid_desc(q, desc)) + if (xskq_is_valid_desc(q, desc, umem)) return desc; q->cons_tail++; @@ -261,7 +315,8 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q, } static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q, - struct xdp_desc *desc) + struct xdp_desc *desc, + struct xdp_umem *umem) { if (q->cons_tail == q->cons_head) { smp_mb(); /* D, matches A */ @@ -272,7 +327,7 @@ static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q, smp_rmb(); /* C, matches B */ } - return xskq_validate_desc(q, desc); + return xskq_validate_desc(q, desc, umem); } static inline void xskq_discard_desc(struct xsk_queue *q) diff --git a/samples/bpf/syscall_nrs.c b/samples/bpf/syscall_nrs.c index 516e255cbe8f..88f940052450 100644 --- a/samples/bpf/syscall_nrs.c +++ b/samples/bpf/syscall_nrs.c @@ -9,5 +9,11 @@ void syscall_defines(void) COMMENT("Linux system call numbers."); SYSNR(__NR_write); SYSNR(__NR_read); +#ifdef __NR_mmap2 + SYSNR(__NR_mmap2); +#endif +#ifdef __NR_mmap SYSNR(__NR_mmap); +#endif + } diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c index f57f4e1ea1ec..35cb0eed3be5 100644 --- a/samples/bpf/tracex5_kern.c +++ b/samples/bpf/tracex5_kern.c @@ -68,12 +68,25 @@ PROG(SYS__NR_read)(struct pt_regs *ctx) return 0; } +#ifdef __NR_mmap2 +PROG(SYS__NR_mmap2)(struct pt_regs *ctx) +{ + char fmt[] = "mmap2\n"; + + bpf_trace_printk(fmt, sizeof(fmt)); + return 0; +} +#endif + +#ifdef __NR_mmap PROG(SYS__NR_mmap)(struct pt_regs *ctx) { char fmt[] = "mmap\n"; + bpf_trace_printk(fmt, sizeof(fmt)); return 0; } +#endif char _license[] SEC("license") = "GPL"; u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 93eaaf7239b2..102eace22956 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -67,8 +67,14 @@ static int opt_ifindex; static int opt_queue; static int opt_poll; static int opt_interval = 1; +static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; +static u32 opt_umem_flags; +static int opt_unaligned_chunks; +static int opt_mmap_flags; static u32 opt_xdp_bind_flags; static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; +static int opt_timeout = 1000; +static bool opt_need_wakeup = true; static __u32 prog_id; struct xsk_umem_info { @@ -282,7 +288,9 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = opt_xsk_frame_size, .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, + .flags = opt_umem_flags }; + int ret; umem = calloc(1, sizeof(*umem)); @@ -291,6 +299,7 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, &cfg); + if (ret) exit_with_error(-ret); @@ -352,6 +361,8 @@ static struct option long_options[] = { {"zero-copy", no_argument, 0, 'z'}, {"copy", no_argument, 0, 'c'}, {"frame-size", required_argument, 0, 'f'}, + {"no-need-wakeup", 
no_argument, 0, 'm'}, + {"unaligned", no_argument, 0, 'u'}, {0, 0, 0, 0} }; @@ -372,6 +383,9 @@ static void usage(const char *prog) " -z, --zero-copy Force zero-copy mode.\n" " -c, --copy Force copy mode.\n" " -f, --frame-size=n Set the frame size (must be a power of two, default is %d).\n" + " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n" + " -f, --frame-size=n Set the frame size (must be a power of two in aligned mode, default is %d).\n" + " -u, --unaligned Enable unaligned chunk placement\n" "\n"; fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE); exit(EXIT_FAILURE); @@ -384,8 +398,8 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:", long_options, - &option_index); + c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:mu", + long_options, &option_index); if (c == -1) break; @@ -424,12 +438,21 @@ static void parse_command_line(int argc, char **argv) case 'c': opt_xdp_bind_flags |= XDP_COPY; break; + case 'u': + opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; + opt_unaligned_chunks = 1; + opt_mmap_flags = MAP_HUGETLB; + break; case 'F': opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; break; case 'f': opt_xsk_frame_size = atoi(optarg); + case 'm': + opt_need_wakeup = false; + opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP; break; + default: usage(basename(argv[0])); } @@ -442,7 +465,8 @@ static void parse_command_line(int argc, char **argv) usage(basename(argv[0])); } - if (opt_xsk_frame_size & (opt_xsk_frame_size - 1)) { + if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) && + !opt_unaligned_chunks) { fprintf(stderr, "--frame-size=%d is not a power of two\n", opt_xsk_frame_size); usage(basename(argv[0])); @@ -459,8 +483,10 @@ static void kick_tx(struct xsk_socket_info *xsk) exit_with_error(errno); } -static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk) +static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, + struct pollfd *fds) { + struct xsk_umem_info *umem = xsk->umem; u32 idx_cq = 0, idx_fq = 0; unsigned int rcvd; size_t ndescs; @@ -468,27 +494,30 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk) if (!xsk->outstanding_tx) return; - kick_tx(xsk); + if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) + kick_tx(xsk); + ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? 
BATCH_SIZE : xsk->outstanding_tx; /* re-add completed Tx buffers */ - rcvd = xsk_ring_cons__peek(&xsk->umem->cq, ndescs, &idx_cq); + rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq); if (rcvd > 0) { unsigned int i; int ret; - ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); - ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, - &idx_fq); + if (xsk_ring_prod__needs_wakeup(&umem->fq)) + ret = poll(fds, num_socks, opt_timeout); + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } + for (i = 0; i < rcvd; i++) - *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = - *xsk_ring_cons__comp_addr(&xsk->umem->cq, - idx_cq++); + *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = + *xsk_ring_cons__comp_addr(&umem->cq, idx_cq++); xsk_ring_prod__submit(&xsk->umem->fq, rcvd); xsk_ring_cons__release(&xsk->umem->cq, rcvd); @@ -505,7 +534,8 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk) if (!xsk->outstanding_tx) return; - kick_tx(xsk); + if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) + kick_tx(xsk); rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx); if (rcvd > 0) { @@ -515,30 +545,38 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk) } } -static void rx_drop(struct xsk_socket_info *xsk) +static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds) { unsigned int rcvd, i; u32 idx_rx = 0, idx_fq = 0; int ret; rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); - if (!rcvd) + if (!rcvd) { + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) + ret = poll(fds, num_socks, opt_timeout); return; + } ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) + ret = poll(fds, num_socks, opt_timeout); ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); } for (i = 0; i < rcvd; i++) { u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; + u64 orig = xsk_umem__extract_addr(addr); + + addr = xsk_umem__add_offset_to_addr(addr); char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); hex_dump(pkt, len, addr); - *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = addr; + *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig; } xsk_ring_prod__submit(&xsk->umem->fq, rcvd); @@ -549,42 +587,65 @@ static void rx_drop(struct xsk_socket_info *xsk) static void rx_drop_all(void) { struct pollfd fds[MAX_SOCKS + 1]; - int i, ret, timeout, nfds = 1; + int i, ret; memset(fds, 0, sizeof(fds)); for (i = 0; i < num_socks; i++) { fds[i].fd = xsk_socket__fd(xsks[i]->xsk); fds[i].events = POLLIN; - timeout = 1000; /* 1sn */ } for (;;) { if (opt_poll) { - ret = poll(fds, nfds, timeout); + ret = poll(fds, num_socks, opt_timeout); if (ret <= 0) continue; } for (i = 0; i < num_socks; i++) - rx_drop(xsks[i]); + rx_drop(xsks[i], fds); + } +} + +static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb) +{ + u32 idx; + + if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) { + unsigned int i; + + for (i = 0; i < BATCH_SIZE; i++) { + xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr = + (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; + xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len = + sizeof(pkt_data) - 1; + } + + xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE); + xsk->outstanding_tx += BATCH_SIZE; + frame_nb += BATCH_SIZE; + 
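/* wrap around so the same NUM_FRAMES umem frames get reused */ +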
frame_nb %= NUM_FRAMES; } + + complete_tx_only(xsk); } -static void tx_only(struct xsk_socket_info *xsk) +static void tx_only_all(void) { - int timeout, ret, nfds = 1; - struct pollfd fds[nfds + 1]; - u32 idx, frame_nb = 0; + struct pollfd fds[MAX_SOCKS]; + u32 frame_nb[MAX_SOCKS] = {}; + int i, ret; memset(fds, 0, sizeof(fds)); - fds[0].fd = xsk_socket__fd(xsk->xsk); - fds[0].events = POLLOUT; - timeout = 1000; /* 1sn */ + for (i = 0; i < num_socks; i++) { + fds[0].fd = xsk_socket__fd(xsks[i]->xsk); + fds[0].events = POLLOUT; + } for (;;) { if (opt_poll) { - ret = poll(fds, nfds, timeout); + ret = poll(fds, num_socks, opt_timeout); if (ret <= 0) continue; @@ -592,69 +653,78 @@ static void tx_only(struct xsk_socket_info *xsk) continue; } - if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == - BATCH_SIZE) { - unsigned int i; - - for (i = 0; i < BATCH_SIZE; i++) { - xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr - = (frame_nb + i) * opt_xsk_frame_size; - xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len = - sizeof(pkt_data) - 1; - } - - xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE); - xsk->outstanding_tx += BATCH_SIZE; - frame_nb += BATCH_SIZE; - frame_nb %= NUM_FRAMES; - } - - complete_tx_only(xsk); + for (i = 0; i < num_socks; i++) + tx_only(xsks[i], frame_nb[i]); } } -static void l2fwd(struct xsk_socket_info *xsk) +static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) { - for (;;) { - unsigned int rcvd, i; - u32 idx_rx = 0, idx_tx = 0; - int ret; + unsigned int rcvd, i; + u32 idx_rx = 0, idx_tx = 0; + int ret; - for (;;) { - complete_tx_l2fwd(xsk); + complete_tx_l2fwd(xsk, fds); - rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, - &idx_rx); - if (rcvd > 0) - break; - } + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); + if (!rcvd) { + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) + ret = poll(fds, num_socks, opt_timeout); + return; + } + ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); + while (ret != rcvd) { + if (ret < 0) + exit_with_error(-ret); + if (xsk_ring_prod__needs_wakeup(&xsk->tx)) + kick_tx(xsk); ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); - while (ret != rcvd) { - if (ret < 0) - exit_with_error(-ret); - ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); - } + } - for (i = 0; i < rcvd; i++) { - u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, - idx_rx)->addr; - u32 len = xsk_ring_cons__rx_desc(&xsk->rx, - idx_rx++)->len; - char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); + for (i = 0; i < rcvd; i++) { + u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; + u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; + u64 orig = xsk_umem__extract_addr(addr); - swap_mac_addresses(pkt); + addr = xsk_umem__add_offset_to_addr(addr); + char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); - hex_dump(pkt, len, addr); - xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = addr; - xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len; - } + swap_mac_addresses(pkt); + + hex_dump(pkt, len, addr); + xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig; + xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len; + } + + xsk_ring_prod__submit(&xsk->tx, rcvd); + xsk_ring_cons__release(&xsk->rx, rcvd); + + xsk->rx_npkts += rcvd; + xsk->outstanding_tx += rcvd; +} - xsk_ring_prod__submit(&xsk->tx, rcvd); - xsk_ring_cons__release(&xsk->rx, rcvd); +static void l2fwd_all(void) +{ + struct pollfd fds[MAX_SOCKS]; + int i, ret; + + memset(fds, 0, sizeof(fds)); - xsk->rx_npkts += rcvd; - xsk->outstanding_tx += rcvd; + for (i = 0; i < num_socks; 
i++) { + fds[i].fd = xsk_socket__fd(xsks[i]->xsk); + fds[i].events = POLLOUT | POLLIN; + } + + for (;;) { + if (opt_poll) { + ret = poll(fds, num_socks, opt_timeout); + if (ret <= 0) + continue; + } + + for (i = 0; i < num_socks; i++) + l2fwd(xsks[i], fds); } } @@ -674,11 +744,14 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - ret = posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */ - NUM_FRAMES * opt_xsk_frame_size); - if (ret) - exit_with_error(ret); - + /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */ + bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0); + if (bufs == MAP_FAILED) { + printf("ERROR: mmap failed\n"); + exit(EXIT_FAILURE); + } /* Create sockets... */ umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size); xsks[num_socks++] = xsk_configure_socket(umem); @@ -705,9 +778,9 @@ int main(int argc, char **argv) if (opt_bench == BENCH_RXDROP) rx_drop_all(); else if (opt_bench == BENCH_TXONLY) - tx_only(xsks[0]); + tx_only_all(); else - l2fwd(xsks[0]); + l2fwd_all(); return 0; } diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index c31193340108..0d8f41db8cd6 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -115,10 +115,12 @@ gen_btf() LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1} # dump .BTF section into raw binary file to link with final vmlinux - bin_arch=$(${OBJDUMP} -f ${1} | grep architecture | \ + bin_arch=$(LANG=C ${OBJDUMP} -f ${1} | grep architecture | \ cut -d, -f1 | cut -d' ' -f2) + bin_format=$(LANG=C ${OBJDUMP} -f ${1} | grep 'file format' | \ + awk '{print $4}') ${OBJCOPY} --dump-section .BTF=.btf.vmlinux.bin ${1} 2>/dev/null - ${OBJCOPY} -I binary -O ${CONFIG_OUTPUT_FORMAT} -B ${bin_arch} \ + ${OBJCOPY} -I binary -O ${bin_format} -B ${bin_arch} \ --rename-section .data=.BTF .btf.vmlinux.bin ${2} } diff --git a/tools/bpf/.gitignore b/tools/bpf/.gitignore index dfe2bd5a4b95..59024197e71d 100644 --- a/tools/bpf/.gitignore +++ b/tools/bpf/.gitignore @@ -1,4 +1,5 @@ FEATURE-DUMP.bpf +feature bpf_asm bpf_dbg bpf_exp.yacc.* diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 53b60ad452f5..fbf5e4a0cb9c 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -81,10 +81,11 @@ $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c clean: bpftool_clean $(call QUIET_CLEAN, bpf-progs) - $(Q)rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \ + $(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \ $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.* $(call QUIET_CLEAN, core-gen) - $(Q)rm -f $(OUTPUT)FEATURE-DUMP.bpf + $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf + $(Q)$(RM) -r -- $(OUTPUT)feature install: $(PROGS) bpftool_install $(call QUIET_INSTALL, bpf_jit_disasm) diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index 8248b8dd89d4..b13926432b84 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -3,3 +3,5 @@ bpftool*.8 bpf-helpers.* FEATURE-DUMP.bpftool +feature +libbpf diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index 6694a0fc8f99..39615f8e145b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -19,6 +19,7 @@ SYNOPSIS BTF COMMANDS ============= +| **bpftool** **btf** { **show** | **list** } [**id** *BTF_ID*] | **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*] | **bpftool** **btf help** | 
@@ -29,6 +30,12 @@ BTF COMMANDS DESCRIPTION =========== + **bpftool btf { show | list }** [**id** *BTF_ID*] + Show information about loaded BTF objects. If a BTF ID is + specified, show information only about given BTF object, + otherwise list all BTF objects currently loaded on the + system. + **bpftool btf dump** *BTF_SRC* Dump BTF entries from a given *BTF_SRC*. diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 61d1d270eb5e..1c0f7146aab0 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -36,6 +36,7 @@ MAP COMMANDS | **bpftool** **map pop** *MAP* | **bpftool** **map enqueue** *MAP* **value** *VALUE* | **bpftool** **map dequeue** *MAP* +| **bpftool** **map freeze** *MAP* | **bpftool** **map help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } @@ -127,6 +128,14 @@ DESCRIPTION **bpftool map dequeue** *MAP* Dequeue and print **value** from the queue. + **bpftool map freeze** *MAP* + Freeze the map as read-only from user space. Entries from a + frozen map can not longer be updated or deleted with the + **bpf\ ()** system call. This operation is not reversible, + and the map remains immutable from user space until its + destruction. However, read and write permissions for BPF + programs to the map remain unchanged. + **bpftool map help** Print short help message. diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index d8e5237a2085..8651b00b81ea 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -15,17 +15,22 @@ SYNOPSIS *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] } *COMMANDS* := - { **show** | **list** } [ **dev** name ] | **help** + { **show** | **list** | **attach** | **detach** | **help** } NET COMMANDS ============ -| **bpftool** **net { show | list } [ dev name ]** +| **bpftool** **net { show | list }** [ **dev** *NAME* ] +| **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ] +| **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME* | **bpftool** **net help** +| +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *ATTACH_TYPE* := { **xdp** | **xdpgeneric** | **xdpdrv** | **xdpoffload** } DESCRIPTION =========== - **bpftool net { show | list } [ dev name ]** + **bpftool net { show | list }** [ **dev** *NAME* ] List bpf program attachments in the kernel networking subsystem. Currently, only device driver xdp attachments and tc filter @@ -47,6 +52,24 @@ DESCRIPTION all bpf programs attached to non clsact qdiscs, and finally all bpf programs attached to root and clsact qdisc. + **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ] + Attach bpf program *PROG* to network interface *NAME* with + type specified by *ATTACH_TYPE*. Previously attached bpf program + can be replaced by the command used with **overwrite** option. + Currently, only XDP-related modes are supported for *ATTACH_TYPE*. + + *ATTACH_TYPE* can be of: + **xdp** - try native XDP and fallback to generic XDP if NIC driver does not support it; + **xdpgeneric** - Generic XDP. runs at generic XDP hook when packet already enters receive path as skb; + **xdpdrv** - Native XDP. runs earliest point in driver's receive path; + **xdpoffload** - Offload XDP. 
runs directly on NIC on each packet reception; + + **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME* + Detach bpf program attached to network interface *NAME* with + type specified by *ATTACH_TYPE*. To detach bpf program, same + *ATTACH_TYPE* previously used for attach must be specified. + Currently, only XDP-related modes are supported for *ATTACH_TYPE*. + **bpftool net help** Print short help message. @@ -137,6 +160,34 @@ EXAMPLES } ] +| +| **# bpftool net attach xdpdrv id 16 dev enp6s0np0** +| **# bpftool net** + +:: + + xdp: + enp6s0np0(4) driver id 16 + +| +| **# bpftool net attach xdpdrv id 16 dev enp6s0np0** +| **# bpftool net attach xdpdrv id 20 dev enp6s0np0 overwrite** +| **# bpftool net** + +:: + + xdp: + enp6s0np0(4) driver id 20 + +| +| **# bpftool net attach xdpdrv id 16 dev enp6s0np0** +| **# bpftool net detach xdpdrv dev enp6s0np0** +| **# bpftool net** + +:: + + xdp: + SEE ALSO ======== diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 4c9d1ffc3fc7..39bc6f0f4f0b 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -17,27 +17,30 @@ endif BPF_DIR = $(srctree)/tools/lib/bpf/ ifneq ($(OUTPUT),) - BPF_PATH = $(OUTPUT) + LIBBPF_OUTPUT = $(OUTPUT)/libbpf/ + LIBBPF_PATH = $(LIBBPF_OUTPUT) else - BPF_PATH = $(BPF_DIR) + LIBBPF_PATH = $(BPF_DIR) endif -LIBBPF = $(BPF_PATH)libbpf.a +LIBBPF = $(LIBBPF_PATH)libbpf.a -BPFTOOL_VERSION := $(shell make --no-print-directory -sC ../../.. kernelversion) +BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion) $(LIBBPF): FORCE - $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a + $(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT)) + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) - $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null prefix ?= /usr/local bash_compdir ?= /usr/share/bash-completion/completions CFLAGS += -O2 -CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers +CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers +CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS)) CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ -I$(srctree)/kernel/bpf/ \ -I$(srctree)/tools/include \ @@ -52,7 +55,7 @@ ifneq ($(EXTRA_LDFLAGS),) LDFLAGS += $(EXTRA_LDFLAGS) endif -LIBS = -lelf -lz $(LIBBPF) +LIBS = $(LIBBPF) -lelf -lz INSTALL ?= install RM ?= rm -f @@ -114,16 +117,18 @@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(OUTPUT)feature.o: | zdep $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) $(OUTPUT)%.o: %.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< clean: $(LIBBPF)-clean $(call QUIET_CLEAN, bpftool) - $(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + $(Q)$(RM) -r -- $(OUTPUT)libbpf/ $(call QUIET_CLEAN, core-gen) - $(Q)$(RM) $(OUTPUT)FEATURE-DUMP.bpftool + $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool + $(Q)$(RM) -r -- $(OUTPUT)feature/ install: $(OUTPUT)bpftool $(call QUIET_INSTALL, bpftool) @@ -134,8 +139,8 @@ install: $(OUTPUT)bpftool uninstall: $(call QUIET_UNINST, bpftool) - $(Q)$(RM) $(DESTDIR)$(prefix)/sbin/bpftool - $(Q)$(RM) $(DESTDIR)$(bash_compdir)/bpftool + $(Q)$(RM) -- $(DESTDIR)$(prefix)/sbin/bpftool + $(Q)$(RM) -- $(DESTDIR)$(bash_compdir)/bpftool doc: 
$(call descend,Documentation) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index df16c5415444..70493a6da206 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -73,8 +73,8 @@ _bpftool_get_prog_tags() _bpftool_get_btf_ids() { - COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ - command sed -n 's/.*"btf_id": \(.*\),\?$/\1/p' )" -- "$cur" ) ) + COMPREPLY+=( $( compgen -W "$( bpftool -jp btf 2>&1 | \ + command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) } _bpftool_get_obj_map_names() @@ -201,6 +201,10 @@ _bpftool() _bpftool_get_prog_tags return 0 ;; + dev) + _sysfs_get_netdevs + return 0 + ;; file|pinned) _filedir return 0 @@ -399,10 +403,6 @@ _bpftool() _filedir return 0 ;; - dev) - _sysfs_get_netdevs - return 0 - ;; *) COMPREPLY=( $( compgen -W "map" -- "$cur" ) ) _bpftool_once_attr 'type' @@ -449,7 +449,7 @@ _bpftool() map) local MAP_TYPE='id pinned' case $command in - show|list|dump|peek|pop|dequeue) + show|list|dump|peek|pop|dequeue|freeze) case $prev in $command) COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) @@ -498,10 +498,6 @@ _bpftool() key|value|flags|name|entries) return 0 ;; - dev) - _sysfs_get_netdevs - return 0 - ;; *) _bpftool_once_attr 'type' _bpftool_once_attr 'key' @@ -642,7 +638,7 @@ _bpftool() [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'delete dump getnext help \ lookup pin event_pipe show list update create \ - peek push enqueue pop dequeue' -- \ + peek push enqueue pop dequeue freeze' -- \ "$cur" ) ) ;; esac @@ -674,7 +670,7 @@ _bpftool() map) _bpftool_get_map_ids ;; - dump) + $command) _bpftool_get_btf_ids ;; esac @@ -702,9 +698,21 @@ _bpftool() ;; esac ;; + show|list) + case $prev in + $command) + COMPREPLY+=( $( compgen -W "id" -- "$cur" ) ) + ;; + id) + _bpftool_get_btf_ids + ;; + esac + return 0 + ;; *) [[ $prev == $object ]] && \ - COMPREPLY=( $( compgen -W 'dump help' -- "$cur" ) ) + COMPREPLY=( $( compgen -W 'dump help show list' \ + -- "$cur" ) ) ;; esac ;; @@ -778,18 +786,67 @@ _bpftool() esac ;; net) + local PROG_TYPE='id pinned tag' + local ATTACH_TYPES='xdp xdpgeneric xdpdrv xdpoffload' case $command in + show|list) + [[ $prev != "$command" ]] && return 0 + COMPREPLY=( $( compgen -W 'dev' -- "$cur" ) ) + return 0 + ;; + attach) + case $cword in + 3) + COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- "$cur" ) ) + return 0 + ;; + 4) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) + return 0 + ;; + 5) + case $prev in + id) + _bpftool_get_prog_ids + ;; + pinned) + _filedir + ;; + esac + return 0 + ;; + 6) + COMPREPLY=( $( compgen -W 'dev' -- "$cur" ) ) + return 0 + ;; + 8) + _bpftool_once_attr 'overwrite' + return 0 + ;; + esac + ;; + detach) + case $cword in + 3) + COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- "$cur" ) ) + return 0 + ;; + 4) + COMPREPLY=( $( compgen -W 'dev' -- "$cur" ) ) + return 0 + ;; + esac + ;; *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'help \ - show list' -- "$cur" ) ) + show list attach detach' -- "$cur" ) ) ;; esac ;; feature) case $command in probe) - [[ $prev == "dev" ]] && _sysfs_get_netdevs && return 0 [[ $prev == "prefix" ]] && return 0 if _bpftool_search_list 'macros'; then COMPREPLY+=( $( compgen -W 'prefix' -- "$cur" ) ) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 1b8ec91899e6..9a9376d1d3df 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -11,6 +11,7 @@ #include <bpf.h> #include <libbpf.h> #include <linux/btf.h> +#include 
<linux/hashtable.h> #include "btf.h" #include "json_writer.h" @@ -35,6 +36,16 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_DATASEC] = "DATASEC", }; +struct btf_attach_table { + DECLARE_HASHTABLE(table, 16); +}; + +struct btf_attach_point { + __u32 obj_id; + __u32 btf_id; + struct hlist_node hash; +}; + static const char *btf_int_enc_str(__u8 encoding) { switch (encoding) { @@ -449,7 +460,7 @@ static int do_dump(int argc, char **argv) btf_id = strtoul(*argv, &endptr, 0); if (*endptr) { - p_err("can't parse %s as ID", **argv); + p_err("can't parse %s as ID", *argv); return -1; } NEXT_ARG(); @@ -522,6 +533,330 @@ done: return err; } +static int btf_parse_fd(int *argc, char ***argv) +{ + unsigned int id; + char *endptr; + int fd; + + if (!is_prefix(*argv[0], "id")) { + p_err("expected 'id', got: '%s'?", **argv); + return -1; + } + NEXT_ARGP(); + + id = strtoul(**argv, &endptr, 0); + if (*endptr) { + p_err("can't parse %s as ID", **argv); + return -1; + } + NEXT_ARGP(); + + fd = bpf_btf_get_fd_by_id(id); + if (fd < 0) + p_err("can't get BTF object by id (%u): %s", + id, strerror(errno)); + + return fd; +} + +static void delete_btf_table(struct btf_attach_table *tab) +{ + struct btf_attach_point *obj; + struct hlist_node *tmp; + + unsigned int bkt; + + hash_for_each_safe(tab->table, bkt, tmp, obj, hash) { + hash_del(&obj->hash); + free(obj); + } +} + +static int +build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, + void *info, __u32 *len) +{ + static const char * const names[] = { + [BPF_OBJ_UNKNOWN] = "unknown", + [BPF_OBJ_PROG] = "prog", + [BPF_OBJ_MAP] = "map", + }; + struct btf_attach_point *obj_node; + __u32 btf_id, id = 0; + int err; + int fd; + + while (true) { + switch (type) { + case BPF_OBJ_PROG: + err = bpf_prog_get_next_id(id, &id); + break; + case BPF_OBJ_MAP: + err = bpf_map_get_next_id(id, &id); + break; + default: + err = -1; + p_err("unexpected object type: %d", type); + goto err_free; + } + if (err) { + if (errno == ENOENT) { + err = 0; + break; + } + p_err("can't get next %s: %s%s", names[type], + strerror(errno), + errno == EINVAL ? " -- kernel too old?" 
: ""); + goto err_free; + } + + switch (type) { + case BPF_OBJ_PROG: + fd = bpf_prog_get_fd_by_id(id); + break; + case BPF_OBJ_MAP: + fd = bpf_map_get_fd_by_id(id); + break; + default: + err = -1; + p_err("unexpected object type: %d", type); + goto err_free; + } + if (fd < 0) { + if (errno == ENOENT) + continue; + p_err("can't get %s by id (%u): %s", names[type], id, + strerror(errno)); + err = -1; + goto err_free; + } + + memset(info, 0, *len); + err = bpf_obj_get_info_by_fd(fd, info, len); + close(fd); + if (err) { + p_err("can't get %s info: %s", names[type], + strerror(errno)); + goto err_free; + } + + switch (type) { + case BPF_OBJ_PROG: + btf_id = ((struct bpf_prog_info *)info)->btf_id; + break; + case BPF_OBJ_MAP: + btf_id = ((struct bpf_map_info *)info)->btf_id; + break; + default: + err = -1; + p_err("unexpected object type: %d", type); + goto err_free; + } + if (!btf_id) + continue; + + obj_node = calloc(1, sizeof(*obj_node)); + if (!obj_node) { + p_err("failed to allocate memory: %s", strerror(errno)); + goto err_free; + } + + obj_node->obj_id = id; + obj_node->btf_id = btf_id; + hash_add(tab->table, &obj_node->hash, obj_node->btf_id); + } + + return 0; + +err_free: + delete_btf_table(tab); + return err; +} + +static int +build_btf_tables(struct btf_attach_table *btf_prog_table, + struct btf_attach_table *btf_map_table) +{ + struct bpf_prog_info prog_info; + __u32 prog_len = sizeof(prog_info); + struct bpf_map_info map_info; + __u32 map_len = sizeof(map_info); + int err = 0; + + err = build_btf_type_table(btf_prog_table, BPF_OBJ_PROG, &prog_info, + &prog_len); + if (err) + return err; + + err = build_btf_type_table(btf_map_table, BPF_OBJ_MAP, &map_info, + &map_len); + if (err) { + delete_btf_table(btf_prog_table); + return err; + } + + return 0; +} + +static void +show_btf_plain(struct bpf_btf_info *info, int fd, + struct btf_attach_table *btf_prog_table, + struct btf_attach_table *btf_map_table) +{ + struct btf_attach_point *obj; + int n; + + printf("%u: ", info->id); + printf("size %uB", info->btf_size); + + n = 0; + hash_for_each_possible(btf_prog_table->table, obj, hash, info->id) { + if (obj->btf_id == info->id) + printf("%s%u", n++ == 0 ? " prog_ids " : ",", + obj->obj_id); + } + + n = 0; + hash_for_each_possible(btf_map_table->table, obj, hash, info->id) { + if (obj->btf_id == info->id) + printf("%s%u", n++ == 0 ? 
" map_ids " : ",", + obj->obj_id); + } + + printf("\n"); +} + +static void +show_btf_json(struct bpf_btf_info *info, int fd, + struct btf_attach_table *btf_prog_table, + struct btf_attach_table *btf_map_table) +{ + struct btf_attach_point *obj; + + jsonw_start_object(json_wtr); /* btf object */ + jsonw_uint_field(json_wtr, "id", info->id); + jsonw_uint_field(json_wtr, "size", info->btf_size); + + jsonw_name(json_wtr, "prog_ids"); + jsonw_start_array(json_wtr); /* prog_ids */ + hash_for_each_possible(btf_prog_table->table, obj, hash, + info->id) { + if (obj->btf_id == info->id) + jsonw_uint(json_wtr, obj->obj_id); + } + jsonw_end_array(json_wtr); /* prog_ids */ + + jsonw_name(json_wtr, "map_ids"); + jsonw_start_array(json_wtr); /* map_ids */ + hash_for_each_possible(btf_map_table->table, obj, hash, + info->id) { + if (obj->btf_id == info->id) + jsonw_uint(json_wtr, obj->obj_id); + } + jsonw_end_array(json_wtr); /* map_ids */ + jsonw_end_object(json_wtr); /* btf object */ +} + +static int +show_btf(int fd, struct btf_attach_table *btf_prog_table, + struct btf_attach_table *btf_map_table) +{ + struct bpf_btf_info info = {}; + __u32 len = sizeof(info); + int err; + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + p_err("can't get BTF object info: %s", strerror(errno)); + return -1; + } + + if (json_output) + show_btf_json(&info, fd, btf_prog_table, btf_map_table); + else + show_btf_plain(&info, fd, btf_prog_table, btf_map_table); + + return 0; +} + +static int do_show(int argc, char **argv) +{ + struct btf_attach_table btf_prog_table; + struct btf_attach_table btf_map_table; + int err, fd = -1; + __u32 id = 0; + + if (argc == 2) { + fd = btf_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + } + + if (argc) { + if (fd >= 0) + close(fd); + return BAD_ARG(); + } + + hash_init(btf_prog_table.table); + hash_init(btf_map_table.table); + err = build_btf_tables(&btf_prog_table, &btf_map_table); + if (err) { + if (fd >= 0) + close(fd); + return err; + } + + if (fd >= 0) { + err = show_btf(fd, &btf_prog_table, &btf_map_table); + close(fd); + goto exit_free; + } + + if (json_output) + jsonw_start_array(json_wtr); /* root array */ + + while (true) { + err = bpf_btf_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) { + err = 0; + break; + } + p_err("can't get next BTF object: %s%s", + strerror(errno), + errno == EINVAL ? " -- kernel too old?" 
: ""); + err = -1; + break; + } + + fd = bpf_btf_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; + p_err("can't get BTF object by id (%u): %s", + id, strerror(errno)); + err = -1; + break; + } + + err = show_btf(fd, &btf_prog_table, &btf_map_table); + close(fd); + if (err) + break; + } + + if (json_output) + jsonw_end_array(json_wtr); /* root array */ + +exit_free: + delete_btf_table(&btf_prog_table); + delete_btf_table(&btf_map_table); + + return err; +} + static int do_help(int argc, char **argv) { if (json_output) { @@ -530,7 +865,8 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s btf dump BTF_SRC [format FORMAT]\n" + "Usage: %s btf { show | list } [id BTF_ID]\n" + " %s btf dump BTF_SRC [format FORMAT]\n" " %s btf help\n" "\n" " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n" @@ -539,12 +875,14 @@ static int do_help(int argc, char **argv) " " HELP_SPEC_PROGRAM "\n" " " HELP_SPEC_OPTIONS "\n" "", - bin_name, bin_name); + bin_name, bin_name, bin_name); return 0; } static const struct cmd cmds[] = { + { "show", do_show }, + { "list", do_show }, { "help", do_help }, { "dump", do_dump }, { 0 } diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 8cafb9b31467..d66131f69689 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -26,9 +26,9 @@ static void btf_dumper_ptr(const void *data, json_writer_t *jw, bool is_plain_text) { if (is_plain_text) - jsonw_printf(jw, "%p", *(unsigned long *)data); + jsonw_printf(jw, "%p", data); else - jsonw_printf(jw, "%u", *(unsigned long *)data); + jsonw_printf(jw, "%lu", *(unsigned long *)data); } static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id, @@ -216,7 +216,7 @@ static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset, switch (BTF_INT_ENCODING(*int_type)) { case 0: if (BTF_INT_BITS(*int_type) == 64) - jsonw_printf(jw, "%lu", *(__u64 *)data); + jsonw_printf(jw, "%llu", *(__u64 *)data); else if (BTF_INT_BITS(*int_type) == 32) jsonw_printf(jw, "%u", *(__u32 *)data); else if (BTF_INT_BITS(*int_type) == 16) @@ -229,7 +229,7 @@ static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset, break; case BTF_INT_SIGNED: if (BTF_INT_BITS(*int_type) == 64) - jsonw_printf(jw, "%ld", *(long long *)data); + jsonw_printf(jw, "%lld", *(long long *)data); else if (BTF_INT_BITS(*int_type) == 32) jsonw_printf(jw, "%d", *(int *)data); else if (BTF_INT_BITS(*int_type) == 16) diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 44352b5aca85..1ef45e55039e 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -120,8 +120,8 @@ static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type, int level) { + const char *attach_flags_str; __u32 prog_ids[1024] = {0}; - char *attach_flags_str; __u32 prog_cnt, iter; __u32 attach_flags; char buf[32]; diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 6a71324be628..88264abaa738 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -29,7 +29,7 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif -void __printf(1, 2) p_err(const char *fmt, ...) +void p_err(const char *fmt, ...) { va_list ap; @@ -47,7 +47,7 @@ void __printf(1, 2) p_err(const char *fmt, ...) va_end(ap); } -void __printf(1, 2) p_info(const char *fmt, ...) +void p_info(const char *fmt, ...) 
{ va_list ap; diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c index 6046dcab51cc..86501cd3c763 100644 --- a/tools/bpf/bpftool/json_writer.c +++ b/tools/bpf/bpftool/json_writer.c @@ -15,7 +15,6 @@ #include <malloc.h> #include <inttypes.h> #include <stdint.h> -#include <linux/compiler.h> #include "json_writer.h" @@ -153,8 +152,7 @@ void jsonw_name(json_writer_t *self, const char *name) putc(' ', self->out); } -void __printf(2, 0) -jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap) +void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap) { jsonw_eor(self); putc('"', self->out); @@ -162,7 +160,7 @@ jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap) putc('"', self->out); } -void __printf(2, 3) jsonw_printf(json_writer_t *self, const char *fmt, ...) +void jsonw_printf(json_writer_t *self, const char *fmt, ...) { va_list ap; diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h index cb9a1993681c..35cf1f00f96c 100644 --- a/tools/bpf/bpftool/json_writer.h +++ b/tools/bpf/bpftool/json_writer.h @@ -14,6 +14,7 @@ #include <stdbool.h> #include <stdint.h> #include <stdarg.h> +#include <linux/compiler.h> /* Opaque class structure */ typedef struct json_writer json_writer_t; @@ -30,8 +31,9 @@ void jsonw_pretty(json_writer_t *self, bool on); void jsonw_name(json_writer_t *self, const char *name); /* Add value */ -void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap); -void jsonw_printf(json_writer_t *self, const char *fmt, ...); +void __printf(2, 0) jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, + va_list ap); +void __printf(2, 3) jsonw_printf(json_writer_t *self, const char *fmt, ...); void jsonw_string(json_writer_t *self, const char *value); void jsonw_bool(json_writer_t *self, bool value); void jsonw_float(json_writer_t *self, double number); diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index e916ff25697f..93d008687020 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -139,7 +139,7 @@ int detect_common_prefix(const char *arg, ...) 
strncat(msg, "'", sizeof(msg) - strlen(msg) - 1); if (count >= 2) { - p_err(msg); + p_err("%s", msg); return -1; } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 7031a4bf87a0..af9ad56c303a 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -98,8 +98,8 @@ extern int bpf_flags; extern struct pinned_obj_table prog_table; extern struct pinned_obj_table map_table; -void p_err(const char *fmt, ...); -void p_info(const char *fmt, ...); +void __printf(1, 2) p_err(const char *fmt, ...); +void __printf(1, 2) p_info(const char *fmt, ...); bool is_prefix(const char *pfx, const char *str); int detect_common_prefix(const char *arg, ...); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index bfbbc6b4cb83..de61d73b9030 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -481,9 +481,11 @@ static int parse_elem(char **argv, struct bpf_map_info *info, static int show_map_close_json(int fd, struct bpf_map_info *info) { - char *memlock; + char *memlock, *frozen_str; + int frozen = 0; memlock = get_fdinfo(fd, "memlock"); + frozen_str = get_fdinfo(fd, "frozen"); jsonw_start_object(json_wtr); @@ -533,6 +535,12 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) } close(fd); + if (frozen_str) { + frozen = atoi(frozen_str); + free(frozen_str); + } + jsonw_int_field(json_wtr, "frozen", frozen); + if (info->btf_id) jsonw_int_field(json_wtr, "btf_id", info->btf_id); @@ -555,9 +563,11 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) static int show_map_close_plain(int fd, struct bpf_map_info *info) { - char *memlock; + char *memlock, *frozen_str; + int frozen = 0; memlock = get_fdinfo(fd, "memlock"); + frozen_str = get_fdinfo(fd, "frozen"); printf("%u: ", info->id); if (info->type < ARRAY_SIZE(map_type_name)) @@ -610,9 +620,23 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) printf("\n\tpinned %s", obj->path); } } + printf("\n"); + + if (frozen_str) { + frozen = atoi(frozen_str); + free(frozen_str); + } + + if (!info->btf_id && !frozen) + return 0; + + printf("\t"); if (info->btf_id) - printf("\n\tbtf_id %d", info->btf_id); + printf("btf_id %d", info->btf_id); + + if (frozen) + printf("%sfrozen", info->btf_id ? 
" " : ""); printf("\n"); return 0; @@ -1238,6 +1262,35 @@ exit_free: return err; } +static int do_freeze(int argc, char **argv) +{ + int err, fd; + + if (!REQ_ARGS(2)) + return -1; + + fd = map_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + if (argc) { + close(fd); + return BAD_ARG(); + } + + err = bpf_map_freeze(fd); + close(fd); + if (err) { + p_err("failed to freeze map: %s", strerror(errno)); + return err; + } + + if (json_output) + jsonw_null(json_wtr); + + return 0; +} + static int do_help(int argc, char **argv) { if (json_output) { @@ -1262,6 +1315,7 @@ static int do_help(int argc, char **argv) " %s %s pop MAP\n" " %s %s enqueue MAP value VALUE\n" " %s %s dequeue MAP\n" + " %s %s freeze MAP\n" " %s %s help\n" "\n" " " HELP_SPEC_MAP "\n" @@ -1280,7 +1334,8 @@ static int do_help(int argc, char **argv) bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2]); return 0; } @@ -1302,6 +1357,7 @@ static const struct cmd cmds[] = { { "enqueue", do_update }, { "pop", do_pop_dequeue }, { "dequeue", do_pop_dequeue }, + { "freeze", do_freeze }, { 0 } }; diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 3f108ab17797..4c5531d1a450 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -157,7 +157,7 @@ int do_event_pipe(int argc, char **argv) NEXT_ARG(); ctx.cpu = strtoul(*argv, &endptr, 0); if (*endptr) { - p_err("can't parse %s as CPU ID", **argv); + p_err("can't parse %s as CPU ID", *argv); goto err_close_map; } @@ -168,7 +168,7 @@ int do_event_pipe(int argc, char **argv) NEXT_ARG(); ctx.idx = strtoul(*argv, &endptr, 0); if (*endptr) { - p_err("can't parse %s as index", **argv); + p_err("can't parse %s as index", *argv); goto err_close_map; } diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 67e99c56bc88..4f52d3151616 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -55,6 +55,35 @@ struct bpf_attach_info { __u32 flow_dissector_id; }; +enum net_attach_type { + NET_ATTACH_TYPE_XDP, + NET_ATTACH_TYPE_XDP_GENERIC, + NET_ATTACH_TYPE_XDP_DRIVER, + NET_ATTACH_TYPE_XDP_OFFLOAD, +}; + +static const char * const attach_type_strings[] = { + [NET_ATTACH_TYPE_XDP] = "xdp", + [NET_ATTACH_TYPE_XDP_GENERIC] = "xdpgeneric", + [NET_ATTACH_TYPE_XDP_DRIVER] = "xdpdrv", + [NET_ATTACH_TYPE_XDP_OFFLOAD] = "xdpoffload", +}; + +const size_t net_attach_type_size = ARRAY_SIZE(attach_type_strings); + +static enum net_attach_type parse_attach_type(const char *str) +{ + enum net_attach_type type; + + for (type = 0; type < net_attach_type_size; type++) { + if (attach_type_strings[type] && + is_prefix(str, attach_type_strings[type])) + return type; + } + + return net_attach_type_size; +} + static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_netdev_t *netinfo = cookie; @@ -197,7 +226,7 @@ static int query_flow_dissector(struct bpf_attach_info *attach_info) fd = open("/proc/self/ns/net", O_RDONLY); if (fd < 0) { - p_err("can't open /proc/self/ns/net: %d", + p_err("can't open /proc/self/ns/net: %s", strerror(errno)); return -1; } @@ -223,6 +252,134 @@ static int query_flow_dissector(struct bpf_attach_info *attach_info) return 0; } +static int net_parse_dev(int *argc, char ***argv) +{ + int ifindex; + + if 
(is_prefix(**argv, "dev")) { + NEXT_ARGP(); + + ifindex = if_nametoindex(**argv); + if (!ifindex) + p_err("invalid devname %s", **argv); + + NEXT_ARGP(); + } else { + p_err("expected 'dev', got: '%s'?", **argv); + return -1; + } + + return ifindex; +} + +static int do_attach_detach_xdp(int progfd, enum net_attach_type attach_type, + int ifindex, bool overwrite) +{ + __u32 flags = 0; + + if (!overwrite) + flags = XDP_FLAGS_UPDATE_IF_NOEXIST; + if (attach_type == NET_ATTACH_TYPE_XDP_GENERIC) + flags |= XDP_FLAGS_SKB_MODE; + if (attach_type == NET_ATTACH_TYPE_XDP_DRIVER) + flags |= XDP_FLAGS_DRV_MODE; + if (attach_type == NET_ATTACH_TYPE_XDP_OFFLOAD) + flags |= XDP_FLAGS_HW_MODE; + + return bpf_set_link_xdp_fd(ifindex, progfd, flags); +} + +static int do_attach(int argc, char **argv) +{ + enum net_attach_type attach_type; + int progfd, ifindex, err = 0; + bool overwrite = false; + + /* parse attach args */ + if (!REQ_ARGS(5)) + return -EINVAL; + + attach_type = parse_attach_type(*argv); + if (attach_type == net_attach_type_size) { + p_err("invalid net attach/detach type: %s", *argv); + return -EINVAL; + } + NEXT_ARG(); + + progfd = prog_parse_fd(&argc, &argv); + if (progfd < 0) + return -EINVAL; + + ifindex = net_parse_dev(&argc, &argv); + if (ifindex < 1) { + close(progfd); + return -EINVAL; + } + + if (argc) { + if (is_prefix(*argv, "overwrite")) { + overwrite = true; + } else { + p_err("expected 'overwrite', got: '%s'?", *argv); + close(progfd); + return -EINVAL; + } + } + + /* attach xdp prog */ + if (is_prefix("xdp", attach_type_strings[attach_type])) + err = do_attach_detach_xdp(progfd, attach_type, ifindex, + overwrite); + + if (err < 0) { + p_err("interface %s attach failed: %s", + attach_type_strings[attach_type], strerror(-err)); + return err; + } + + if (json_output) + jsonw_null(json_wtr); + + return 0; +} + +static int do_detach(int argc, char **argv) +{ + enum net_attach_type attach_type; + int progfd, ifindex, err = 0; + + /* parse detach args */ + if (!REQ_ARGS(3)) + return -EINVAL; + + attach_type = parse_attach_type(*argv); + if (attach_type == net_attach_type_size) { + p_err("invalid net attach/detach type: %s", *argv); + return -EINVAL; + } + NEXT_ARG(); + + ifindex = net_parse_dev(&argc, &argv); + if (ifindex < 1) + return -EINVAL; + + /* detach xdp prog */ + progfd = -1; + if (is_prefix("xdp", attach_type_strings[attach_type])) + err = do_attach_detach_xdp(progfd, attach_type, ifindex, NULL); + + if (err < 0) { + p_err("interface %s detach failed: %s", + attach_type_strings[attach_type], strerror(-err)); + return err; + } + + if (json_output) + jsonw_null(json_wtr); + + return 0; +} + static int do_show(int argc, char **argv) { struct bpf_attach_info attach_info = {}; @@ -232,13 +389,9 @@ static int do_show(int argc, char **argv) char err_buf[256]; if (argc == 2) { - if (strcmp(argv[0], "dev") != 0) - usage(); - filter_idx = if_nametoindex(argv[1]); - if (filter_idx == 0) { - fprintf(stderr, "invalid dev name %s\n", argv[1]); + filter_idx = net_parse_dev(&argc, &argv); + if (filter_idx < 1) return -1; - } } else if (argc != 0) { usage(); } @@ -305,13 +458,20 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %s %s { show | list } [dev <devname>]\n" + " %s %s attach ATTACH_TYPE PROG dev <devname> [ overwrite ]\n" + " %s %s detach ATTACH_TYPE dev <devname>\n" " %s %s help\n" + "\n" + " " HELP_SPEC_PROGRAM "\n" + " ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n" + "\n" "Note: Only xdp and tc attachments are supported now.\n" " For progs 
attached to cgroups, use \"bpftool cgroup\"\n" " to dump program attachments. For program types\n" " sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n" " consult iproute2.\n", - bin_name, argv[-2], bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2]); return 0; } @@ -319,6 +479,8 @@ static int do_help(int argc, char **argv) static const struct cmd cmds[] = { { "show", do_show }, { "list", do_show }, + { "attach", do_attach }, + { "detach", do_detach }, { "help", do_help }, { 0 } }; diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index f2a545e667c4..b2046f33e23f 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -104,6 +104,8 @@ static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type, jsonw_string_field(json_wtr, "filename", buf); jsonw_lluint_field(json_wtr, "offset", probe_offset); break; + default: + break; } jsonw_end_object(json_wtr); } @@ -140,6 +142,8 @@ static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type, printf("uretprobe filename %s offset %llu\n", buf, probe_offset); break; + default: + break; } } diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 0d35f18006a1..95c072b70d0e 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -6,9 +6,11 @@ /* * Common definitions for all gcc versions go here. */ +#ifndef GCC_VERSION #define GCC_VERSION (__GNUC__ * 10000 \ + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) +#endif #if GCC_VERSION >= 70000 && !defined(__CHECKER__) # define __fallthrough __attribute__ ((fallthrough)) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0e66371bea13..77c6be96d676 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -106,6 +106,7 @@ enum bpf_cmd { BPF_TASK_FD_QUERY, BPF_MAP_LOOKUP_AND_DELETE_ELEM, BPF_MAP_FREEZE, + BPF_BTF_GET_NEXT_ID, }; enum bpf_map_type { @@ -284,6 +285,9 @@ enum bpf_attach_type { */ #define BPF_F_TEST_RND_HI32 (1U << 2) +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_STATE_FREQ (1U << 3) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * two extensions: * @@ -337,6 +341,9 @@ enum bpf_attach_type { #define BPF_F_RDONLY_PROG (1U << 7) #define BPF_F_WRONLY_PROG (1U << 8) +/* Clone map from listener for newly accepted socket */ +#define BPF_F_CLONE (1U << 9) + /* flags for BPF_PROG_QUERY */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) @@ -576,6 +583,8 @@ union bpf_attr { * limited to five). * * Each time the helper is called, it appends a line to the trace. + * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is + * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. * The format of the trace is customizable, and the exact output * one will get depends on the options set in * *\/sys/kernel/debug/tracing/trace_options* (see also the @@ -1014,7 +1023,7 @@ union bpf_attr { * The realm of the route for the packet associated to *skb*, or 0 * if none was found. * - * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -1076,7 +1085,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
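The **bpftool net attach** handler above reduces to a single libbpf call, **bpf_set_link_xdp_fd()**, with the chosen attach type folded into XDP_FLAGS_* bits (and a program fd of -1 meaning detach). A minimal stand-alone sketch of the same sequence, assuming libbpf's installed header paths and with most error handling trimmed::

    /* Sketch of what "bpftool net attach xdpdrv id <id> dev <ifname>" does. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <net/if.h>
    #include <linux/types.h>
    #include <linux/if_link.h>   /* XDP_FLAGS_* */
    #include <bpf/bpf.h>         /* bpf_prog_get_fd_by_id() */
    #include <bpf/libbpf.h>      /* bpf_set_link_xdp_fd() */

    int main(int argc, char **argv)
    {
            __u32 flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE;
            int ifindex, prog_fd, err;

            if (argc != 3) {
                    fprintf(stderr, "usage: %s <prog_id> <ifname>\n", argv[0]);
                    return 1;
            }

            prog_fd = bpf_prog_get_fd_by_id(atoi(argv[1]));
            ifindex = if_nametoindex(argv[2]);
            if (prog_fd < 0 || !ifindex)
                    return 1;

            /* Passing -1 instead of prog_fd with the same flags detaches. */
            err = bpf_set_link_xdp_fd(ifindex, prog_fd, flags);
            if (err)
                    fprintf(stderr, "attach failed: %d\n", err);
            return err ? 1 : 0;
    }

XDP_FLAGS_UPDATE_IF_NOEXIST is what makes a plain attach fail when a program is already loaded on the device; bpftool drops that flag when the **overwrite** keyword is given.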
* - * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags) + * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags) * Description * Walk a user or a kernel stack and return its id. To achieve * this, the helper needs *ctx*, which is a pointer to the context @@ -1725,7 +1734,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_override_return(struct pt_reg *regs, u64 rc) + * int bpf_override_return(struct pt_regs *regs, u64 rc) * Description * Used for error injection, this helper uses kprobes to override * the return value of the probed function, and to set it to *rc*. diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index faaa5ca2a117..be328c59389d 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -16,6 +16,18 @@ #define XDP_SHARED_UMEM (1 << 0) #define XDP_COPY (1 << 1) /* Force copy-mode */ #define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */ +/* If this option is set, the driver might go sleep and in that case + * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be + * set. If it is set, the application need to explicitly wake up the + * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are + * running the driver and the application on the same core, you should + * use this option so that the kernel will yield to the user space + * application. + */ +#define XDP_USE_NEED_WAKEUP (1 << 3) + +/* Flags for xsk_umem_config flags */ +#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) struct sockaddr_xdp { __u16 sxdp_family; @@ -25,10 +37,14 @@ struct sockaddr_xdp { __u32 sxdp_shared_umem_fd; }; +/* XDP_RING flags */ +#define XDP_RING_NEED_WAKEUP (1 << 0) + struct xdp_ring_offset { __u64 producer; __u64 consumer; __u64 desc; + __u64 flags; }; struct xdp_mmap_offsets { @@ -53,6 +69,7 @@ struct xdp_umem_reg { __u64 len; /* Length of packet data area */ __u32 chunk_size; __u32 headroom; + __u32 flags; }; struct xdp_statistics { @@ -74,6 +91,11 @@ struct xdp_options { #define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL #define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL +/* Masks for unaligned chunks mode */ +#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48 +#define XSK_UNALIGNED_BUF_ADDR_MASK \ + ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) + /* Rx/Tx descriptor */ struct xdp_desc { __u64 addr; diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 9312066a1ae3..c6f94cffe06e 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -1,9 +1,10 @@ # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) # Most of this file is copied from tools/lib/traceevent/Makefile -BPF_VERSION = 0 -BPF_PATCHLEVEL = 0 -BPF_EXTRAVERSION = 4 +LIBBPF_VERSION := $(shell \ + grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \ + sort -rV | head -n1 | cut -d'_' -f2) +LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION))) MAKEFLAGS += --no-print-directory @@ -79,15 +80,9 @@ export prefix libdir src obj libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -VERSION = $(BPF_VERSION) -PATCHLEVEL = $(BPF_PATCHLEVEL) -EXTRAVERSION = $(BPF_EXTRAVERSION) - OBJ = $@ N = -LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION) - LIB_TARGET = libbpf.a libbpf.so.$(LIBBPF_VERSION) LIB_FILE = libbpf.a libbpf.so* PC_FILE = libbpf.pc @@ -113,6 +108,7 @@ override CFLAGS += -Werror -Wall override CFLAGS += -fPIC override CFLAGS += $(INCLUDES) override CFLAGS += -fvisibility=hidden 
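With XDP_UMEM_UNALIGNED_CHUNK_FLAG set, descriptor addresses are no longer plain offsets into the umem: the upper 16 bits carry the offset within the chunk and the lower 48 bits the chunk base. A small sketch of how a consumer splits such an address with the new masks (the same logic libbpf wraps in the xsk_umem__extract_addr()/xsk_umem__extract_offset() helpers added to xsk.h later in this diff)::

    #include <linux/types.h>
    #include <linux/if_xdp.h>   /* XSK_UNALIGNED_BUF_* */

    /* Split a descriptor address from unaligned-chunks mode into the
     * chunk base (lower 48 bits) and the offset into the chunk (upper
     * 16 bits), then rebuild the absolute data address.
     */
    static inline __u64 unaligned_desc_to_data_addr(__u64 addr)
    {
            __u64 base   = addr & XSK_UNALIGNED_BUF_ADDR_MASK;
            __u64 offset = addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;

            return base + offset;
    }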
+override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 ifeq ($(VERBOSE),1) Q = @@ -138,7 +134,9 @@ LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) PC_FILE := $(addprefix $(OUTPUT),$(PC_FILE)) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}') + cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}' | \ + sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) @@ -178,10 +176,10 @@ $(BPF_IN): force elfdep bpfdep $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN) - $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(VERSION) \ + $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so - @ln -sf $(@F) $(OUTPUT)libbpf.so.$(VERSION) + @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION) $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ @@ -205,6 +203,7 @@ check_abi: $(OUTPUT)libbpf.so "Please make sure all LIBBPF_API symbols are" \ "versioned in $(VERSION_SCRIPT)." >&2; \ readelf -s --wide $(OUTPUT)libbpf-in.o | \ + cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}'| \ sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf -s --wide $(OUTPUT)libbpf.so | \ @@ -257,7 +256,8 @@ config-clean: clean: $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ - *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd *.pc LIBBPF-CFLAGS + *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \ + *.pc LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index c7d7993c44bb..cbb933532981 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -568,7 +568,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) return ret; } -int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) +static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd) { union bpf_attr attr; int err; @@ -576,26 +576,26 @@ int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) memset(&attr, 0, sizeof(attr)); attr.start_id = start_id; - err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr)); + err = sys_bpf(cmd, &attr, sizeof(attr)); if (!err) *next_id = attr.next_id; return err; } -int bpf_map_get_next_id(__u32 start_id, __u32 *next_id) +int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) { - union bpf_attr attr; - int err; - - memset(&attr, 0, sizeof(attr)); - attr.start_id = start_id; + return bpf_obj_get_next_id(start_id, next_id, BPF_PROG_GET_NEXT_ID); +} - err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr)); - if (!err) - *next_id = attr.next_id; +int bpf_map_get_next_id(__u32 start_id, __u32 *next_id) +{ + return bpf_obj_get_next_id(start_id, next_id, BPF_MAP_GET_NEXT_ID); +} - return err; +int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id) +{ + return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID); } int bpf_prog_get_fd_by_id(__u32 id) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index ff42ca043dc8..0db01334740f 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -156,6 +156,7 @@ LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 *retval, __u32 *duration); LIBBPF_API int bpf_prog_get_next_id(__u32 
start_id, __u32 *next_id); LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); +LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index f9d316e873d8..d04c7cb623ed 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -183,4 +183,10 @@ LIBBPF_0.0.4 { perf_buffer__new; perf_buffer__new_raw; perf_buffer__poll; + xsk_umem__create; } LIBBPF_0.0.3; + +LIBBPF_0.0.5 { + global: + bpf_btf_get_next_id; +} LIBBPF_0.0.4; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 680e63066cf3..842c4fd55859 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -74,23 +74,6 @@ struct xsk_nl_info { int fd; }; -/* For 32-bit systems, we need to use mmap2 as the offsets are 64-bit. - * Unfortunately, it is not part of glibc. - */ -static inline void *xsk_mmap(void *addr, size_t length, int prot, int flags, - int fd, __u64 offset) -{ -#ifdef __NR_mmap2 - unsigned int page_shift = __builtin_ffs(getpagesize()) - 1; - long ret = syscall(__NR_mmap2, addr, length, prot, flags, fd, - (off_t)(offset >> page_shift)); - - return (void *)ret; -#else - return mmap(addr, length, prot, flags, fd, offset); -#endif -} - int xsk_umem__fd(const struct xsk_umem *umem) { return umem ? umem->fd : -EINVAL; @@ -116,6 +99,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg, cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; + cfg->flags = XSK_UMEM__DEFAULT_FLAGS; return; } @@ -123,6 +107,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg, cfg->comp_size = usr_cfg->comp_size; cfg->frame_size = usr_cfg->frame_size; cfg->frame_headroom = usr_cfg->frame_headroom; + cfg->flags = usr_cfg->flags; } static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, @@ -149,9 +134,10 @@ static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, return 0; } -int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, - struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, - const struct xsk_umem_config *usr_config) +int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, + __u64 size, struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *usr_config) { struct xdp_mmap_offsets off; struct xdp_umem_reg mr; @@ -182,6 +168,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, mr.len = size; mr.chunk_size = umem->config.frame_size; mr.headroom = umem->config.frame_headroom; + mr.flags = umem->config.flags; err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); if (err) { @@ -210,10 +197,9 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, goto out_socket; } - map = xsk_mmap(NULL, off.fr.desc + - umem->config.fill_size * sizeof(__u64), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, - umem->fd, XDP_UMEM_PGOFF_FILL_RING); + map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd, + XDP_UMEM_PGOFF_FILL_RING); if (map == MAP_FAILED) { err = -errno; goto out_socket; @@ -224,13 +210,13 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, fill->size = umem->config.fill_size; fill->producer = map + 
off.fr.producer; fill->consumer = map + off.fr.consumer; + fill->flags = map + off.fr.flags; fill->ring = map + off.fr.desc; fill->cached_cons = umem->config.fill_size; - map = xsk_mmap(NULL, - off.cr.desc + umem->config.comp_size * sizeof(__u64), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, - umem->fd, XDP_UMEM_PGOFF_COMPLETION_RING); + map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd, + XDP_UMEM_PGOFF_COMPLETION_RING); if (map == MAP_FAILED) { err = -errno; goto out_mmap; @@ -241,6 +227,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, comp->size = umem->config.comp_size; comp->producer = map + off.cr.producer; comp->consumer = map + off.cr.consumer; + comp->flags = map + off.cr.flags; comp->ring = map + off.cr.desc; *umem_ptr = umem; @@ -255,6 +242,29 @@ out_umem_alloc: return err; } +struct xsk_umem_config_v1 { + __u32 fill_size; + __u32 comp_size; + __u32 frame_size; + __u32 frame_headroom; +}; + +int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, + __u64 size, struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *usr_config) +{ + struct xsk_umem_config config; + + memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1)); + config.flags = 0; + + return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp, + &config); +} +asm(".symver xsk_umem__create_v0_0_2, xsk_umem__create@LIBBPF_0.0.2"); +asm(".symver xsk_umem__create_v0_0_4, xsk_umem__create@@LIBBPF_0.0.4"); + static int xsk_load_xdp_prog(struct xsk_socket *xsk) { static const int log_buf_size = 16 * 1024; @@ -550,11 +560,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, } if (rx) { - rx_map = xsk_mmap(NULL, off.rx.desc + - xsk->config.rx_size * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, - xsk->fd, XDP_PGOFF_RX_RING); + rx_map = mmap(NULL, off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_RX_RING); if (rx_map == MAP_FAILED) { err = -errno; goto out_socket; @@ -564,16 +573,16 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, rx->size = xsk->config.rx_size; rx->producer = rx_map + off.rx.producer; rx->consumer = rx_map + off.rx.consumer; + rx->flags = rx_map + off.rx.flags; rx->ring = rx_map + off.rx.desc; } xsk->rx = rx; if (tx) { - tx_map = xsk_mmap(NULL, off.tx.desc + - xsk->config.tx_size * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, - xsk->fd, XDP_PGOFF_TX_RING); + tx_map = mmap(NULL, off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_TX_RING); if (tx_map == MAP_FAILED) { err = -errno; goto out_mmap_rx; @@ -583,6 +592,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, tx->size = xsk->config.tx_size; tx->producer = tx_map + off.tx.producer; tx->consumer = tx_map + off.tx.consumer; + tx->flags = tx_map + off.tx.flags; tx->ring = tx_map + off.tx.desc; tx->cached_cons = xsk->config.tx_size; } diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 833a6e60d065..584f6820a639 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -32,6 +32,7 @@ struct name { \ __u32 *producer; \ __u32 *consumer; \ void *ring; \ + __u32 *flags; \ } DEFINE_XSK_RING(xsk_ring_prod); @@ -76,6 +77,11 @@ xsk_ring_cons__rx_desc(const 
struct xsk_ring_cons *rx, __u32 idx) return &descs[idx & rx->mask]; } +static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r) +{ + return *r->flags & XDP_RING_NEED_WAKEUP; +} + static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) { __u32 free_entries = r->cached_cons - r->cached_prod; @@ -162,6 +168,21 @@ static inline void *xsk_umem__get_data(void *umem_area, __u64 addr) return &((char *)umem_area)[addr]; } +static inline __u64 xsk_umem__extract_addr(__u64 addr) +{ + return addr & XSK_UNALIGNED_BUF_ADDR_MASK; +} + +static inline __u64 xsk_umem__extract_offset(__u64 addr) +{ + return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT; +} + +static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr) +{ + return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr); +} + LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem); LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); @@ -170,12 +191,14 @@ LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); #define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */ #define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT) #define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0 +#define XSK_UMEM__DEFAULT_FLAGS 0 struct xsk_umem_config { __u32 fill_size; __u32 comp_size; __u32 frame_size; __u32 frame_headroom; + __u32 flags; }; /* Flags for the libbpf_flags field. */ @@ -195,6 +218,16 @@ LIBBPF_API int xsk_umem__create(struct xsk_umem **umem, struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, const struct xsk_umem_config *config); +LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk, const char *ifname, __u32 queue_id, struct xsk_umem *umem, diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 90f70d2c7c22..60c9338cd9b4 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -42,4 +42,5 @@ xdping test_sockopt test_sockopt_sk test_sockopt_multi +test_sockopt_inherit test_tcp_rtt diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index d69c541e2039..9eef5edf17be 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -29,7 +29,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_cgroup_storage test_select_reuseport test_section_names \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk \ - test_sockopt_multi test_tcp_rtt + test_sockopt_multi test_sockopt_inherit test_tcp_rtt BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) TEST_GEN_FILES = $(BPF_OBJ_FILES) @@ -66,7 +66,8 @@ TEST_PROGS := test_kmod.sh \ test_tcp_check_syncookie.sh \ test_tc_tunnel.sh \ test_tc_edt.sh \ - test_xdping.sh + test_xdping.sh \ + test_bpftool_build.sh TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ @@ -115,6 +116,7 @@ $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c $(OUTPUT)/test_sockopt: cgroup_helpers.c $(OUTPUT)/test_sockopt_sk: cgroup_helpers.c $(OUTPUT)/test_sockopt_multi: cgroup_helpers.c 
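The per-ring flags pointer plus **xsk_ring_prod__needs_wakeup()** let an AF_XDP application skip the per-batch syscall unless the driver has raised XDP_RING_NEED_WAKEUP. A sketch of the intended TX-side pattern for a socket bound with XDP_USE_NEED_WAKEUP (header path assumed to be the installed bpf/xsk.h)::

    #include <sys/socket.h>
    #include <bpf/xsk.h>   /* xsk_ring_prod__needs_wakeup(), xsk_socket__fd() */

    /* Kick the kernel side of an AF_XDP socket only when the driver asked
     * for it; in the common case no syscall is issued at all.
     */
    static void kick_tx_if_needed(struct xsk_socket *xsk, struct xsk_ring_prod *tx)
    {
            if (!xsk_ring_prod__needs_wakeup(tx))
                    return;

            sendto(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
    }

For the fill ring the equivalent kick is a poll() on the same fd, as the comment added to if_xdp.h above spells out.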
+$(OUTPUT)/test_sockopt_inherit: cgroup_helpers.c $(OUTPUT)/test_tcp_rtt: cgroup_helpers.c .PHONY: force diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h index 05f036df8a4c..fbe28008450f 100644 --- a/tools/testing/selftests/bpf/bpf_endian.h +++ b/tools/testing/selftests/bpf/bpf_endian.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ #ifndef __BPF_ENDIAN__ #define __BPF_ENDIAN__ @@ -29,6 +29,10 @@ # define __bpf_htonl(x) __builtin_bswap32(x) # define __bpf_constant_ntohl(x) ___constant_swab32(x) # define __bpf_constant_htonl(x) ___constant_swab32(x) +# define __bpf_be64_to_cpu(x) __builtin_bswap64(x) +# define __bpf_cpu_to_be64(x) __builtin_bswap64(x) +# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x) +# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x) #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ # define __bpf_ntohs(x) (x) # define __bpf_htons(x) (x) @@ -38,6 +42,10 @@ # define __bpf_htonl(x) (x) # define __bpf_constant_ntohl(x) (x) # define __bpf_constant_htonl(x) (x) +# define __bpf_be64_to_cpu(x) (x) +# define __bpf_cpu_to_be64(x) (x) +# define __bpf_constant_be64_to_cpu(x) (x) +# define __bpf_constant_cpu_to_be64(x) (x) #else # error "Fix your compiler's __BYTE_ORDER__?!" #endif @@ -54,5 +62,11 @@ #define bpf_ntohl(x) \ (__builtin_constant_p(x) ? \ __bpf_constant_ntohl(x) : __bpf_ntohl(x)) +#define bpf_cpu_to_be64(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x)) +#define bpf_be64_to_cpu(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x)) #endif /* __BPF_ENDIAN__ */ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 8b503ea142f0..6c4930bc6e2e 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ #ifndef __BPF_HELPERS_H #define __BPF_HELPERS_H diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index fb5840a62548..f10029821e16 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -48,16 +48,17 @@ void test_bpf_obj_id(void) /* test_obj_id.o is a dumb prog. It should never fail * to load. 
*/ - if (err) - error_cnt++; - assert(!err); + if (CHECK_FAIL(err)) + continue; /* Insert a magic value to the map */ map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); - assert(map_fds[i] >= 0); + if (CHECK_FAIL(map_fds[i] < 0)) + goto done; err = bpf_map_update_elem(map_fds[i], &array_key, &array_magic_value, 0); - assert(!err); + if (CHECK_FAIL(err)) + goto done; /* Check getting map info */ info_len = sizeof(struct bpf_map_info) * 2; @@ -96,9 +97,11 @@ void test_bpf_obj_id(void) prog_infos[i].map_ids = ptr_to_u64(map_ids + i); prog_infos[i].nr_map_ids = 2; err = clock_gettime(CLOCK_REALTIME, &real_time_ts); - assert(!err); + if (CHECK_FAIL(err)) + goto done; err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts); - assert(!err); + if (CHECK_FAIL(err)) + goto done; err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i], &info_len); load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec) @@ -224,7 +227,8 @@ void test_bpf_obj_id(void) nr_id_found++; err = bpf_map_lookup_elem(map_fd, &array_key, &array_value); - assert(!err); + if (CHECK_FAIL(err)) + goto done; err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); CHECK(err || info_len != sizeof(struct bpf_map_info) || diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 1a1eae356f81..1c01ee2600a9 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -28,8 +28,6 @@ static int check_load(const char *file, enum bpf_prog_type type) attr.prog_flags = BPF_F_TEST_RND_HI32; err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); bpf_object__close(obj); - if (err) - error_cnt++; return err; } @@ -105,12 +103,7 @@ void test_bpf_verif_scale(void) continue; err = check_load(test->file, test->attach_type); - if (test->fails) { /* expected to fail */ - if (err) - error_cnt--; - else - error_cnt++; - } + CHECK_FAIL(err && !test->fails); } if (env.verifier_stats) diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 6892b88ae065..92563898867c 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -344,7 +344,6 @@ struct test tests[] = { .tcp.dest = 8080, }, .keys = { - .nhoff = 0, .nhoff = ETH_HLEN, .thoff = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct iphdr), @@ -452,10 +451,8 @@ void test_flow_dissector(void) err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector", "jmp_table", "last_dissection", &prog_fd, &keys_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_flow_keys flow_keys; diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c index 3d59b3c841fe..eba9a970703b 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c @@ -135,10 +135,7 @@ void test_get_stack_raw_tp(void) exp_cnt -= err; } - goto close_prog_noerr; close_prog: - error_cnt++; -close_prog_noerr: if (!IS_ERR_OR_NULL(link)) bpf_link__destroy(link); if (!IS_ERR_OR_NULL(pb)) diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c index d011079fb0bf..c680926fce73 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data.c +++ 
b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -7,10 +7,8 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration) uint64_t num; map_fd = bpf_find_map(__func__, obj, "result_number"); - if (map_fd < 0) { - error_cnt++; + if (CHECK_FAIL(map_fd < 0)) return; - } struct { char *name; @@ -44,10 +42,8 @@ static void test_global_data_string(struct bpf_object *obj, __u32 duration) char str[32]; map_fd = bpf_find_map(__func__, obj, "result_string"); - if (map_fd < 0) { - error_cnt++; + if (CHECK_FAIL(map_fd < 0)) return; - } struct { char *name; @@ -81,10 +77,8 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration) struct foo val; map_fd = bpf_find_map(__func__, obj, "result_struct"); - if (map_fd < 0) { - error_cnt++; + if (CHECK_FAIL(map_fd < 0)) return; - } struct { char *name; @@ -112,16 +106,12 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration) __u8 *buff; map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); - if (!map || !bpf_map__is_internal(map)) { - error_cnt++; + if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) return; - } map_fd = bpf_map__fd(map); - if (map_fd < 0) { - error_cnt++; + if (CHECK_FAIL(map_fd < 0)) return; - } buff = malloc(bpf_map__def(map)->value_size); if (buff) diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c index 20ddca830e68..eaf64595be88 100644 --- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c @@ -30,10 +30,8 @@ static void test_l4lb(const char *file) u32 *magic = (u32 *)buf; err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } map_fd = bpf_find_map(__func__, obj, "vip_map"); if (map_fd < 0) @@ -72,10 +70,9 @@ static void test_l4lb(const char *file) bytes += stats[i].bytes; pkts += stats[i].pkts; } - if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { - error_cnt++; + if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 || + pkts != NUM_ITER * 2)) printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts); - } out: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c index ee99368c595c..8f91f1881d11 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c @@ -8,14 +8,12 @@ static void *parallel_map_access(void *arg) for (i = 0; i < 10000; i++) { err = bpf_map_lookup_elem_flags(map_fd, &key, vars, BPF_F_LOCK); - if (err) { + if (CHECK_FAIL(err)) { printf("lookup failed\n"); - error_cnt++; goto out; } - if (vars[0] != 0) { + if (CHECK_FAIL(vars[0] != 0)) { printf("lookup #%d var[0]=%d\n", i, vars[0]); - error_cnt++; goto out; } rnd = vars[1]; @@ -24,7 +22,7 @@ static void *parallel_map_access(void *arg) continue; printf("lookup #%d var[1]=%d var[%d]=%d\n", i, rnd, j, vars[j]); - error_cnt++; + CHECK_FAIL(vars[j] != rnd); goto out; } } @@ -42,34 +40,36 @@ void test_map_lock(void) void *ret; err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); - if (err) { + if (CHECK_FAIL(err)) { printf("test_map_lock:bpf_prog_load errno %d\n", errno); goto close_prog; } map_fd[0] = bpf_find_map(__func__, obj, "hash_map"); - if (map_fd[0] < 0) + if (CHECK_FAIL(map_fd[0] < 0)) goto close_prog; map_fd[1] = bpf_find_map(__func__, obj, "array_map"); - if (map_fd[1] < 0) + if (CHECK_FAIL(map_fd[1] < 0)) goto close_prog; 
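These selftest conversions lean on CHECK_FAIL() both recording the failure and evaluating to the condition itself, so the old assert() sites can become ordinary "if (...) goto cleanup" paths. Its real definition lives in test_progs.h; purely to illustrate the shape the call sites depend on (names below are placeholders, not the actual macro)::

    /* Placeholder sketch only -- not the real CHECK_FAIL from test_progs.h.
     * The key property is that the macro evaluates to the condition, so it
     * can sit directly inside "if (...) goto cleanup;". Requires <stdio.h>.
     */
    #define SKETCH_CHECK_FAIL(cond) ({                                   \
            int __failed = !!(cond);                                     \
            if (__failed)                                                \
                    fprintf(stderr, "%s:%d: check failed\n",             \
                            __func__, __LINE__);                         \
            __failed;                                                    \
    })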
bpf_map_update_elem(map_fd[0], &key, vars, BPF_F_LOCK); for (i = 0; i < 4; i++) - assert(pthread_create(&thread_id[i], NULL, - &spin_lock_thread, &prog_fd) == 0); + if (CHECK_FAIL(pthread_create(&thread_id[i], NULL, + &spin_lock_thread, &prog_fd))) + goto close_prog; for (i = 4; i < 6; i++) - assert(pthread_create(&thread_id[i], NULL, - ¶llel_map_access, &map_fd[i - 4]) == 0); + if (CHECK_FAIL(pthread_create(&thread_id[i], NULL, + ¶llel_map_access, + &map_fd[i - 4]))) + goto close_prog; for (i = 0; i < 4; i++) - assert(pthread_join(thread_id[i], &ret) == 0 && - ret == (void *)&prog_fd); + if (CHECK_FAIL(pthread_join(thread_id[i], &ret) || + ret != (void *)&prog_fd)) + goto close_prog; for (i = 4; i < 6; i++) - assert(pthread_join(thread_id[i], &ret) == 0 && - ret == (void *)&map_fd[i - 4]); - goto close_prog_noerr; + if (CHECK_FAIL(pthread_join(thread_id[i], &ret) || + ret != (void *)&map_fd[i - 4])) + goto close_prog; close_prog: - error_cnt++; -close_prog_noerr: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c index 4ecfd721a044..a2537dfa899c 100644 --- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c +++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c @@ -9,10 +9,8 @@ void test_pkt_access(void) int err, prog_fd; err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4), NULL, NULL, &retval, &duration); diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c index ac0d43435806..5f7aea605019 100644 --- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c +++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c @@ -9,10 +9,8 @@ void test_pkt_md_access(void) int err, prog_fd; err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4), NULL, NULL, &retval, &duration); diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c index e60cd5ff1f55..faccc66f4e39 100644 --- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c @@ -27,10 +27,8 @@ static void test_queue_stack_map_by_type(int type) return; err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } map_in_fd = bpf_find_map(__func__, obj, "map_in"); if (map_in_fd < 0) @@ -43,10 +41,8 @@ static void test_queue_stack_map_by_type(int type) /* Push 32 elements to the input map */ for (i = 0; i < MAP_SIZE; i++) { err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) goto out; - } } /* The eBPF program pushes iph.saddr in the output map, diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index 4a4f428d1a78..5c78e2b5a917 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -10,10 +10,8 @@ void test_reference_tracking(void) int err = 0; obj = bpf_object__open(file); - if (IS_ERR(obj)) { - error_cnt++; + if (CHECK_FAIL(IS_ERR(obj))) return; - } 
bpf_object__for_each_program(prog, obj) { const char *title; diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 1575f0a1f586..b607112c64e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -8,7 +8,7 @@ static void sigusr1_handler(int signum) sigusr1_received++; } -static int test_send_signal_common(struct perf_event_attr *attr, +static void test_send_signal_common(struct perf_event_attr *attr, int prog_type, const char *test_name) { @@ -23,13 +23,13 @@ static int test_send_signal_common(struct perf_event_attr *attr, if (CHECK(pipe(pipe_c2p), test_name, "pipe pipe_c2p error: %s\n", strerror(errno))) - goto no_fork_done; + return; if (CHECK(pipe(pipe_p2c), test_name, "pipe pipe_p2c error: %s\n", strerror(errno))) { close(pipe_c2p[0]); close(pipe_c2p[1]); - goto no_fork_done; + return; } pid = fork(); @@ -38,7 +38,7 @@ static int test_send_signal_common(struct perf_event_attr *attr, close(pipe_c2p[1]); close(pipe_p2c[0]); close(pipe_p2c[1]); - goto no_fork_done; + return; } if (pid == 0) { @@ -125,7 +125,7 @@ static int test_send_signal_common(struct perf_event_attr *attr, goto disable_pmu; } - err = CHECK(buf[0] != '2', test_name, "incorrect result\n"); + CHECK(buf[0] != '2', test_name, "incorrect result\n"); /* notify child safe to exit */ write(pipe_p2c[1], buf, 1); @@ -138,11 +138,9 @@ prog_load_failure: close(pipe_c2p[0]); close(pipe_p2c[1]); wait(NULL); -no_fork_done: - return err; } -static int test_send_signal_tracepoint(void) +static void test_send_signal_tracepoint(void) { const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id"; struct perf_event_attr attr = { @@ -159,21 +157,21 @@ static int test_send_signal_tracepoint(void) if (CHECK(efd < 0, "tracepoint", "open syscalls/sys_enter_nanosleep/id failure: %s\n", strerror(errno))) - return -1; + return; bytes = read(efd, buf, sizeof(buf)); close(efd); if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint", "read syscalls/sys_enter_nanosleep/id failure: %s\n", strerror(errno))) - return -1; + return; attr.config = strtol(buf, NULL, 0); - return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint"); + test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint"); } -static int test_send_signal_perf(void) +static void test_send_signal_perf(void) { struct perf_event_attr attr = { .sample_period = 1, @@ -181,11 +179,11 @@ static int test_send_signal_perf(void) .config = PERF_COUNT_SW_CPU_CLOCK, }; - return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, - "perf_sw_event"); + test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, + "perf_sw_event"); } -static int test_send_signal_nmi(void) +static void test_send_signal_nmi(void) { struct perf_event_attr attr = { .sample_freq = 50, @@ -204,25 +202,24 @@ static int test_send_signal_nmi(void) if (errno == ENOENT) { printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); - return 0; + test__skip(); + return; } /* Let the test fail with a more informative message */ } else { close(pmu_fd); } - return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, - "perf_hw_event"); + test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, + "perf_hw_event"); } void test_send_signal(void) { - int ret = 0; - if (test__start_subtest("send_signal_tracepoint")) - ret |= test_send_signal_tracepoint(); + test_send_signal_tracepoint(); if (test__start_subtest("send_signal_perf")) - 
ret |= test_send_signal_perf(); + test_send_signal_perf(); if (test__start_subtest("send_signal_nmi")) - ret |= test_send_signal_nmi(); + test_send_signal_nmi(); } diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c index 114ebe6a438e..1ae00cd3174e 100644 --- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c @@ -11,19 +11,19 @@ void test_spinlock(void) void *ret; err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); - if (err) { + if (CHECK_FAIL(err)) { printf("test_spin_lock:bpf_prog_load errno %d\n", errno); goto close_prog; } for (i = 0; i < 4; i++) - assert(pthread_create(&thread_id[i], NULL, - &spin_lock_thread, &prog_fd) == 0); + if (CHECK_FAIL(pthread_create(&thread_id[i], NULL, + &spin_lock_thread, &prog_fd))) + goto close_prog; + for (i = 0; i < 4; i++) - assert(pthread_join(thread_id[i], &ret) == 0 && - ret == (void *)&prog_fd); - goto close_prog_noerr; + if (CHECK_FAIL(pthread_join(thread_id[i], &ret) || + ret != (void *)&prog_fd)) + goto close_prog; close_prog: - error_cnt++; -close_prog_noerr: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c index ac44fda84833..d841dced971f 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c @@ -51,9 +51,10 @@ retry: "err %d errno %d\n", err, errno)) goto disable_pmu; - assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") - == 0); - assert(system("./urandom_read") == 0); + if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null"))) + goto disable_pmu; + if (CHECK_FAIL(system("./urandom_read"))) + goto disable_pmu; /* disable stack trace collection */ key = 0; val = 1; diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 9557b7dfb782..f62aa0eb959b 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -82,9 +82,10 @@ retry: "err %d errno %d\n", err, errno)) goto disable_pmu; - assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") - == 0); - assert(system("taskset 0x1 ./urandom_read 100000") == 0); + if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null"))) + goto disable_pmu; + if (CHECK_FAIL(system("taskset 0x1 ./urandom_read 100000"))) + goto disable_pmu; /* disable stack trace collection */ key = 0; val = 1; diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c index fc539335c5b3..37269d23df93 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c @@ -26,19 +26,19 @@ void test_stacktrace_map(void) /* find map fds */ control_map_fd = bpf_find_map(__func__, obj, "control_map"); - if (control_map_fd < 0) + if (CHECK_FAIL(control_map_fd < 0)) goto disable_pmu; stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); - if (stackid_hmap_fd < 0) + if (CHECK_FAIL(stackid_hmap_fd < 0)) goto disable_pmu; stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); - if (stackmap_fd < 0) + if (CHECK_FAIL(stackmap_fd < 0)) goto disable_pmu; stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); - if 
(stack_amap_fd < 0) + if (CHECK_FAIL(stack_amap_fd < 0)) goto disable_pmu; /* give some time for bpf program run */ @@ -55,23 +55,20 @@ void test_stacktrace_map(void) err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", "err %d errno %d\n", err, errno)) - goto disable_pmu_noerr; + goto disable_pmu; err = compare_map_keys(stackmap_fd, stackid_hmap_fd); if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", "err %d errno %d\n", err, errno)) - goto disable_pmu_noerr; + goto disable_pmu; stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap", "err %d errno %d\n", err, errno)) - goto disable_pmu_noerr; + goto disable_pmu; - goto disable_pmu_noerr; disable_pmu: - error_cnt++; -disable_pmu_noerr: bpf_link__destroy(link); close_prog: bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c index fbfa8e76cf63..404a5498e1a3 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c @@ -26,15 +26,15 @@ void test_stacktrace_map_raw_tp(void) /* find map fds */ control_map_fd = bpf_find_map(__func__, obj, "control_map"); - if (control_map_fd < 0) + if (CHECK_FAIL(control_map_fd < 0)) goto close_prog; stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); - if (stackid_hmap_fd < 0) + if (CHECK_FAIL(stackid_hmap_fd < 0)) goto close_prog; stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); - if (stackmap_fd < 0) + if (CHECK_FAIL(stackmap_fd < 0)) goto close_prog; /* give some time for bpf program run */ @@ -58,10 +58,7 @@ void test_stacktrace_map_raw_tp(void) "err %d errno %d\n", err, errno)) goto close_prog; - goto close_prog_noerr; close_prog: - error_cnt++; -close_prog_noerr: if (!IS_ERR_OR_NULL(link)) bpf_link__destroy(link); bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c index 958a3d88de99..1bdc1d86a50c 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c +++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c @@ -70,9 +70,6 @@ void test_task_fd_query_rawtp(void) if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len)) goto close_prog; - goto close_prog_noerr; close_prog: - error_cnt++; -close_prog_noerr: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c index f9b70e81682b..3f131b8fe328 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c @@ -62,14 +62,9 @@ static void test_task_fd_query_tp_core(const char *probe_name, fd_type, buf)) goto close_pmu; - close(pmu_fd); - goto close_prog_noerr; - close_pmu: close(pmu_fd); close_prog: - error_cnt++; -close_prog_noerr: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c index bb8759d69099..594307dffd13 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c @@ -10,10 +10,8 @@ void test_tcp_estats(void) err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, 
&prog_fd); CHECK(err, "", "err %d errno %d\n", err, errno); - if (err) { - error_cnt++; + if (err) return; - } bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c index a74167289545..dcb5ecac778e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp.c @@ -16,10 +16,8 @@ void test_xdp(void) int err, prog_fd, map_fd; err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } map_fd = bpf_find_map(__func__, obj, "vip2tnl"); if (map_fd < 0) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index 922aa0a19764..3744196d7cba 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -10,10 +10,8 @@ void test_xdp_adjust_tail(void) int err, prog_fd; err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c index 15f7c272edb0..c9404e6b226e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c @@ -31,10 +31,8 @@ void test_xdp_noinline(void) u32 *magic = (u32 *)buf; err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } map_fd = bpf_find_map(__func__, obj, "vip_map"); if (map_fd < 0) @@ -73,8 +71,8 @@ void test_xdp_noinline(void) bytes += stats[i].bytes; pkts += stats[i].pkts; } - if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { - error_cnt++; + if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 || + pkts != NUM_ITER * 2)) { printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts); } diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c new file mode 100644 index 000000000000..dede0fcd6102 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; + +#define SOL_CUSTOM 0xdeadbeef +#define CUSTOM_INHERIT1 0 +#define CUSTOM_INHERIT2 1 +#define CUSTOM_LISTENER 2 + +struct sockopt_inherit { + __u8 val; +}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE); + __type(key, int); + __type(value, struct sockopt_inherit); +} cloned1_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE); + __type(key, int); + __type(value, struct sockopt_inherit); +} cloned2_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct sockopt_inherit); +} listener_only_map SEC(".maps"); + +static __inline struct sockopt_inherit *get_storage(struct bpf_sockopt *ctx) +{ + if (ctx->optname == CUSTOM_INHERIT1) + return bpf_sk_storage_get(&cloned1_map, ctx->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + else if (ctx->optname == CUSTOM_INHERIT2) + return 
bpf_sk_storage_get(&cloned2_map, ctx->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + else + return bpf_sk_storage_get(&listener_only_map, ctx->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); +} + +SEC("cgroup/getsockopt") +int _getsockopt(struct bpf_sockopt *ctx) +{ + __u8 *optval_end = ctx->optval_end; + struct sockopt_inherit *storage; + __u8 *optval = ctx->optval; + + if (ctx->level != SOL_CUSTOM) + return 1; /* only interested in SOL_CUSTOM */ + + if (optval + 1 > optval_end) + return 0; /* EPERM, bounds check */ + + storage = get_storage(ctx); + if (!storage) + return 0; /* EPERM, couldn't get sk storage */ + + ctx->retval = 0; /* Reset system call return value to zero */ + + optval[0] = storage->val; + ctx->optlen = 1; + + return 1; +} + +SEC("cgroup/setsockopt") +int _setsockopt(struct bpf_sockopt *ctx) +{ + __u8 *optval_end = ctx->optval_end; + struct sockopt_inherit *storage; + __u8 *optval = ctx->optval; + + if (ctx->level != SOL_CUSTOM) + return 1; /* only interested in SOL_CUSTOM */ + + if (optval + 1 > optval_end) + return 0; /* EPERM, bounds check */ + + storage = get_storage(ctx); + if (!storage) + return 0; /* EPERM, couldn't get sk storage */ + + storage->val = optval[0]; + ctx->optlen = -1; + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c index a334a0e882e4..41a3ebcd593d 100644 --- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c +++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c @@ -12,10 +12,6 @@ #define SR6_FLAG_ALERT (1 << 4) -#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \ - 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32)) -#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \ - 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32)) #define BPF_PACKET_HEADER __attribute__((packed)) struct ip6_t { @@ -276,8 +272,8 @@ int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh) return 0; // check if egress TLV value is correct - if (ntohll(egr_addr.hi) == 0xfd00000000000000 && - ntohll(egr_addr.lo) == 0x4) + if (bpf_be64_to_cpu(egr_addr.hi) == 0xfd00000000000000 && + bpf_be64_to_cpu(egr_addr.lo) == 0x4) return 1; } @@ -308,8 +304,8 @@ int __encap_srh(struct __sk_buff *skb) #pragma clang loop unroll(full) for (unsigned long long lo = 0; lo < 4; lo++) { - seg->lo = htonll(4 - lo); - seg->hi = htonll(hi); + seg->lo = bpf_cpu_to_be64(4 - lo); + seg->hi = bpf_cpu_to_be64(hi); seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg)); } @@ -349,8 +345,8 @@ int __add_egr_x(struct __sk_buff *skb) if (err) return BPF_DROP; - addr.lo = htonll(lo); - addr.hi = htonll(hi); + addr.lo = bpf_cpu_to_be64(lo); + addr.hi = bpf_cpu_to_be64(hi); err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, (void *)&addr, sizeof(addr)); if (err) diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c index 1dbe1d4d467e..c4d104428643 100644 --- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c +++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c @@ -12,10 +12,6 @@ #define SR6_FLAG_ALERT (1 << 4) -#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \ - 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32)) -#define ntohll(x) ((bpf_ntohl(1)) == 1 ? 
(x) : ((uint64_t)bpf_ntohl((x) & \ - 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32)) #define BPF_PACKET_HEADER __attribute__((packed)) struct ip6_t { @@ -251,8 +247,8 @@ int __add_egr_x(struct __sk_buff *skb) if (err) return BPF_DROP; - addr.lo = htonll(lo); - addr.hi = htonll(hi); + addr.lo = bpf_cpu_to_be64(lo); + addr.hi = bpf_cpu_to_be64(hi); err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, (void *)&addr, sizeof(addr)); if (err) diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh new file mode 100755 index 000000000000..4ba5a34bff56 --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool_build.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +ERROR=0 +TMPDIR= + +# If one build fails, continue but return non-0 on exit. +return_value() { + if [ -d "$TMPDIR" ] ; then + rm -rf -- $TMPDIR + fi + exit $ERROR +} +trap return_value EXIT + +case $1 in + -h|--help) + echo -e "$0 [-j <n>]" + echo -e "\tTest the different ways of building bpftool." + echo -e "" + echo -e "\tOptions:" + echo -e "\t\t-j <n>:\tPass -j flag to 'make'." + exit + ;; +esac + +J=$* + +# Assume script is located under tools/testing/selftests/bpf/. We want to start +# build attempts from the top of kernel repository. +SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0) +SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH) +KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../) +cd $KDIR_ROOT_DIR + +check() { + local dir=$(realpath $1) + + echo -n "binary: " + # Returns non-null if file is found (and "false" is run) + find $dir -type f -executable -name bpftool -print -exec false {} + && \ + ERROR=1 && printf "FAILURE: Did not find bpftool\n" +} + +make_and_clean() { + echo -e "\$PWD: $PWD" + echo -e "command: make -s $* >/dev/null" + make $J -s $* >/dev/null + if [ $? -ne 0 ] ; then + ERROR=1 + fi + if [ $# -ge 1 ] ; then + check ${@: -1} + else + check . + fi + ( + if [ $# -ge 1 ] ; then + cd ${@: -1} + fi + make -s clean + ) + echo +} + +make_with_tmpdir() { + local ARGS + + TMPDIR=$(mktemp -d) + if [ $# -ge 2 ] ; then + ARGS=${@:1:(($# - 1))} + fi + echo -e "\$PWD: $PWD" + echo -e "command: make -s $ARGS ${@: -1}=$TMPDIR/ >/dev/null" + make $J -s $ARGS ${@: -1}=$TMPDIR/ >/dev/null + if [ $? -ne 0 ] ; then + ERROR=1 + fi + check $TMPDIR + rm -rf -- $TMPDIR + echo +} + +echo "Trying to build bpftool" +echo -e "... through kbuild\n" + +if [ -f ".config" ] ; then + make_and_clean tools/bpf + + ## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed + ## down from toplevel Makefile to bpftool's Makefile. + + # make_with_tmpdir tools/bpf OUTPUT + echo -e "skip: make tools/bpf OUTPUT=<dir> (not supported)\n" + + make_with_tmpdir tools/bpf O +else + echo -e "skip: make tools/bpf (no .config found)\n" + echo -e "skip: make tools/bpf OUTPUT=<dir> (not supported)\n" + echo -e "skip: make tools/bpf O=<dir> (no .config found)\n" +fi + +echo -e "... from kernel source tree\n" + +make_and_clean -C tools/bpf/bpftool + +make_with_tmpdir -C tools/bpf/bpftool OUTPUT + +make_with_tmpdir -C tools/bpf/bpftool O + +echo -e "... from tools/\n" +cd tools/ + +make_and_clean bpf + +## In tools/bpf/Makefile, function "descend" is called and passes $(O) and +## $(OUTPUT). We would like $(OUTPUT) to have "bpf/bpftool/" appended before +## calling bpftool's Makefile, but this is not the case as the "descend" +## function focuses on $(O)/$(subdir). 
However, in the present case, updating +## $(O) to have $(OUTPUT) recomputed from it in bpftool's Makefile does not +## work, because $(O) is not defined from command line and $(OUTPUT) is not +## updated in tools/scripts/Makefile.include. +## +## Workarounds would require to a) edit "descend" or use an alternative way to +## call bpftool's Makefile, b) modify the conditions to update $(OUTPUT) and +## other variables in tools/scripts/Makefile.include (at the risk of breaking +## the build of other tools), or c) append manually the "bpf/bpftool" suffix to +## $(OUTPUT) in bpf's Makefile, which may break if targets for other directories +## use "descend" in the future. + +# make_with_tmpdir bpf OUTPUT +echo -e "skip: make bpf OUTPUT=<dir> (not supported)\n" + +make_with_tmpdir bpf O + +echo -e "... from bpftool's dir\n" +cd bpf/bpftool + +make_and_clean + +make_with_tmpdir OUTPUT + +make_with_tmpdir O diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 425f9ed27c3b..15a666329a34 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -1353,7 +1353,7 @@ try: bpftool_prog_list_wait(expected=1) ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"] - fail(ifnameB != simB1['ifname'], "program not bound to originial device") + fail(ifnameB != simB1['ifname'], "program not bound to original device") simB1.remove() bpftool_prog_list_wait(expected=1) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 12895d03d58b..e8616e778cb5 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -8,22 +8,20 @@ /* defined in test_progs.h */ struct test_env env; -int error_cnt, pass_cnt; struct prog_test_def { const char *test_name; int test_num; void (*run_test)(void); bool force_log; - int pass_cnt; int error_cnt; + int skip_cnt; bool tested; const char *subtest_name; int subtest_num; /* store counts before subtest started */ - int old_pass_cnt; int old_error_cnt; }; @@ -47,6 +45,7 @@ static void dump_test_log(const struct prog_test_def *test, bool failed) if (env.verbose || test->force_log || failed) { if (env.log_cnt) { + env.log_buf[env.log_cnt] = '\0'; fprintf(env.stdout, "%s", env.log_buf); if (env.log_buf[env.log_cnt - 1] != '\n') fprintf(env.stdout, "\n"); @@ -56,15 +55,24 @@ static void dump_test_log(const struct prog_test_def *test, bool failed) fseeko(stdout, 0, SEEK_SET); /* rewind */ } +static void skip_account(void) +{ + if (env.test->skip_cnt) { + env.skip_cnt++; + env.test->skip_cnt = 0; + } +} + void test__end_subtest() { struct prog_test_def *test = env.test; - int sub_error_cnt = error_cnt - test->old_error_cnt; + int sub_error_cnt = test->error_cnt - test->old_error_cnt; if (sub_error_cnt) env.fail_cnt++; else env.sub_succ_cnt++; + skip_account(); dump_test_log(test, sub_error_cnt); @@ -95,8 +103,7 @@ bool test__start_subtest(const char *name) return false; test->subtest_name = name; - env.test->old_pass_cnt = pass_cnt; - env.test->old_error_cnt = error_cnt; + env.test->old_error_cnt = env.test->error_cnt; return true; } @@ -105,6 +112,16 @@ void test__force_log() { env.test->force_log = true; } +void test__skip(void) +{ + env.test->skip_cnt++; +} + +void test__fail(void) +{ + env.test->error_cnt++; +} + struct ipv4_packet pkt_v4 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), .iph.ihl = 5, @@ -129,7 +146,7 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const 
char *name) map = bpf_object__find_map_by_name(obj, name); if (!map) { printf("%s:FAIL:map '%s' not found\n", test, name); - error_cnt++; + test__fail(); return -1; } return bpf_map__fd(map); @@ -488,8 +505,6 @@ int main(int argc, char **argv) stdio_hijack(); for (i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; - int old_pass_cnt = pass_cnt; - int old_error_cnt = error_cnt; env.test = test; test->test_num = i + 1; @@ -504,12 +519,11 @@ int main(int argc, char **argv) test__end_subtest(); test->tested = true; - test->pass_cnt = pass_cnt - old_pass_cnt; - test->error_cnt = error_cnt - old_error_cnt; if (test->error_cnt) env.fail_cnt++; else env.succ_cnt++; + skip_account(); dump_test_log(test, test->error_cnt); @@ -518,11 +532,11 @@ int main(int argc, char **argv) test->error_cnt ? "FAIL" : "OK"); } stdio_restore(); - printf("Summary: %d/%d PASSED, %d FAILED\n", - env.succ_cnt, env.sub_succ_cnt, env.fail_cnt); + printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", + env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); free(env.test_selector.num_set); free(env.subtest_selector.num_set); - return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; + return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 37d427f5a1e5..c8edb9464ba6 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -38,8 +38,6 @@ typedef __u16 __sum16; #include "trace_helpers.h" #include "flow_dissector_load.h" -struct prog_test_def; - struct test_selector { const char *name; bool *num_set; @@ -64,14 +62,15 @@ struct test_env { int succ_cnt; /* successful tests */ int sub_succ_cnt; /* successful sub-tests */ int fail_cnt; /* total failed tests + sub-tests */ + int skip_cnt; /* skipped tests */ }; -extern int error_cnt; -extern int pass_cnt; extern struct test_env env; extern void test__force_log(); extern bool test__start_subtest(const char *name); +extern void test__skip(void); +extern void test__fail(void); #define MAGIC_BYTES 123 @@ -94,17 +93,25 @@ extern struct ipv6_packet pkt_v6; #define _CHECK(condition, tag, duration, format...) ({ \ int __ret = !!(condition); \ if (__ret) { \ - error_cnt++; \ + test__fail(); \ printf("%s:FAIL:%s ", __func__, tag); \ printf(format); \ } else { \ - pass_cnt++; \ printf("%s:PASS:%s %d nsec\n", \ __func__, tag, duration); \ } \ __ret; \ }) +#define CHECK_FAIL(condition) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + test__fail(); \ + printf("%s:FAIL:%d\n", __func__, __LINE__); \ + } \ + __ret; \ +}) + #define CHECK(condition, tag, format...) \ _CHECK(condition, tag, duration, format) #define CHECK_ATTR(condition, tag, format...) 
\ diff --git a/tools/testing/selftests/bpf/test_sockopt_inherit.c b/tools/testing/selftests/bpf/test_sockopt_inherit.c new file mode 100644 index 000000000000..1bf699815b9b --- /dev/null +++ b/tools/testing/selftests/bpf/test_sockopt_inherit.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <error.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <pthread.h> + +#include <linux/filter.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "bpf_rlimit.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" + +#define CG_PATH "/sockopt_inherit" +#define SOL_CUSTOM 0xdeadbeef +#define CUSTOM_INHERIT1 0 +#define CUSTOM_INHERIT2 1 +#define CUSTOM_LISTENER 2 + +static int connect_to_server(int server_fd) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int fd; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get server addr"); + goto out; + } + + if (connect(fd, (const struct sockaddr *)&addr, len) < 0) { + log_err("Failed to connect to server"); + goto out; + } + + return fd; + +out: + close(fd); + return -1; +} + +static int verify_sockopt(int fd, int optname, const char *msg, char expected) +{ + socklen_t optlen = 1; + char buf = 0; + int err; + + err = getsockopt(fd, SOL_CUSTOM, optname, &buf, &optlen); + if (err) { + log_err("%s: failed to call getsockopt", msg); + return 1; + } + + printf("%s %d: got=0x%x ? expected=0x%x\n", msg, optname, buf, expected); + + if (buf != expected) { + log_err("%s: unexpected getsockopt value %d != %d", msg, + buf, expected); + return 1; + } + + return 0; +} + +static void *server_thread(void *arg) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int fd = *(int *)arg; + int client_fd; + int err = 0; + + if (listen(fd, 1) < 0) + error(1, errno, "Failed to listen on socket"); + + err += verify_sockopt(fd, CUSTOM_INHERIT1, "listen", 1); + err += verify_sockopt(fd, CUSTOM_INHERIT2, "listen", 1); + err += verify_sockopt(fd, CUSTOM_LISTENER, "listen", 1); + + client_fd = accept(fd, (struct sockaddr *)&addr, &len); + if (client_fd < 0) + error(1, errno, "Failed to accept client"); + + err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "accept", 1); + err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "accept", 1); + err += verify_sockopt(client_fd, CUSTOM_LISTENER, "accept", 0); + + close(client_fd); + + return (void *)(long)err; +} + +static int start_server(void) +{ + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + }; + char buf; + int err; + int fd; + int i; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + log_err("Failed to create server socket"); + return -1; + } + + for (i = CUSTOM_INHERIT1; i <= CUSTOM_LISTENER; i++) { + buf = 0x01; + err = setsockopt(fd, SOL_CUSTOM, i, &buf, 1); + if (err) { + log_err("Failed to call setsockopt(%d)", i); + close(fd); + return -1; + } + } + + if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) { + log_err("Failed to bind socket"); + close(fd); + return -1; + } + + return fd; +} + +static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) +{ + enum bpf_attach_type attach_type; + enum bpf_prog_type prog_type; + struct bpf_program *prog; + int err; + + err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); + if
(err) { + log_err("Failed to deduce types for %s BPF program", title); + return -1; + } + + prog = bpf_object__find_program_by_title(obj, title); + if (!prog) { + log_err("Failed to find %s BPF program", title); + return -1; + } + + err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, + attach_type, 0); + if (err) { + log_err("Failed to attach %s BPF program", title); + return -1; + } + + return 0; +} + +static int run_test(int cgroup_fd) +{ + struct bpf_prog_load_attr attr = { + .file = "./sockopt_inherit.o", + }; + int server_fd = -1, client_fd; + struct bpf_object *obj; + void *server_err; + pthread_t tid; + int ignored; + int err; + + err = bpf_prog_load_xattr(&attr, &obj, &ignored); + if (err) { + log_err("Failed to load BPF object"); + return -1; + } + + err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt"); + if (err) + goto close_bpf_object; + + err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt"); + if (err) + goto close_bpf_object; + + server_fd = start_server(); + if (server_fd < 0) { + err = -1; + goto close_bpf_object; + } + + pthread_create(&tid, NULL, server_thread, (void *)&server_fd); + + client_fd = connect_to_server(server_fd); + if (client_fd < 0) { + err = -1; + goto close_server_fd; + } + + err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0); + err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0); + err += verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0); + + pthread_join(tid, &server_err); + + err += (int)(long)server_err; + + close(client_fd); + +close_server_fd: + close(server_fd); +close_bpf_object: + bpf_object__close(obj); + return err; +} + +int main(int args, char **argv) +{ + int cgroup_fd; + int err = EXIT_SUCCESS; + + if (setup_cgroup_environment()) + return err; + + cgroup_fd = create_and_get_cgroup(CG_PATH); + if (cgroup_fd < 0) + goto cleanup_cgroup_env; + + if (join_cgroup(CG_PATH)) + goto cleanup_cgroup; + + if (run_test(cgroup_fd)) + err = EXIT_FAILURE; + + printf("test_sockopt_inherit: %s\n", + err == EXIT_SUCCESS ?
"PASSED" : "FAILED"); + +cleanup_cgroup: + close(cgroup_fd); +cleanup_cgroup_env: + cleanup_cgroup_environment(); + return err; +} diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index a3bebd7c68dd..fc33ae36b760 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -13,6 +13,7 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include "bpf_endian.h" #include "bpf_rlimit.h" #include "bpf_util.h" #include "cgroup_helpers.h" @@ -100,7 +101,7 @@ static struct sysctl_test tests[] = { .descr = "ctx:write sysctl:write read ok", .insns = { /* If (write) */ - BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, write)), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2), @@ -214,7 +215,8 @@ static struct sysctl_test tests[] = { /* if (ret == expected && */ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6), /* buf == "tcp_mem\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x006d656d5f706374ULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x7463705f6d656d00ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -255,7 +257,8 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), /* buf[0:7] == "tcp_me\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x00656d5f706374ULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x7463705f6d650000ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -298,12 +301,14 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14), /* buf[0:8] == "net/ipv4" && */ - BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x6e65742f69707634ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10), /* buf[8:16] == "/tcp_mem" && */ - BPF_LD_IMM64(BPF_REG_8, 0x6d656d5f7063742fULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x2f7463705f6d656dULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), @@ -350,12 +355,14 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10), /* buf[0:8] == "net/ipv4" && */ - BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x6e65742f69707634ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), /* buf[8:16] == "/tcp_me\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x00656d5f7063742fULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x2f7463705f6d6500ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -396,7 +403,8 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), /* buf[0:8] == "net/ip\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x000070692f74656eULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x6e65742f69700000ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -431,7 +439,8 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6), /* buf[0:6] == "Linux\n\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x4c696e75780a0000ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -469,7 +478,8 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6), /* buf[0:6] == "Linux\n\0") */ - 
BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x4c696e75780a0000ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -507,7 +517,8 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), /* buf[0:6] == "Linux\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x000078756e694cULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x4c696e7578000000ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -650,7 +661,8 @@ static struct sysctl_test tests[] = { /* buf[0:4] == "606\0") */ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x00363036, 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, + bpf_ntohl(0x36303600), 2), /* return DENY; */ BPF_MOV64_IMM(BPF_REG_0, 0), @@ -685,17 +697,20 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 23, 14), /* buf[0:8] == "3000000 " && */ - BPF_LD_IMM64(BPF_REG_8, 0x2030303030303033ULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x3330303030303020ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10), /* buf[8:16] == "4000000 " && */ - BPF_LD_IMM64(BPF_REG_8, 0x2030303030303034ULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x3430303030303020ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), /* buf[16:24] == "6000000\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x0030303030303036ULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x3630303030303000ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -735,7 +750,8 @@ static struct sysctl_test tests[] = { /* buf[0:3] == "60\0") */ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x003036, 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, + bpf_ntohl(0x36300000), 2), /* return DENY; */ BPF_MOV64_IMM(BPF_REG_0, 0), @@ -757,7 +773,8 @@ static struct sysctl_test tests[] = { /* sysctl_set_new_value arg2 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), @@ -791,7 +808,7 @@ static struct sysctl_test tests[] = { /* sysctl_set_new_value arg2 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, FIXUP_SYSCTL_VALUE), + BPF_LD_IMM64(BPF_REG_0, FIXUP_SYSCTL_VALUE), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), @@ -825,8 +842,9 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00303036), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -869,7 +887,8 @@ static struct sysctl_test tests[] = { BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), /* "600 602\0" */ - BPF_LD_IMM64(BPF_REG_0, 0x0032303620303036ULL), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3630302036303200ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -937,7 +956,8 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - 
BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -969,8 +989,9 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00373730), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x30373700)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1012,7 +1033,8 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1052,7 +1074,8 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x090a0c0d), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x0d0c0a09)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1092,7 +1115,9 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */ + /* " -6\0" */ + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x0a2d3600)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1132,8 +1157,10 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */ - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + /* " -6\0" */ + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x0a2d3600)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1175,8 +1202,10 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x65667830), /* "0xfe" */ - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + /* "0xfe" */ + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x30786665)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1218,11 +1247,14 @@ static struct sysctl_test tests[] = { /* arg1 (buf) 9223372036854775807 */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), - BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3932323333373230ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3336383534373735ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), - BPF_LD_IMM64(BPF_REG_0, 0x0000000000373038ULL), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3830370000000000ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1266,11 +1298,14 @@ static struct sysctl_test tests[] = { /* arg1 (buf) 9223372036854775808 */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), - BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3932323333373230ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL), + BPF_LD_IMM64(BPF_REG_0, + 
bpf_be64_to_cpu(0x3336383534373735ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), - BPF_LD_IMM64(BPF_REG_0, 0x0000000000383038ULL), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3830380000000000ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1344,20 +1379,24 @@ static size_t probe_prog_length(const struct bpf_insn *fp) static int fixup_sysctl_value(const char *buf, size_t buf_len, struct bpf_insn *prog, size_t insn_num) { - uint32_t value_num = 0; + union { + uint8_t raw[sizeof(uint64_t)]; + uint64_t num; + } value = {}; uint8_t c, i; - if (buf_len > sizeof(value_num)) { + if (buf_len > sizeof(value)) { log_err("Value is too big (%zd) to use in fixup", buf_len); return -1; } - - for (i = 0; i < buf_len; ++i) { - c = buf[i]; - value_num |= (c << i * 8); + if (prog[insn_num].code != (BPF_LD | BPF_DW | BPF_IMM)) { + log_err("Can fixup only BPF_LD_IMM64 insns"); + return -1; } - prog[insn_num].imm = value_num; + memcpy(value.raw, buf, buf_len); + prog[insn_num].imm = (uint32_t)value.num; + prog[insn_num + 1].imm = (uint32_t)(value.num >> 32); return 0; } @@ -1499,6 +1538,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test) goto err; } + errno = 0; if (access_sysctl(sysctl_path, test) == -1) { if (test->result == OP_EPERM && errno == EPERM) goto out; @@ -1507,7 +1547,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test) } if (test->result != SUCCESS) { - log_err("Unexpected failure"); + log_err("Unexpected success"); goto err; } diff --git a/tools/testing/selftests/bpf/test_tcp_rtt.c b/tools/testing/selftests/bpf/test_tcp_rtt.c index 90c3862f74a8..93916a69823e 100644 --- a/tools/testing/selftests/bpf/test_tcp_rtt.c +++ b/tools/testing/selftests/bpf/test_tcp_rtt.c @@ -6,6 +6,7 @@ #include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> +#include <netinet/tcp.h> #include <pthread.h> #include <linux/filter.h> @@ -34,6 +35,30 @@ static void send_byte(int fd) error(1, errno, "Failed to send single byte"); } +static int wait_for_ack(int fd, int retries) +{ + struct tcp_info info; + socklen_t optlen; + int i, err; + + for (i = 0; i < retries; i++) { + optlen = sizeof(info); + err = getsockopt(fd, SOL_TCP, TCP_INFO, &info, &optlen); + if (err < 0) { + log_err("Failed to lookup TCP stats"); + return err; + } + + if (info.tcpi_unacked == 0) + return 0; + + usleep(10); + } + + log_err("Did not receive ACK"); + return -1; +} + static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked, __u32 dsack_dups, __u32 delivered, __u32 delivered_ce, __u32 icsk_retransmits) @@ -149,6 +174,11 @@ static int run_test(int cgroup_fd, int server_fd) /*icsk_retransmits=*/0); send_byte(client_fd); + if (wait_for_ack(client_fd, 100) < 0) { + err = -1; + goto close_client_fd; + } + err += verify_sk(map_fd, client_fd, "first payload byte", /*invoked=*/2, @@ -157,6 +187,7 @@ static int run_test(int cgroup_fd, int server_fd) /*delivered_ce=*/0, /*icsk_retransmits=*/0); +close_client_fd: close(client_fd); close_bpf_object: diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 44e2d640b088..d27fd929abb9 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -61,6 +61,7 @@ #define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled" static bool unpriv_disabled = false; static int skips; +static bool verbose = false; struct bpf_test { const char *descr; @@ -92,7 +93,8 @@ struct bpf_test { enum { UNDEF, ACCEPT, - REJECT + 
REJECT, + VERBOSE_ACCEPT, } result, result_unpriv; enum bpf_prog_type prog_type; uint8_t flags; @@ -859,6 +861,36 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, return 0; } +static bool cmp_str_seq(const char *log, const char *exp) +{ + char needle[80]; + const char *p, *q; + int len; + + do { + p = strchr(exp, '\t'); + if (!p) + p = exp + strlen(exp); + + len = p - exp; + if (len >= sizeof(needle) || !len) { + printf("FAIL\nTestcase bug\n"); + return false; + } + strncpy(needle, exp, len); + needle[len] = 0; + q = strstr(log, needle); + if (!q) { + printf("FAIL\nUnexpected verifier log in successful load!\n" + "EXP: %s\nRES:\n", needle); + return false; + } + log = q + len; + exp = p + 1; + } while (*p); + return true; +} + static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { @@ -897,14 +929,20 @@ static void do_test_single(struct bpf_test *test, bool unpriv, pflags |= BPF_F_STRICT_ALIGNMENT; if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) pflags |= BPF_F_ANY_ALIGNMENT; + if (test->flags & ~3) + pflags |= test->flags; + expected_ret = unpriv && test->result_unpriv != UNDEF ? + test->result_unpriv : test->result; + expected_err = unpriv && test->errstr_unpriv ? + test->errstr_unpriv : test->errstr; memset(&attr, 0, sizeof(attr)); attr.prog_type = prog_type; attr.expected_attach_type = test->expected_attach_type; attr.insns = prog; attr.insns_cnt = prog_len; attr.license = "GPL"; - attr.log_level = 4; + attr.log_level = verbose || expected_ret == VERBOSE_ACCEPT ? 1 : 4; attr.prog_flags = pflags; fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog)); @@ -914,14 +952,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, goto close_fds; } - expected_ret = unpriv && test->result_unpriv != UNDEF ? - test->result_unpriv : test->result; - expected_err = unpriv && test->errstr_unpriv ? 
- test->errstr_unpriv : test->errstr; - alignment_prevented_execution = 0; - if (expected_ret == ACCEPT) { + if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) { if (fd_prog < 0) { printf("FAIL\nFailed to load prog '%s'!\n", strerror(errno)); @@ -932,6 +965,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)) alignment_prevented_execution = 1; #endif + if (expected_ret == VERBOSE_ACCEPT && !cmp_str_seq(bpf_vlog, expected_err)) { + goto fail_log; + } } else { if (fd_prog >= 0) { printf("FAIL\nUnexpected success to load!\n"); @@ -957,6 +993,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, } } + if (verbose) + printf(", verifier log:\n%s", bpf_vlog); + run_errs = 0; run_successes = 0; if (!alignment_prevented_execution && fd_prog >= 0) { @@ -1097,17 +1136,24 @@ int main(int argc, char **argv) { unsigned int from = 0, to = ARRAY_SIZE(tests); bool unpriv = !is_admin(); + int arg = 1; + + if (argc > 1 && strcmp(argv[1], "-v") == 0) { + arg++; + verbose = true; + argc--; + } if (argc == 3) { - unsigned int l = atoi(argv[argc - 2]); - unsigned int u = atoi(argv[argc - 1]); + unsigned int l = atoi(argv[arg]); + unsigned int u = atoi(argv[arg + 1]); if (l < to && u < to) { from = l; to = u + 1; } } else if (argc == 2) { - unsigned int t = atoi(argv[argc - 1]); + unsigned int t = atoi(argv[arg]); if (t < to) { from = t; diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c new file mode 100644 index 000000000000..02151f8c940f --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -0,0 +1,194 @@ +{ + "precise: test 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + + BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), /* map_value_ptr -= map_value_ptr */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1), + BPF_EXIT_INSN(), + + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_array_48b = { 1 }, + .result = VERBOSE_ACCEPT, + .errstr = + "26: (85) call bpf_probe_read#4\ + last_idx 26 first_idx 20\ + regs=4 stack=0 before 25\ + regs=4 stack=0 before 24\ + regs=4 stack=0 before 23\ + regs=4 stack=0 before 22\ + regs=4 stack=0 before 20\ + parent didn't have regs=4 stack=0 marks\ + last_idx 19 first_idx 10\ + regs=4 stack=0 before 19\ + regs=200 stack=0 before 18\ + regs=300 stack=0 before 17\ + regs=201 stack=0 before 15\ + regs=201 stack=0 before 14\ + regs=200 stack=0 before 13\ + regs=200 stack=0 before 12\ + regs=200 stack=0 before 11\ + regs=200 stack=0 before 10\ + parent already had regs=0 stack=0 marks", +}, +{ + "precise: test 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + 
BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + + BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), /* map_value_ptr -= map_value_ptr */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1), + BPF_EXIT_INSN(), + + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_array_48b = { 1 }, + .result = VERBOSE_ACCEPT, + .flags = BPF_F_TEST_STATE_FREQ, + .errstr = + "26: (85) call bpf_probe_read#4\ + last_idx 26 first_idx 22\ + regs=4 stack=0 before 25\ + regs=4 stack=0 before 24\ + regs=4 stack=0 before 23\ + regs=4 stack=0 before 22\ + parent didn't have regs=4 stack=0 marks\ + last_idx 20 first_idx 20\ + regs=4 stack=0 before 20\ + parent didn't have regs=4 stack=0 marks\ + last_idx 19 first_idx 17\ + regs=4 stack=0 before 19\ + regs=200 stack=0 before 18\ + regs=300 stack=0 before 17\ + parent already had regs=0 stack=0 marks", +}, +{ + "precise: cross frame pruning", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = BPF_F_TEST_STATE_FREQ, + .errstr = "!read_ok", + .result = REJECT, +}, +{ + "precise: ST insn causing spi > allocated_stack", + .insns = { + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_MOV64_IMM(BPF_REG_0, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = BPF_F_TEST_STATE_FREQ, + .errstr = "5: (2d) if r4 > r0 goto pc+0\ + last_idx 5 first_idx 5\ + parent didn't have regs=10 stack=0 marks\ + last_idx 4 first_idx 2\ + regs=10 stack=0 before 4\ + regs=10 stack=0 before 3\ + regs=0 stack=1 before 2\ + last_idx 5 first_idx 5\ + parent didn't have regs=1 stack=0 marks", + .result = VERBOSE_ACCEPT, + .retval = -1, +}, +{ + "precise: STX insn causing spi > allocated_stack", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + 
BPF_MOV64_IMM(BPF_REG_0, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = BPF_F_TEST_STATE_FREQ, + .errstr = "last_idx 6 first_idx 6\ + parent didn't have regs=10 stack=0 marks\ + last_idx 5 first_idx 3\ + regs=10 stack=0 before 5\ + regs=10 stack=0 before 4\ + regs=0 stack=1 before 3\ + last_idx 6 first_idx 6\ + parent didn't have regs=1 stack=0 marks\ + last_idx 5 first_idx 3\ + regs=1 stack=0 before 5", + .result = VERBOSE_ACCEPT, + .retval = -1, +}, |
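For reference, here is a minimal sketch of what a prog_tests/ style test looks like once it is written against the CHECK_FAIL(), test__fail() and test__skip() helpers introduced in test_progs.h above. The test name, object file and map name are hypothetical and only illustrate the conversion pattern; they are not part of the series.

/* Hypothetical prog_tests/foo.c, assuming a foo_test.o object that
 * contains a map named "foo_map".
 */
#include <test_progs.h>

void test_foo(void)
{
	struct bpf_object *obj;
	int err, prog_fd, map_fd;

	/* CHECK_FAIL() calls test__fail() and prints the function name and
	 * line number on error, so the test body no longer touches a global
	 * error counter.
	 */
	err = bpf_prog_load("./foo_test.o", BPF_PROG_TYPE_SCHED_CLS,
			    &obj, &prog_fd);
	if (CHECK_FAIL(err))
		return;

	map_fd = bpf_find_map(__func__, obj, "foo_map");
	if (CHECK_FAIL(map_fd < 0))
		goto out;

	/* A test that cannot run in the current environment would call
	 * test__skip() here instead of failing; test_progs then reports it
	 * in the new SKIPPED count of the summary line.
	 */
out:
	bpf_object__close(obj);
}

Because test__fail() bumps the per-test error_cnt kept in struct prog_test_def, a failing subtest is accounted against its own test rather than a process-wide counter, which is what lets the final summary distinguish passed, skipped and failed tests.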