aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-class-net15
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcm4908enet.yaml45
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/ice.rst1027
-rw-r--r--Documentation/networking/ip-sysctl.rst25
-rw-r--r--MAINTAINERS9
-rw-r--r--drivers/net/ethernet/broadcom/Kconfig8
-rw-r--r--drivers/net/ethernet/broadcom/Makefile1
-rw-r--r--drivers/net/ethernet/broadcom/bcm4908enet.c676
-rw-r--r--drivers/net/ethernet/broadcom/bcm4908enet.h96
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c31
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h11
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c27
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c11
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c14
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h5
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c14
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h6
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h11
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c19
-rw-r--r--drivers/net/ethernet/intel/ice/Makefile1
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h52
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h25
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c58
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_controlq.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb.c40
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.c47
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_nl.c50
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c14
-rw-r--r--drivers/net/ethernet/intel/ice/ice_flex_pipe.c10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_hw_autogen.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.c445
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.h87
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c142
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c87
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sched.c1059
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sched.h24
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c61
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h27
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c72
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c71
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c58
-rw-r--r--drivers/net/netdevsim/dev.c40
-rw-r--r--drivers/net/netdevsim/fib.c123
-rw-r--r--drivers/net/phy/broadcom.c93
-rw-r--r--drivers/net/phy/marvell.c1
-rw-r--r--drivers/net/phy/micrel.c1
-rw-r--r--include/linux/netdevice.h23
-rw-r--r--include/net/ip6_fib.h5
-rw-r--r--include/net/ip_fib.h3
-rw-r--r--include/uapi/linux/rtnetlink.h5
-rw-r--r--net/core/dev.c192
-rw-r--r--net/core/net-sysfs.c40
-rw-r--r--net/ipv4/esp4_offload.c2
-rw-r--r--net/ipv4/fib_lookup.h3
-rw-r--r--net/ipv4/fib_semantics.c3
-rw-r--r--net/ipv4/fib_trie.c13
-rw-r--r--net/ipv4/route.c1
-rw-r--r--net/ipv4/sysctl_net_ipv4.c2
-rw-r--r--net/ipv6/esp6.c2
-rw-r--r--net/ipv6/route.c14
-rw-r--r--net/ipv6/sysctl_net_ipv6.c2
-rw-r--r--net/xfrm/xfrm_interface.c10
-rw-r--r--net/xfrm/xfrm_user.c2
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh134
73 files changed, 4666 insertions, 563 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index 1f2002df5ba2..1419103d11f9 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -337,3 +337,18 @@ Contact: [email protected]
Description:
32-bit unsigned integer counting the number of times the link has
been down
+
+What: /sys/class/net/<iface>/threaded
+Date: Jan 2021
+KernelVersion: 5.12
+Description:
+ Boolean value to control the threaded mode per device. User could
+ set this value to enable/disable threaded mode for all napi
+ belonging to this device, without the need to do device up/down.
+
+ Possible values:
+ == ==================================
+ 0 threaded mode disabled for this dev
+ 1 threaded mode enabled for this dev
+ == ==================================
diff --git a/Documentation/devicetree/bindings/net/brcm,bcm4908enet.yaml b/Documentation/devicetree/bindings/net/brcm,bcm4908enet.yaml
new file mode 100644
index 000000000000..5f12f51c5b19
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/brcm,bcm4908enet.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/brcm,bcm4908enet.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM4908 Ethernet controller
+
+description: Broadcom's Ethernet controller integrated into BCM4908 family SoCs
+
+maintainers:
+ - Rafał Miłecki <[email protected]>
+
+properties:
+ compatible:
+ const: brcm,bcm4908enet
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: RX interrupt
+
+ interrupt-names:
+ const: rx
+
+required:
+ - reg
+ - interrupts
+ - interrupt-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ ethernet@80002000 {
+ compatible = "brcm,bcm4908enet";
+ reg = <0x80002000 0x1000>;
+
+ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "rx";
+ };
diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
index ee43ea57d443..e7d9cbff771b 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
@@ -1,46 +1,1031 @@
.. SPDX-License-Identifier: GPL-2.0+
-==================================================================
-Linux Base Driver for the Intel(R) Ethernet Connection E800 Series
-==================================================================
+=================================================================
+Linux Base Driver for the Intel(R) Ethernet Controller 800 Series
+=================================================================
Intel ice Linux driver.
-Copyright(c) 2018 Intel Corporation.
+Copyright(c) 2018-2021 Intel Corporation.
Contents
========
-- Enabling the driver
-- Support
+- Overview
+- Identifying Your Adapter
+- Important Notes
+- Additional Features & Configurations
+- Performance Optimization
-The driver in this release supports Intel's E800 Series of products. For
-more information, visit Intel's support page at https://support.intel.com.
-Enabling the driver
-===================
-The driver is enabled via the standard kernel configuration system,
-using the make command::
+The associated Virtual Function (VF) driver for this driver is iavf.
- make oldconfig/menuconfig/etc.
+Driver information can be obtained using ethtool and lspci.
-The driver is located in the menu structure at:
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel adapter. All hardware requirements listed apply to use
+with Linux.
+
+This driver supports XDP (Express Data Path) and AF_XDP zero-copy. Note that
+XDP is blocked for frame sizes larger than 3KB.
+
+
+Identifying Your Adapter
+========================
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+https://www.intel.com/support
+
+
+Important Notes
+===============
+
+Packet drops may occur under receive stress
+-------------------------------------------
+Devices based on the Intel(R) Ethernet Controller 800 Series are designed to
+tolerate a limited amount of system latency during PCIe and DMA transactions.
+If these transactions take longer than the tolerated latency, it can impact the
+length of time the packets are buffered in the device and associated memory,
+which may result in dropped packets. These packets drops typically do not have
+a noticeable impact on throughput and performance under standard workloads.
+
+If these packet drops appear to affect your workload, the following may improve
+the situation:
+
+1) Make sure that your system's physical memory is in a high-performance
+ configuration, as recommended by the platform vendor. A common
+ recommendation is for all channels to be populated with a single DIMM
+ module.
+2) In your system's BIOS/UEFI settings, select the "Performance" profile.
+3) Your distribution may provide tools like "tuned," which can help tweak
+ kernel settings to achieve better standard settings for different workloads.
+
+
+Configuring SR-IOV for improved network security
+------------------------------------------------
+In a virtualized environment, on Intel(R) Ethernet Network Adapters that
+support SR-IOV, the virtual function (VF) may be subject to malicious behavior.
+Software-generated layer two frames, like IEEE 802.3x (link flow control), IEEE
+802.1Qbb (priority based flow-control), and others of this type, are not
+expected and can throttle traffic between the host and the virtual switch,
+reducing performance. To resolve this issue, and to ensure isolation from
+unintended traffic streams, configure all SR-IOV enabled ports for VLAN tagging
+from the administrative interface on the PF. This configuration allows
+unexpected, and potentially malicious, frames to be dropped.
+
+See "Configuring VLAN Tagging on SR-IOV Enabled Adapter Ports" later in this
+README for configuration instructions.
+
+
+Do not unload port driver if VF with active VM is bound to it
+-------------------------------------------------------------
+Do not unload a port's driver if a Virtual Function (VF) with an active Virtual
+Machine (VM) is bound to it. Doing so will cause the port to appear to hang.
+Once the VM shuts down, or otherwise releases the VF, the command will
+complete.
+
+
+Important notes for SR-IOV and Link Aggregation
+-----------------------------------------------
+Link Aggregation is mutually exclusive with SR-IOV.
+
+- If Link Aggregation is active, SR-IOV VFs cannot be created on the PF.
+- If SR-IOV is active, you cannot set up Link Aggregation on the interface.
+
+Bridging and MACVLAN are also affected by this. If you wish to use bridging or
+MACVLAN with SR-IOV, you must set up bridging or MACVLAN before enabling
+SR-IOV. If you are using bridging or MACVLAN in conjunction with SR-IOV, and
+you want to remove the interface from the bridge or MACVLAN, you must follow
+these steps:
+
+1. Destroy SR-IOV VFs if they exist
+2. Remove the interface from the bridge or MACVLAN
+3. Recreate SRIOV VFs as needed
+
+
+Additional Features and Configurations
+======================================
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. Download it at:
+https://kernel.org/pub/software/network/ethtool/
+
+NOTE: The rx_bytes value of ethtool does not match the rx_bytes value of
+Netdev, due to the 4-byte CRC being stripped by the device. The difference
+between the two rx_bytes values will be 4 x the number of Rx packets. For
+example, if Rx packets are 10 and Netdev (software statistics) displays
+rx_bytes as "X", then ethtool (hardware statistics) will display rx_bytes as
+"X+40" (4 bytes CRC x 10 packets).
+
+
+Viewing Link Messages
+---------------------
+Link messages will not be displayed to the console if the distribution is
+restricting system messages. In order to see network driver link messages on
+your console, set dmesg to eight by entering the following::
+
+ # dmesg -n 8
+
+NOTE: This setting is not saved across reboots.
+
+
+Dynamic Device Personalization
+------------------------------
+Dynamic Device Personalization (DDP) allows you to change the packet processing
+pipeline of a device by applying a profile package to the device at runtime.
+Profiles can be used to, for example, add support for new protocols, change
+existing protocols, or change default settings. DDP profiles can also be rolled
+back without rebooting the system.
+
+The DDP package loads during device initialization. The driver looks for
+``intel/ice/ddp/ice.pkg`` in your firmware root (typically ``/lib/firmware/``
+or ``/lib/firmware/updates/``) and checks that it contains a valid DDP package
+file.
+
+NOTE: Your distribution should likely have provided the latest DDP file, but if
+ice.pkg is missing, you can find it in the linux-firmware repository or from
+intel.com.
+
+If the driver is unable to load the DDP package, the device will enter Safe
+Mode. Safe Mode disables advanced and performance features and supports only
+basic traffic and minimal functionality, such as updating the NVM or
+downloading a new driver or DDP package. Safe Mode only applies to the affected
+physical function and does not impact any other PFs. See the "Intel(R) Ethernet
+Adapters and Devices User Guide" for more details on DDP and Safe Mode.
+
+NOTES:
+
+- If you encounter issues with the DDP package file, you may need to download
+ an updated driver or DDP package file. See the log messages for more
+ information.
+
+- The ice.pkg file is a symbolic link to the default DDP package file.
+
+- You cannot update the DDP package if any PF drivers are already loaded. To
+ overwrite a package, unload all PFs and then reload the driver with the new
+ package.
+
+- Only the first loaded PF per device can download a package for that device.
+
+You can install specific DDP package files for different physical devices in
+the same system. To install a specific DDP package file:
+
+1. Download the DDP package file you want for your device.
+
+2. Rename the file ice-xxxxxxxxxxxxxxxx.pkg, where 'xxxxxxxxxxxxxxxx' is the
+ unique 64-bit PCI Express device serial number (in hex) of the device you
+ want the package downloaded on. The filename must include the complete
+ serial number (including leading zeros) and be all lowercase. For example,
+ if the 64-bit serial number is b887a3ffffca0568, then the file name would be
+ ice-b887a3ffffca0568.pkg.
+
+ To find the serial number from the PCI bus address, you can use the
+ following command::
+
+ # lspci -vv -s af:00.0 | grep -i Serial
+ Capabilities: [150 v1] Device Serial Number b8-87-a3-ff-ff-ca-05-68
+
+ You can use the following command to format the serial number without the
+ dashes::
+
+ # lspci -vv -s af:00.0 | grep -i Serial | awk '{print $7}' | sed s/-//g
+ b887a3ffffca0568
+
+3. Copy the renamed DDP package file to
+ ``/lib/firmware/updates/intel/ice/ddp/``. If the directory does not yet
+ exist, create it before copying the file.
+
+4. Unload all of the PFs on the device.
+
+5. Reload the driver with the new package.
+
+NOTE: The presence of a device-specific DDP package file overrides the loading
+of the default DDP package file (ice.pkg).
+
+
+Intel(R) Ethernet Flow Director
+-------------------------------
+The Intel Ethernet Flow Director performs the following tasks:
+
+- Directs receive packets according to their flows to different queues
+- Enables tight control on routing a flow in the platform
+- Matches flows and CPU cores for flow affinity
+
+NOTE: This driver supports the following flow types:
+
+- IPv4
+- TCPv4
+- UDPv4
+- SCTPv4
+- IPv6
+- TCPv6
+- UDPv6
+- SCTPv6
+
+Each flow type supports valid combinations of IP addresses (source or
+destination) and UDP/TCP/SCTP ports (source and destination). You can supply
+only a source IP address, a source IP address and a destination port, or any
+combination of one or more of these four parameters.
+
+NOTE: This driver allows you to filter traffic based on a user-defined flexible
+two-byte pattern and offset by using the ethtool user-def and mask fields. Only
+L3 and L4 flow types are supported for user-defined flexible filters. For a
+given flow type, you must clear all Intel Ethernet Flow Director filters before
+changing the input set (for that flow type).
+
+
+Flow Director Filters
+---------------------
+Flow Director filters are used to direct traffic that matches specified
+characteristics. They are enabled through ethtool's ntuple interface. To enable
+or disable the Intel Ethernet Flow Director and these filters::
+
+ # ethtool -K <ethX> ntuple <off|on>
+
+NOTE: When you disable ntuple filters, all the user programmed filters are
+flushed from the driver cache and hardware. All needed filters must be re-added
+when ntuple is re-enabled.
+
+To display all of the active filters::
+
+ # ethtool -u <ethX>
+
+To add a new filter::
+
+ # ethtool -U <ethX> flow-type <type> src-ip <ip> [m <ip_mask>] dst-ip <ip>
+ [m <ip_mask>] src-port <port> [m <port_mask>] dst-port <port> [m <port_mask>]
+ action <queue>
+
+ Where:
+ <ethX> - the Ethernet device to program
+ <type> - can be ip4, tcp4, udp4, sctp4, ip6, tcp6, udp6, sctp6
+ <ip> - the IP address to match on
+ <ip_mask> - the IPv4 address to mask on
+ NOTE: These filters use inverted masks.
+ <port> - the port number to match on
+ <port_mask> - the 16-bit integer for masking
+ NOTE: These filters use inverted masks.
+ <queue> - the queue to direct traffic toward (-1 discards the
+ matched traffic)
+
+To delete a filter::
+
+ # ethtool -U <ethX> delete <N>
+
+ Where <N> is the filter ID displayed when printing all the active filters,
+ and may also have been specified using "loc <N>" when adding the filter.
+
+EXAMPLES:
+
+To add a filter that directs packet to queue 2::
+
+ # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.10.1 dst-ip \
+ 192.168.10.2 src-port 2000 dst-port 2001 action 2 [loc 1]
+
+To set a filter using only the source and destination IP address::
+
+ # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.10.1 dst-ip \
+ 192.168.10.2 action 2 [loc 1]
+
+To set a filter based on a user-defined pattern and offset::
+
+ # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.10.1 dst-ip \
+ 192.168.10.2 user-def 0x4FFFF action 2 [loc 1]
+
+ where the value of the user-def field contains the offset (4 bytes) and
+ the pattern (0xffff).
+
+To match TCP traffic sent from 192.168.0.1, port 5300, directed to 192.168.0.5,
+port 80, and then send it to queue 7::
+
+ # ethtool -U enp130s0 flow-type tcp4 src-ip 192.168.0.1 dst-ip 192.168.0.5
+ src-port 5300 dst-port 80 action 7
+
+To add a TCPv4 filter with a partial mask for a source IP subnet::
+
+ # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.0.0 m 0.255.255.255 dst-ip
+ 192.168.5.12 src-port 12600 dst-port 31 action 12
+
+NOTES:
+
+For each flow-type, the programmed filters must all have the same matching
+input set. For example, issuing the following two commands is acceptable::
+
+ # ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7
+ # ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.5 src-port 55 action 10
+
+Issuing the next two commands, however, is not acceptable, since the first
+specifies src-ip and the second specifies dst-ip::
+
+ # ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7
+ # ethtool -U enp130s0 flow-type ip4 dst-ip 192.168.0.5 src-port 55 action 10
+
+The second command will fail with an error. You may program multiple filters
+with the same fields, using different values, but, on one device, you may not
+program two tcp4 filters with different matching fields.
+
+The ice driver does not support matching on a subportion of a field, thus
+partial mask fields are not supported.
+
+
+Flex Byte Flow Director Filters
+-------------------------------
+The driver also supports matching user-defined data within the packet payload.
+This flexible data is specified using the "user-def" field of the ethtool
+command in the following way:
+
+.. table::
+
+ ============================== ============================
+ ``31 28 24 20 16`` ``15 12 8 4 0``
+ ``offset into packet payload`` ``2 bytes of flexible data``
+ ============================== ============================
+
+For example,
+
+::
+
+ ... user-def 0x4FFFF ...
+
+tells the filter to look 4 bytes into the payload and match that value against
+0xFFFF. The offset is based on the beginning of the payload, and not the
+beginning of the packet. Thus
+
+::
+
+ flow-type tcp4 ... user-def 0x8BEAF ...
+
+would match TCP/IPv4 packets which have the value 0xBEAF 8 bytes into the
+TCP/IPv4 payload.
+
+Note that ICMP headers are parsed as 4 bytes of header and 4 bytes of payload.
+Thus to match the first byte of the payload, you must actually add 4 bytes to
+the offset. Also note that ip4 filters match both ICMP frames as well as raw
+(unknown) ip4 frames, where the payload will be the L3 payload of the IP4
+frame.
+
+The maximum offset is 64. The hardware will only read up to 64 bytes of data
+from the payload. The offset must be even because the flexible data is 2 bytes
+long and must be aligned to byte 0 of the packet payload.
+
+The user-defined flexible offset is also considered part of the input set and
+cannot be programmed separately for multiple filters of the same type. However,
+the flexible data is not part of the input set and multiple filters may use the
+same offset but match against different data.
+
+
+RSS Hash Flow
+-------------
+Allows you to set the hash bytes per flow type and any combination of one or
+more options for Receive Side Scaling (RSS) hash byte configuration.
+
+::
+
+ # ethtool -N <ethX> rx-flow-hash <type> <option>
+
+ Where <type> is:
+ tcp4 signifying TCP over IPv4
+ udp4 signifying UDP over IPv4
+ tcp6 signifying TCP over IPv6
+ udp6 signifying UDP over IPv6
+ And <option> is one or more of:
+ s Hash on the IP source address of the Rx packet.
+ d Hash on the IP destination address of the Rx packet.
+ f Hash on bytes 0 and 1 of the Layer 4 header of the Rx packet.
+ n Hash on bytes 2 and 3 of the Layer 4 header of the Rx packet.
+
+
+Accelerated Receive Flow Steering (aRFS)
+----------------------------------------
+Devices based on the Intel(R) Ethernet Controller 800 Series support
+Accelerated Receive Flow Steering (aRFS) on the PF. aRFS is a load-balancing
+mechanism that allows you to direct packets to the same CPU where an
+application is running or consuming the packets in that flow.
+
+NOTES:
+
+- aRFS requires that ntuple filtering is enabled via ethtool.
+- aRFS support is limited to the following packet types:
+
+ - TCP over IPv4 and IPv6
+ - UDP over IPv4 and IPv6
+ - Nonfragmented packets
+
+- aRFS only supports Flow Director filters, which consist of the
+ source/destination IP addresses and source/destination ports.
+- aRFS and ethtool's ntuple interface both use the device's Flow Director. aRFS
+ and ntuple features can coexist, but you may encounter unexpected results if
+ there's a conflict between aRFS and ntuple requests. See "Intel(R) Ethernet
+ Flow Director" for additional information.
+
+To set up aRFS:
+
+1. Enable the Intel Ethernet Flow Director and ntuple filters using ethtool.
+
+::
+
+ # ethtool -K <ethX> ntuple on
+
+2. Set up the number of entries in the global flow table. For example:
+
+::
+
+ # NUM_RPS_ENTRIES=16384
+ # echo $NUM_RPS_ENTRIES > /proc/sys/net/core/rps_sock_flow_entries
+
+3. Set up the number of entries in the per-queue flow table. For example:
+
+::
+
+ # NUM_RX_QUEUES=64
+ # for file in /sys/class/net/$IFACE/queues/rx-*/rps_flow_cnt; do
+ # echo $(($NUM_RPS_ENTRIES/$NUM_RX_QUEUES)) > $file;
+ # done
+
+4. Disable the IRQ balance daemon (this is only a temporary stop of the service
+ until the next reboot).
+
+::
+
+ # systemctl stop irqbalance
+
+5. Configure the interrupt affinity.
+
+ See ``/Documentation/core-api/irq/irq-affinity.rst``
+
+
+To disable aRFS using ethtool::
+
+ # ethtool -K <ethX> ntuple off
+
+NOTE: This command will disable ntuple filters and clear any aRFS filters in
+software and hardware.
+
+Example Use Case:
+
+1. Set the server application on the desired CPU (e.g., CPU 4).
+
+::
+
+ # taskset -c 4 netserver
+
+2. Use netperf to route traffic from the client to CPU 4 on the server with
+ aRFS configured. This example uses TCP over IPv4.
+
+::
+
+ # netperf -H <Host IPv4 Address> -t TCP_STREAM
+
+
+Enabling Virtual Functions (VFs)
+--------------------------------
+Use sysfs to enable virtual functions (VF).
+
+For example, you can create 4 VFs as follows::
+
+ # echo 4 > /sys/class/net/<ethX>/device/sriov_numvfs
+
+To disable VFs, write 0 to the same file::
+
+ # echo 0 > /sys/class/net/<ethX>/device/sriov_numvfs
+
+The maximum number of VFs for the ice driver is 256 total (all ports). To check
+how many VFs each PF supports, use the following command::
+
+ # cat /sys/class/net/<ethX>/device/sriov_totalvfs
+
+Note: You cannot use SR-IOV when link aggregation (LAG)/bonding is active, and
+vice versa. To enforce this, the driver checks for this mutual exclusion.
+
+
+Displaying VF Statistics on the PF
+----------------------------------
+Use the following command to display the statistics for the PF and its VFs::
+
+ # ip -s link show dev <ethX>
+
+NOTE: The output of this command can be very large due to the maximum number of
+possible VFs.
+
+The PF driver will display a subset of the statistics for the PF and for all
+VFs that are configured. The PF will always print a statistics block for each
+of the possible VFs, and it will show zero for all unconfigured VFs.
+
+
+Configuring VLAN Tagging on SR-IOV Enabled Adapter Ports
+--------------------------------------------------------
+To configure VLAN tagging for the ports on an SR-IOV enabled adapter, use the
+following command. The VLAN configuration should be done before the VF driver
+is loaded or the VM is booted. The VF is not aware of the VLAN tag being
+inserted on transmit and removed on received frames (sometimes called "port
+VLAN" mode).
+
+::
+
+ # ip link set dev <ethX> vf <id> vlan <vlan id>
+
+For example, the following will configure PF eth0 and the first VF on VLAN 10::
+
+ # ip link set dev eth0 vf 0 vlan 10
+
+
+Enabling a VF link if the port is disconnected
+----------------------------------------------
+If the physical function (PF) link is down, you can force link up (from the
+host PF) on any virtual functions (VF) bound to the PF.
+
+For example, to force link up on VF 0 bound to PF eth0::
+
+ # ip link set eth0 vf 0 state enable
+
+Note: If the command does not work, it may not be supported by your system.
+
+
+Setting the MAC Address for a VF
+--------------------------------
+To change the MAC address for the specified VF::
+
+ # ip link set <ethX> vf 0 mac <address>
+
+For example::
+
+ # ip link set <ethX> vf 0 mac 00:01:02:03:04:05
+
+This setting lasts until the PF is reloaded.
+
+NOTE: Assigning a MAC address for a VF from the host will disable any
+subsequent requests to change the MAC address from within the VM. This is a
+security feature. The VM is not aware of this restriction, so if this is
+attempted in the VM, it will trigger MDD events.
+
+
+Trusted VFs and VF Promiscuous Mode
+-----------------------------------
+This feature allows you to designate a particular VF as trusted and allows that
+trusted VF to request selective promiscuous mode on the Physical Function (PF).
+
+To set a VF as trusted or untrusted, enter the following command in the
+Hypervisor::
+
+ # ip link set dev <ethX> vf 1 trust [on|off]
+
+NOTE: It's important to set the VF to trusted before setting promiscuous mode.
+If the VM is not trusted, the PF will ignore promiscuous mode requests from the
+VF. If the VM becomes trusted after the VF driver is loaded, you must make a
+new request to set the VF to promiscuous.
+
+Once the VF is designated as trusted, use the following commands in the VM to
+set the VF to promiscuous mode.
+
+For promiscuous all::
+
+ # ip link set <ethX> promisc on
+ Where <ethX> is a VF interface in the VM
+
+For promiscuous Multicast::
+
+ # ip link set <ethX> allmulticast on
+ Where <ethX> is a VF interface in the VM
+
+NOTE: By default, the ethtool private flag vf-true-promisc-support is set to
+"off," meaning that promiscuous mode for the VF will be limited. To set the
+promiscuous mode for the VF to true promiscuous and allow the VF to see all
+ingress traffic, use the following command::
+
+ # ethtool --set-priv-flags <ethX> vf-true-promisc-support on
+
+The vf-true-promisc-support private flag does not enable promiscuous mode;
+rather, it designates which type of promiscuous mode (limited or true) you will
+get when you enable promiscuous mode using the ip link commands above. Note
+that this is a global setting that affects the entire device. However, the
+vf-true-promisc-support private flag is only exposed to the first PF of the
+device. The PF remains in limited promiscuous mode regardless of the
+vf-true-promisc-support setting.
+
+Next, add a VLAN interface on the VF interface. For example::
+
+ # ip link add link eth2 name eth2.100 type vlan id 100
+
+Note that the order in which you set the VF to promiscuous mode and add the
+VLAN interface does not matter (you can do either first). The result in this
+example is that the VF will get all traffic that is tagged with VLAN 100.
+
+
+Malicious Driver Detection (MDD) for VFs
+----------------------------------------
+Some Intel Ethernet devices use Malicious Driver Detection (MDD) to detect
+malicious traffic from the VF and disable Tx/Rx queues or drop the offending
+packet until a VF driver reset occurs. You can view MDD messages in the PF's
+system log using the dmesg command.
+
+- If the PF driver logs MDD events from the VF, confirm that the correct VF
+ driver is installed.
+- To restore functionality, you can manually reload the VF or VM or enable
+ automatic VF resets.
+- When automatic VF resets are enabled, the PF driver will immediately reset
+ the VF and reenable queues when it detects MDD events on the receive path.
+- If automatic VF resets are disabled, the PF will not automatically reset the
+ VF when it detects MDD events.
+
+To enable or disable automatic VF resets, use the following command::
+
+ # ethtool --set-priv-flags <ethX> mdd-auto-reset-vf on|off
+
+
+MAC and VLAN Anti-Spoofing Feature for VFs
+------------------------------------------
+When a malicious driver on a Virtual Function (VF) interface attempts to send a
+spoofed packet, it is dropped by the hardware and not transmitted.
+
+NOTE: This feature can be disabled for a specific VF::
+
+ # ip link set <ethX> vf <vf id> spoofchk {off|on}
+
+
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU)
+to a value larger than the default value of 1500.
+
+Use the ifconfig command to increase the MTU size. For example, enter the
+following where <ethX> is the interface number::
+
+ # ifconfig <ethX> mtu 9000 up
+
+Alternatively, you can use the ip command as follows::
+
+ # ip link set mtu 9000 dev <ethX>
+ # ip link set up dev <ethX>
+
+This setting is not saved across reboots.
+
+
+NOTE: The maximum MTU setting for jumbo frames is 9702. This corresponds to the
+maximum jumbo frame size of 9728 bytes.
+
+NOTE: This driver will attempt to use multiple page sized buffers to receive
+each jumbo packet. This should help to avoid buffer starvation issues when
+allocating receive packets.
+
+NOTE: Packet loss may have a greater impact on throughput when you use jumbo
+frames. If you observe a drop in performance after enabling jumbo frames,
+enabling flow control may mitigate the issue.
+
+
+Speed and Duplex Configuration
+------------------------------
+In addressing speed and duplex configuration issues, you need to distinguish
+between copper-based adapters and fiber-based adapters.
+
+In the default mode, an Intel(R) Ethernet Network Adapter using copper
+connections will attempt to auto-negotiate with its link partner to determine
+the best setting. If the adapter cannot establish link with the link partner
+using auto-negotiation, you may need to manually configure the adapter and link
+partner to identical settings to establish link and pass packets. This should
+only be needed when attempting to link with an older switch that does not
+support auto-negotiation or one that has been forced to a specific speed or
+duplex mode. Your link partner must match the setting you choose. 1 Gbps speeds
+and higher cannot be forced. Use the autonegotiation advertising setting to
+manually set devices for 1 Gbps and higher.
+
+Speed, duplex, and autonegotiation advertising are configured through the
+ethtool utility. For the latest version, download and install ethtool from the
+following website:
+
+ https://kernel.org/pub/software/network/ethtool/
+
+To see the speed configurations your device supports, run the following::
+
+ # ethtool <ethX>
+
+Caution: Only experienced network administrators should force speed and duplex
+or change autonegotiation advertising manually. The settings at the switch must
+always match the adapter settings. Adapter performance may suffer or your
+adapter may not operate if you configure the adapter differently from your
+switch.
+
+
+Data Center Bridging (DCB)
+--------------------------
+NOTE: The kernel assumes that TC0 is available, and will disable Priority Flow
+Control (PFC) on the device if TC0 is not available. To fix this, ensure TC0 is
+enabled when setting up DCB on your switch.
+
+DCB is a configuration Quality of Service implementation in hardware. It uses
+the VLAN priority tag (802.1p) to filter traffic. That means that there are 8
+different priorities that traffic can be filtered into. It also enables
+priority flow control (802.1Qbb) which can limit or eliminate the number of
+dropped packets during network stress. Bandwidth can be allocated to each of
+these priorities, which is enforced at the hardware level (802.1Qaz).
+
+DCB is normally configured on the network using the DCBX protocol (802.1Qaz), a
+specialization of LLDP (802.1AB). The ice driver supports the following
+mutually exclusive variants of DCBX support:
+
+1) Firmware-based LLDP Agent
+2) Software-based LLDP Agent
+
+In firmware-based mode, firmware intercepts all LLDP traffic and handles DCBX
+negotiation transparently for the user. In this mode, the adapter operates in
+"willing" DCBX mode, receiving DCB settings from the link partner (typically a
+switch). The local user can only query the negotiated DCB configuration. For
+information on configuring DCBX parameters on a switch, please consult the
+switch manufacturer's documentation.
+
+In software-based mode, LLDP traffic is forwarded to the network stack and user
+space, where a software agent can handle it. In this mode, the adapter can
+operate in either "willing" or "nonwilling" DCBX mode and DCB configuration can
+be both queried and set locally. This mode requires the FW-based LLDP Agent to
+be disabled.
+
+NOTE:
+
+- You can enable and disable the firmware-based LLDP Agent using an ethtool
+ private flag. Refer to the "FW-LLDP (Firmware Link Layer Discovery Protocol)"
+ section in this README for more information.
+- In software-based DCBX mode, you can configure DCB parameters using software
+ LLDP/DCBX agents that interface with the Linux kernel's DCB Netlink API. We
+ recommend using OpenLLDP as the DCBX agent when running in software mode. For
+ more information, see the OpenLLDP man pages and
+ https://github.com/intel/openlldp.
+- The driver implements the DCB netlink interface layer to allow the user space
+ to communicate with the driver and query DCB configuration for the port.
+- iSCSI with DCB is not supported.
+
+
+FW-LLDP (Firmware Link Layer Discovery Protocol)
+------------------------------------------------
+Use ethtool to change FW-LLDP settings. The FW-LLDP setting is per port and
+persists across boots.
+
+To enable LLDP::
+
+ # ethtool --set-priv-flags <ethX> fw-lldp-agent on
+
+To disable LLDP::
+
+ # ethtool --set-priv-flags <ethX> fw-lldp-agent off
+
+To check the current LLDP setting::
+
+ # ethtool --show-priv-flags <ethX>
+
+NOTE: You must enable the UEFI HII "LLDP Agent" attribute for this setting to
+take effect. If "LLDP AGENT" is set to disabled, you cannot enable it from the
+OS.
+
+
+Flow Control
+------------
+Ethernet Flow Control (IEEE 802.3x) can be configured with ethtool to enable
+receiving and transmitting pause frames for ice. When transmit is enabled,
+pause frames are generated when the receive packet buffer crosses a predefined
+threshold. When receive is enabled, the transmit unit will halt for the time
+delay specified when a pause frame is received.
+
+NOTE: You must have a flow control capable link partner.
+
+Flow Control is disabled by default.
+
+Use ethtool to change the flow control settings.
+
+To enable or disable Rx or Tx Flow Control::
+
+ # ethtool -A <ethX> rx <on|off> tx <on|off>
+
+Note: This command only enables or disables Flow Control if auto-negotiation is
+disabled. If auto-negotiation is enabled, this command changes the parameters
+used for auto-negotiation with the link partner.
+
+Note: Flow Control auto-negotiation is part of link auto-negotiation. Depending
+on your device, you may not be able to change the auto-negotiation setting.
+
+NOTE:
+
+- The ice driver requires flow control on both the port and link partner. If
+ flow control is disabled on one of the sides, the port may appear to hang on
+ heavy traffic.
+- You may encounter issues with link-level flow control (LFC) after disabling
+ DCB. The LFC status may show as enabled but traffic is not paused. To resolve
+ this issue, disable and reenable LFC using ethtool::
+
+ # ethtool -A <ethX> rx off tx off
+ # ethtool -A <ethX> rx on tx on
+
+
+NAPI
+----
+This driver supports NAPI (Rx polling mode).
+For more information on NAPI, see
+https://www.linuxfoundation.org/collaborate/workgroups/networking/napi
+
+
+MACVLAN
+-------
+This driver supports MACVLAN. Kernel support for MACVLAN can be tested by
+checking if the MACVLAN driver is loaded. You can run 'lsmod | grep macvlan' to
+see if the MACVLAN driver is loaded or run 'modprobe macvlan' to try to load
+the MACVLAN driver.
+
+NOTE:
+
+- In passthru mode, you can only set up one MACVLAN device. It will inherit the
+ MAC address of the underlying PF (Physical Function) device.
+
+
+IEEE 802.1ad (QinQ) Support
+---------------------------
+The IEEE 802.1ad standard, informally known as QinQ, allows for multiple VLAN
+IDs within a single Ethernet frame. VLAN IDs are sometimes referred to as
+"tags," and multiple VLAN IDs are thus referred to as a "tag stack." Tag stacks
+allow L2 tunneling and the ability to segregate traffic within a particular
+VLAN ID, among other uses.
+
+NOTES:
+
+- Receive checksum offloads and VLAN acceleration are not supported for 802.1ad
+ (QinQ) packets.
+
+- 0x88A8 traffic will not be received unless VLAN stripping is disabled with
+ the following command::
+
+ # ethool -K <ethX> rxvlan off
+
+- 0x88A8/0x8100 double VLANs cannot be used with 0x8100 or 0x8100/0x8100 VLANS
+ configured on the same port. 0x88a8/0x8100 traffic will not be received if
+ 0x8100 VLANs are configured.
+
+- The VF can only transmit 0x88A8/0x8100 (i.e., 802.1ad/802.1Q) traffic if:
+
+ 1) The VF is not assigned a port VLAN.
+ 2) spoofchk is disabled from the PF. If you enable spoofchk, the VF will
+ not transmit 0x88A8/0x8100 traffic.
+
+- The VF may not receive all network traffic based on the Inner VLAN header
+ when VF true promiscuous mode (vf-true-promisc-support) and double VLANs are
+ enabled in SR-IOV mode.
+
+The following are examples of how to configure 802.1ad (QinQ)::
+
+ # ip link add link eth0 eth0.24 type vlan proto 802.1ad id 24
+ # ip link add link eth0.24 eth0.24.371 type vlan proto 802.1Q id 371
+
+ Where "24" and "371" are example VLAN IDs.
+
+
+Tunnel/Overlay Stateless Offloads
+---------------------------------
+Supported tunnels and overlays include VXLAN, GENEVE, and others depending on
+hardware and software configuration. Stateless offloads are enabled by default.
+
+To view the current state of all offloads::
+
+ # ethtool -k <ethX>
+
+
+UDP Segmentation Offload
+------------------------
+Allows the adapter to offload transmit segmentation of UDP packets with
+payloads up to 64K into valid Ethernet frames. Because the adapter hardware is
+able to complete data segmentation much faster than operating system software,
+this feature may improve transmission performance.
+In addition, the adapter may use fewer CPU resources.
+
+NOTE:
+
+- The application sending UDP packets must support UDP segmentation offload.
+
+To enable/disable UDP Segmentation Offload, issue the following command::
+
+ # ethtool -K <ethX> tx-udp-segmentation [off|on]
+
+
+Performance Optimization
+========================
+Driver defaults are meant to fit a wide variety of workloads, but if further
+optimization is required, we recommend experimenting with the following
+settings.
+
+
+Rx Descriptor Ring Size
+-----------------------
+To reduce the number of Rx packet discards, increase the number of Rx
+descriptors for each Rx ring using ethtool.
+
+ Check if the interface is dropping Rx packets due to buffers being full
+ (rx_dropped.nic can mean that there is no PCIe bandwidth)::
+
+ # ethtool -S <ethX> | grep "rx_dropped"
+
+ If the previous command shows drops on queues, it may help to increase
+ the number of descriptors using 'ethtool -G'::
+
+ # ethtool -G <ethX> rx <N>
+ Where <N> is the desired number of ring entries/descriptors
+
+ This can provide temporary buffering for issues that create latency while
+ the CPUs process descriptors.
+
+
+Interrupt Rate Limiting
+-----------------------
+This driver supports an adaptive interrupt throttle rate (ITR) mechanism that
+is tuned for general workloads. The user can customize the interrupt rate
+control for specific workloads, via ethtool, adjusting the number of
+microseconds between interrupts.
+
+To set the interrupt rate manually, you must disable adaptive mode::
+
+ # ethtool -C <ethX> adaptive-rx off adaptive-tx off
+
+For lower CPU utilization:
+
+ Disable adaptive ITR and lower Rx and Tx interrupts. The examples below
+ affect every queue of the specified interface.
+
+ Setting rx-usecs and tx-usecs to 80 will limit interrupts to about
+ 12,500 interrupts per second per queue::
+
+ # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 80 tx-usecs 80
+
+For reduced latency:
+
+ Disable adaptive ITR and ITR by setting rx-usecs and tx-usecs to 0
+ using ethtool::
+
+ # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 0 tx-usecs 0
+
+Per-queue interrupt rate settings:
+
+ The following examples are for queues 1 and 3, but you can adjust other
+ queues.
+
+ To disable Rx adaptive ITR and set static Rx ITR to 10 microseconds or
+ about 100,000 interrupts/second, for queues 1 and 3::
+
+ # ethtool --per-queue <ethX> queue_mask 0xa --coalesce adaptive-rx off
+ rx-usecs 10
+
+ To show the current coalesce settings for queues 1 and 3::
+
+ # ethtool --per-queue <ethX> queue_mask 0xa --show-coalesce
+
+Bounding interrupt rates using rx-usecs-high:
+
+ :Valid Range: 0-236 (0=no limit)
+
+ The range of 0-236 microseconds provides an effective range of 4,237 to
+ 250,000 interrupts per second. The value of rx-usecs-high can be set
+ independently of rx-usecs and tx-usecs in the same ethtool command, and is
+ also independent of the adaptive interrupt moderation algorithm. The
+ underlying hardware supports granularity in 4-microsecond intervals, so
+ adjacent values may result in the same interrupt rate.
+
+ The following command would disable adaptive interrupt moderation, and allow
+ a maximum of 5 microseconds before indicating a receive or transmit was
+ complete. However, instead of resulting in as many as 200,000 interrupts per
+ second, it limits total interrupts per second to 50,000 via the rx-usecs-high
+ parameter.
+
+ ::
+
+ # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs-high 20
+ rx-usecs 5 tx-usecs 5
+
+
+Virtualized Environments
+------------------------
+In addition to the other suggestions in this section, the following may be
+helpful to optimize performance in VMs.
+
+ Using the appropriate mechanism (vcpupin) in the VM, pin the CPUs to
+ individual LCPUs, making sure to use a set of CPUs included in the
+ device's local_cpulist: ``/sys/class/net/<ethX>/device/local_cpulist``.
+
+ Configure as many Rx/Tx queues in the VM as available. (See the iavf driver
+ documentation for the number of queues supported.) For example::
+
+ # ethtool -L <virt_interface> rx <max> tx <max>
- -> Device Drivers
- -> Network device support (NETDEVICES [=y])
- -> Ethernet driver support
- -> Intel devices
- -> Intel(R) Ethernet Connection E800 Series Support
Support
=======
For general information, go to the Intel support website at:
-
https://www.intel.com/support/
or the Intel Wired Networking project hosted by Sourceforge at:
-
https://sourceforge.net/projects/e1000
If an issue is identified with the released source code on a supported kernel
with a supported adapter, email the specific information related to the issue
+
+
+Trademarks
+==========
+Intel is a trademark or registered trademark of Intel Corporation or its
+subsidiaries in the United States and/or other countries.
+
+* Other names and brands may be claimed as the property of others.
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 61a358301f12..581bfce86dca 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -180,7 +180,7 @@ min_adv_mss - INTEGER
fib_notify_on_flag_change - INTEGER
Whether to emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/
- RTM_F_TRAP flags are changed.
+ RTM_F_TRAP/RTM_F_OFFLOAD_FAILED flags are changed.
After installing a route to the kernel, user space receives an
acknowledgment, which means the route was installed in the kernel,
@@ -197,6 +197,7 @@ fib_notify_on_flag_change - INTEGER
- 0 - Do not emit notifications.
- 1 - Emit notifications.
+ - 2 - Emit notifications only for RTM_F_OFFLOAD_FAILED flag change.
IP Fragmentation:
@@ -1445,6 +1446,25 @@ rp_filter - INTEGER
Default value is 0. Note that some distributions enable it
in startup scripts.
+src_valid_mark - BOOLEAN
+ - 0 - The fwmark of the packet is not included in reverse path
+ route lookup. This allows for asymmetric routing configurations
+ utilizing the fwmark in only one direction, e.g., transparent
+ proxying.
+
+ - 1 - The fwmark of the packet is included in reverse path route
+ lookup. This permits rp_filter to function when the fwmark is
+ used for routing traffic in both directions.
+
+ This setting also affects the utilization of fmwark when
+ performing source address selection for ICMP replies, or
+ determining addresses stored for the IPOPT_TS_TSANDADDR and
+ IPOPT_RR IP options.
+
+ The max value from conf/{all,interface}/src_valid_mark is used.
+
+ Default value is 0.
+
arp_filter - BOOLEAN
- 1 - Allows you to have multiple network interfaces on the same
subnet, and have the ARPs for each interface be answered
@@ -1797,7 +1817,7 @@ nexthop_compat_mode - BOOLEAN
fib_notify_on_flag_change - INTEGER
Whether to emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/
- RTM_F_TRAP flags are changed.
+ RTM_F_TRAP/RTM_F_OFFLOAD_FAILED flags are changed.
After installing a route to the kernel, user space receives an
acknowledgment, which means the route was installed in the kernel,
@@ -1814,6 +1834,7 @@ fib_notify_on_flag_change - INTEGER
- 0 - Do not emit notifications.
- 1 - Emit notifications.
+ - 2 - Emit notifications only for RTM_F_OFFLOAD_FAILED flag change.
IPv6 Fragmentation:
diff --git a/MAINTAINERS b/MAINTAINERS
index d1b0057a9797..cbf4b94f89d4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3445,6 +3445,15 @@ F: Documentation/devicetree/bindings/mips/brcm/
F: arch/mips/bcm47xx/*
F: arch/mips/include/asm/mach-bcm47xx/*
+BROADCOM BCM4908 ETHERNET DRIVER
+M: Rafał Miłecki <[email protected]>
+S: Maintained
+F: Documentation/devicetree/bindings/net/brcm,bcm4908enet.yaml
+F: drivers/net/ethernet/broadcom/bcm4908enet.*
+F: drivers/net/ethernet/broadcom/unimac.h
+
BROADCOM BCM5301X ARM ARCHITECTURE
M: Hauke Mehrtens <[email protected]>
M: Rafał Miłecki <[email protected]>
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 4bdf8fbe75a6..bcf9e0a410fd 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -51,6 +51,14 @@ config B44_PCI
depends on B44_PCI_AUTOSELECT && B44_PCICORE_AUTOSELECT
default y
+config BCM4908ENET
+ tristate "Broadcom BCM4908 internal mac support"
+ depends on ARCH_BCM4908 || COMPILE_TEST
+ default y
+ help
+ This driver supports Ethernet controller integrated into Broadcom
+ BCM4908 family SoCs.
+
config BCM63XX_ENET
tristate "Broadcom 63xx internal mac support"
depends on BCM63XX
diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile
index 7046ad6d3d0e..379012de3569 100644
--- a/drivers/net/ethernet/broadcom/Makefile
+++ b/drivers/net/ethernet/broadcom/Makefile
@@ -4,6 +4,7 @@
#
obj-$(CONFIG_B44) += b44.o
+obj-$(CONFIG_BCM4908ENET) += bcm4908enet.o
obj-$(CONFIG_BCM63XX_ENET) += bcm63xx_enet.o
obj-$(CONFIG_BCMGENET) += genet/
obj-$(CONFIG_BNX2) += bnx2.o
diff --git a/drivers/net/ethernet/broadcom/bcm4908enet.c b/drivers/net/ethernet/broadcom/bcm4908enet.c
new file mode 100644
index 000000000000..d68b328e7596
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bcm4908enet.c
@@ -0,0 +1,676 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Rafał Miłecki <[email protected]>
+ */
+
+#include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "bcm4908enet.h"
+#include "unimac.h"
+
+#define ENET_DMA_CH_RX_CFG ENET_DMA_CH0_CFG
+#define ENET_DMA_CH_TX_CFG ENET_DMA_CH1_CFG
+#define ENET_DMA_CH_RX_STATE_RAM ENET_DMA_CH0_STATE_RAM
+#define ENET_DMA_CH_TX_STATE_RAM ENET_DMA_CH1_STATE_RAM
+
+#define ENET_TX_BDS_NUM 200
+#define ENET_RX_BDS_NUM 200
+#define ENET_RX_BDS_NUM_MAX 8192
+
+#define ENET_DMA_INT_DEFAULTS (ENET_DMA_CH_CFG_INT_DONE | \
+ ENET_DMA_CH_CFG_INT_NO_DESC | \
+ ENET_DMA_CH_CFG_INT_BUFF_DONE)
+#define ENET_DMA_MAX_BURST_LEN 8 /* in 64 bit words */
+
+#define ENET_MTU_MIN 60
+#define ENET_MTU_MAX 1500 /* Is it possible to support 2044? */
+#define ENET_MTU_MAX_EXTRA_SIZE 32 /* L2 */
+
+struct bcm4908enet_dma_ring_bd {
+ __le32 ctl;
+ __le32 addr;
+} __packed;
+
+struct bcm4908enet_dma_ring_slot {
+ struct sk_buff *skb;
+ unsigned int len;
+ dma_addr_t dma_addr;
+};
+
+struct bcm4908enet_dma_ring {
+ int is_tx;
+ int read_idx;
+ int write_idx;
+ int length;
+ u16 cfg_block;
+ u16 st_ram_block;
+
+ union {
+ void *cpu_addr;
+ struct bcm4908enet_dma_ring_bd *buf_desc;
+ };
+ dma_addr_t dma_addr;
+
+ struct bcm4908enet_dma_ring_slot *slots;
+};
+
+struct bcm4908enet {
+ struct device *dev;
+ struct net_device *netdev;
+ struct napi_struct napi;
+ void __iomem *base;
+
+ struct bcm4908enet_dma_ring tx_ring;
+ struct bcm4908enet_dma_ring rx_ring;
+};
+
+/***
+ * R/W ops
+ */
+
+static inline u32 enet_read(struct bcm4908enet *enet, u16 offset)
+{
+ return readl(enet->base + offset);
+}
+
+static inline void enet_write(struct bcm4908enet *enet, u16 offset, u32 value)
+{
+ writel(value, enet->base + offset);
+}
+
+static inline void enet_maskset(struct bcm4908enet *enet, u16 offset, u32 mask, u32 set)
+{
+ u32 val;
+
+ WARN_ON(set & ~mask);
+
+ val = enet_read(enet, offset);
+ val = (val & ~mask) | (set & mask);
+ enet_write(enet, offset, val);
+}
+
+static inline void enet_set(struct bcm4908enet *enet, u16 offset, u32 set)
+{
+ enet_maskset(enet, offset, set, set);
+}
+
+static inline u32 enet_umac_read(struct bcm4908enet *enet, u16 offset)
+{
+ return enet_read(enet, ENET_UNIMAC + offset);
+}
+
+static inline void enet_umac_write(struct bcm4908enet *enet, u16 offset, u32 value)
+{
+ enet_write(enet, ENET_UNIMAC + offset, value);
+}
+
+static inline void enet_umac_maskset(struct bcm4908enet *enet, u16 offset, u32 mask, u32 set)
+{
+ enet_maskset(enet, ENET_UNIMAC + offset, mask, set);
+}
+
+static inline void enet_umac_set(struct bcm4908enet *enet, u16 offset, u32 set)
+{
+ enet_set(enet, ENET_UNIMAC + offset, set);
+}
+
+/***
+ * Helpers
+ */
+
+static void bcm4908enet_intrs_on(struct bcm4908enet *enet)
+{
+ enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS);
+}
+
+static void bcm4908enet_intrs_off(struct bcm4908enet *enet)
+{
+ enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, 0);
+}
+
+static void bcm4908enet_intrs_ack(struct bcm4908enet *enet)
+{
+ enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS);
+}
+
+/***
+ * DMA
+ */
+
+static int bcm4908_dma_alloc_buf_descs(struct bcm4908enet *enet, struct bcm4908enet_dma_ring *ring)
+{
+ int size = ring->length * sizeof(struct bcm4908enet_dma_ring_bd);
+ struct device *dev = enet->dev;
+
+ ring->cpu_addr = dma_alloc_coherent(dev, size, &ring->dma_addr, GFP_KERNEL);
+ if (!ring->cpu_addr)
+ return -ENOMEM;
+
+ if (((uintptr_t)ring->cpu_addr) & (0x40 - 1)) {
+ dev_err(dev, "Invalid DMA ring alignment\n");
+ goto err_free_buf_descs;
+ }
+
+ ring->slots = kzalloc(ring->length * sizeof(*ring->slots), GFP_KERNEL);
+ if (!ring->slots)
+ goto err_free_buf_descs;
+
+ memset(ring->cpu_addr, 0, size);
+
+ ring->read_idx = 0;
+ ring->write_idx = 0;
+
+ return 0;
+
+err_free_buf_descs:
+ dma_free_coherent(dev, size, ring->cpu_addr, ring->dma_addr);
+ return -ENOMEM;
+}
+
+static void bcm4908enet_dma_free(struct bcm4908enet *enet)
+{
+ struct bcm4908enet_dma_ring *tx_ring = &enet->tx_ring;
+ struct bcm4908enet_dma_ring *rx_ring = &enet->rx_ring;
+ struct device *dev = enet->dev;
+ int size;
+
+ size = rx_ring->length * sizeof(struct bcm4908enet_dma_ring_bd);
+ if (rx_ring->cpu_addr)
+ dma_free_coherent(dev, size, rx_ring->cpu_addr, rx_ring->dma_addr);
+ kfree(rx_ring->slots);
+
+ size = tx_ring->length * sizeof(struct bcm4908enet_dma_ring_bd);
+ if (tx_ring->cpu_addr)
+ dma_free_coherent(dev, size, tx_ring->cpu_addr, tx_ring->dma_addr);
+ kfree(tx_ring->slots);
+}
+
+static int bcm4908enet_dma_alloc(struct bcm4908enet *enet)
+{
+ struct bcm4908enet_dma_ring *tx_ring = &enet->tx_ring;
+ struct bcm4908enet_dma_ring *rx_ring = &enet->rx_ring;
+ struct device *dev = enet->dev;
+ int err;
+
+ tx_ring->length = ENET_TX_BDS_NUM;
+ tx_ring->is_tx = 1;
+ tx_ring->cfg_block = ENET_DMA_CH_TX_CFG;
+ tx_ring->st_ram_block = ENET_DMA_CH_TX_STATE_RAM;
+ err = bcm4908_dma_alloc_buf_descs(enet, tx_ring);
+ if (err) {
+ dev_err(dev, "Failed to alloc TX buf descriptors: %d\n", err);
+ return err;
+ }
+
+ rx_ring->length = ENET_RX_BDS_NUM;
+ rx_ring->is_tx = 0;
+ rx_ring->cfg_block = ENET_DMA_CH_RX_CFG;
+ rx_ring->st_ram_block = ENET_DMA_CH_RX_STATE_RAM;
+ err = bcm4908_dma_alloc_buf_descs(enet, rx_ring);
+ if (err) {
+ dev_err(dev, "Failed to alloc RX buf descriptors: %d\n", err);
+ bcm4908enet_dma_free(enet);
+ return err;
+ }
+
+ return 0;
+}
+
+static void bcm4908enet_dma_reset(struct bcm4908enet *enet)
+{
+ struct bcm4908enet_dma_ring *rings[] = { &enet->rx_ring, &enet->tx_ring };
+ int i;
+
+ /* Disable the DMA controller and channel */
+ for (i = 0; i < ARRAY_SIZE(rings); i++)
+ enet_write(enet, rings[i]->cfg_block + ENET_DMA_CH_CFG, 0);
+ enet_maskset(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_MASTER_EN, 0);
+
+ /* Reset channels state */
+ for (i = 0; i < ARRAY_SIZE(rings); i++) {
+ struct bcm4908enet_dma_ring *ring = rings[i];
+
+ enet_write(enet, ring->st_ram_block + ENET_DMA_CH_STATE_RAM_BASE_DESC_PTR, 0);
+ enet_write(enet, ring->st_ram_block + ENET_DMA_CH_STATE_RAM_STATE_DATA, 0);
+ enet_write(enet, ring->st_ram_block + ENET_DMA_CH_STATE_RAM_DESC_LEN_STATUS, 0);
+ enet_write(enet, ring->st_ram_block + ENET_DMA_CH_STATE_RAM_DESC_BASE_BUFPTR, 0);
+ }
+}
+
+static int bcm4908enet_dma_alloc_rx_buf(struct bcm4908enet *enet, unsigned int idx)
+{
+ struct bcm4908enet_dma_ring_bd *buf_desc = &enet->rx_ring.buf_desc[idx];
+ struct bcm4908enet_dma_ring_slot *slot = &enet->rx_ring.slots[idx];
+ struct device *dev = enet->dev;
+ u32 tmp;
+ int err;
+
+ slot->len = ENET_MTU_MAX + ENET_MTU_MAX_EXTRA_SIZE;
+
+ slot->skb = netdev_alloc_skb(enet->netdev, slot->len);
+ if (!slot->skb)
+ return -ENOMEM;
+
+ slot->dma_addr = dma_map_single(dev, slot->skb->data, slot->len, DMA_FROM_DEVICE);
+ err = dma_mapping_error(dev, slot->dma_addr);
+ if (err) {
+ dev_err(dev, "Failed to map DMA buffer: %d\n", err);
+ kfree_skb(slot->skb);
+ slot->skb = NULL;
+ return err;
+ }
+
+ tmp = slot->len << DMA_CTL_LEN_DESC_BUFLENGTH_SHIFT;
+ tmp |= DMA_CTL_STATUS_OWN;
+ if (idx == enet->rx_ring.length - 1)
+ tmp |= DMA_CTL_STATUS_WRAP;
+ buf_desc->ctl = cpu_to_le32(tmp);
+ buf_desc->addr = cpu_to_le32(slot->dma_addr);
+
+ return 0;
+}
+
+static void bcm4908enet_dma_ring_init(struct bcm4908enet *enet,
+ struct bcm4908enet_dma_ring *ring)
+{
+ int reset_channel = 0; /* We support only 1 main channel (with TX and RX) */
+ int reset_subch = ring->is_tx ? 1 : 0;
+
+ /* Reset the DMA channel */
+ enet_write(enet, ENET_DMA_CTRL_CHANNEL_RESET, BIT(reset_channel * 2 + reset_subch));
+ enet_write(enet, ENET_DMA_CTRL_CHANNEL_RESET, 0);
+
+ enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG, 0);
+ enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_MAX_BURST, ENET_DMA_MAX_BURST_LEN);
+ enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, 0);
+
+ enet_write(enet, ring->st_ram_block + ENET_DMA_CH_STATE_RAM_BASE_DESC_PTR,
+ (uint32_t)ring->dma_addr);
+}
+
+static void bcm4908enet_dma_uninit(struct bcm4908enet *enet)
+{
+ struct bcm4908enet_dma_ring *rx_ring = &enet->rx_ring;
+ struct bcm4908enet_dma_ring_slot *slot;
+ struct device *dev = enet->dev;
+ int i;
+
+ for (i = rx_ring->length - 1; i >= 0; i--) {
+ slot = &rx_ring->slots[i];
+ if (!slot->skb)
+ continue;
+ dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_FROM_DEVICE);
+ kfree_skb(slot->skb);
+ slot->skb = NULL;
+ }
+}
+
+static int bcm4908enet_dma_init(struct bcm4908enet *enet)
+{
+ struct bcm4908enet_dma_ring *rx_ring = &enet->rx_ring;
+ struct device *dev = enet->dev;
+ int err;
+ int i;
+
+ for (i = 0; i < rx_ring->length; i++) {
+ err = bcm4908enet_dma_alloc_rx_buf(enet, i);
+ if (err) {
+ dev_err(dev, "Failed to alloc RX buffer: %d\n", err);
+ bcm4908enet_dma_uninit(enet);
+ return err;
+ }
+ }
+
+ bcm4908enet_dma_ring_init(enet, &enet->tx_ring);
+ bcm4908enet_dma_ring_init(enet, &enet->rx_ring);
+
+ return 0;
+}
+
+static void bcm4908enet_dma_tx_ring_ensable(struct bcm4908enet *enet,
+ struct bcm4908enet_dma_ring *ring)
+{
+ enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG, ENET_DMA_CH_CFG_ENABLE);
+}
+
+static void bcm4908enet_dma_tx_ring_disable(struct bcm4908enet *enet,
+ struct bcm4908enet_dma_ring *ring)
+{
+ enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG, 0);
+}
+
+static void bcm4908enet_dma_rx_ring_enable(struct bcm4908enet *enet,
+ struct bcm4908enet_dma_ring *ring)
+{
+ enet_set(enet, ring->cfg_block + ENET_DMA_CH_CFG, ENET_DMA_CH_CFG_ENABLE);
+}
+
+static void bcm4908enet_dma_rx_ring_disable(struct bcm4908enet *enet,
+ struct bcm4908enet_dma_ring *ring)
+{
+ unsigned long deadline;
+ u32 tmp;
+
+ enet_maskset(enet, ring->cfg_block + ENET_DMA_CH_CFG, ENET_DMA_CH_CFG_ENABLE, 0);
+
+ deadline = jiffies + usecs_to_jiffies(2000);
+ do {
+ tmp = enet_read(enet, ring->cfg_block + ENET_DMA_CH_CFG);
+ if (!(tmp & ENET_DMA_CH_CFG_ENABLE))
+ return;
+ enet_maskset(enet, ring->cfg_block + ENET_DMA_CH_CFG, ENET_DMA_CH_CFG_ENABLE, 0);
+ usleep_range(10, 30);
+ } while (!time_after_eq(jiffies, deadline));
+
+ dev_warn(enet->dev, "Timeout waiting for DMA TX stop\n");
+}
+
+/***
+ * Ethernet driver
+ */
+
+static void bcm4908enet_gmac_init(struct bcm4908enet *enet)
+{
+ u32 cmd;
+
+ cmd = enet_umac_read(enet, UMAC_CMD);
+ enet_umac_write(enet, UMAC_CMD, cmd | CMD_SW_RESET);
+ enet_umac_write(enet, UMAC_CMD, cmd & ~CMD_SW_RESET);
+
+ enet_set(enet, ENET_FLUSH, ENET_FLUSH_RXFIFO_FLUSH | ENET_FLUSH_TXFIFO_FLUSH);
+ enet_maskset(enet, ENET_FLUSH, ENET_FLUSH_RXFIFO_FLUSH | ENET_FLUSH_TXFIFO_FLUSH, 0);
+
+ enet_set(enet, ENET_MIB_CTRL, ENET_MIB_CTRL_CLR_MIB);
+ enet_maskset(enet, ENET_MIB_CTRL, ENET_MIB_CTRL_CLR_MIB, 0);
+
+ cmd = enet_umac_read(enet, UMAC_CMD);
+ cmd &= ~(CMD_SPEED_MASK << CMD_SPEED_SHIFT);
+ cmd &= ~CMD_TX_EN;
+ cmd &= ~CMD_RX_EN;
+ cmd |= CMD_SPEED_1000 << CMD_SPEED_SHIFT;
+ enet_umac_write(enet, UMAC_CMD, cmd);
+
+ enet_maskset(enet, ENET_GMAC_STATUS,
+ ENET_GMAC_STATUS_ETH_SPEED_MASK |
+ ENET_GMAC_STATUS_HD |
+ ENET_GMAC_STATUS_AUTO_CFG_EN |
+ ENET_GMAC_STATUS_LINK_UP,
+ ENET_GMAC_STATUS_ETH_SPEED_1000 |
+ ENET_GMAC_STATUS_AUTO_CFG_EN |
+ ENET_GMAC_STATUS_LINK_UP);
+}
+
+static irqreturn_t bcm4908enet_irq_handler(int irq, void *dev_id)
+{
+ struct bcm4908enet *enet = dev_id;
+
+ bcm4908enet_intrs_off(enet);
+ bcm4908enet_intrs_ack(enet);
+
+ napi_schedule(&enet->napi);
+
+ return IRQ_HANDLED;
+}
+
+static int bcm4908enet_open(struct net_device *netdev)
+{
+ struct bcm4908enet *enet = netdev_priv(netdev);
+ struct device *dev = enet->dev;
+ int err;
+
+ err = request_irq(netdev->irq, bcm4908enet_irq_handler, 0, "enet", enet);
+ if (err) {
+ dev_err(dev, "Failed to request IRQ %d: %d\n", netdev->irq, err);
+ return err;
+ }
+
+ bcm4908enet_gmac_init(enet);
+ bcm4908enet_dma_reset(enet);
+ bcm4908enet_dma_init(enet);
+
+ enet_umac_set(enet, UMAC_CMD, CMD_TX_EN | CMD_RX_EN);
+
+ enet_set(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_MASTER_EN);
+ enet_maskset(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_FLOWC_CH1_EN, 0);
+ bcm4908enet_dma_rx_ring_enable(enet, &enet->rx_ring);
+
+ napi_enable(&enet->napi);
+ netif_carrier_on(netdev);
+ netif_start_queue(netdev);
+
+ bcm4908enet_intrs_ack(enet);
+ bcm4908enet_intrs_on(enet);
+
+ return 0;
+}
+
+static int bcm4908enet_stop(struct net_device *netdev)
+{
+ struct bcm4908enet *enet = netdev_priv(netdev);
+
+ netif_stop_queue(netdev);
+ netif_carrier_off(netdev);
+ napi_disable(&enet->napi);
+
+ bcm4908enet_dma_rx_ring_disable(enet, &enet->rx_ring);
+ bcm4908enet_dma_tx_ring_disable(enet, &enet->tx_ring);
+
+ bcm4908enet_dma_uninit(enet);
+
+ free_irq(enet->netdev->irq, enet);
+
+ return 0;
+}
+
+static int bcm4908enet_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct bcm4908enet *enet = netdev_priv(netdev);
+ struct bcm4908enet_dma_ring *ring = &enet->tx_ring;
+ struct bcm4908enet_dma_ring_slot *slot;
+ struct device *dev = enet->dev;
+ struct bcm4908enet_dma_ring_bd *buf_desc;
+ int free_buf_descs;
+ u32 tmp;
+
+ /* Free transmitted skbs */
+ while (ring->read_idx != ring->write_idx) {
+ buf_desc = &ring->buf_desc[ring->read_idx];
+ if (buf_desc->ctl & DMA_CTL_STATUS_OWN)
+ break;
+ slot = &ring->slots[ring->read_idx];
+
+ dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE);
+ dev_kfree_skb(slot->skb);
+ if (++ring->read_idx == ring->length)
+ ring->read_idx = 0;
+ }
+
+ /* Don't use the last empty buf descriptor */
+ if (ring->read_idx <= ring->write_idx)
+ free_buf_descs = ring->read_idx - ring->write_idx + ring->length;
+ else
+ free_buf_descs = ring->read_idx - ring->write_idx;
+ if (free_buf_descs < 2)
+ return NETDEV_TX_BUSY;
+
+ /* Hardware removes OWN bit after sending data */
+ buf_desc = &ring->buf_desc[ring->write_idx];
+ if (unlikely(le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN)) {
+ netif_stop_queue(netdev);
+ return NETDEV_TX_BUSY;
+ }
+
+ slot = &ring->slots[ring->write_idx];
+ slot->skb = skb;
+ slot->len = skb->len;
+ slot->dma_addr = dma_map_single(dev, skb->data, skb->len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, slot->dma_addr)))
+ return NETDEV_TX_BUSY;
+
+ tmp = skb->len << DMA_CTL_LEN_DESC_BUFLENGTH_SHIFT;
+ tmp |= DMA_CTL_STATUS_OWN;
+ tmp |= DMA_CTL_STATUS_SOP;
+ tmp |= DMA_CTL_STATUS_EOP;
+ tmp |= DMA_CTL_STATUS_APPEND_CRC;
+ if (ring->write_idx + 1 == ring->length - 1)
+ tmp |= DMA_CTL_STATUS_WRAP;
+
+ buf_desc->addr = cpu_to_le32((uint32_t)slot->dma_addr);
+ buf_desc->ctl = cpu_to_le32(tmp);
+
+ bcm4908enet_dma_tx_ring_ensable(enet, &enet->tx_ring);
+
+ if (++ring->write_idx == ring->length - 1)
+ ring->write_idx = 0;
+ enet->netdev->stats.tx_bytes += skb->len;
+ enet->netdev->stats.tx_packets++;
+
+ return NETDEV_TX_OK;
+}
+
+static int bcm4908enet_poll(struct napi_struct *napi, int weight)
+{
+ struct bcm4908enet *enet = container_of(napi, struct bcm4908enet, napi);
+ struct device *dev = enet->dev;
+ int handled = 0;
+
+ while (handled < weight) {
+ struct bcm4908enet_dma_ring_bd *buf_desc;
+ struct bcm4908enet_dma_ring_slot slot;
+ u32 ctl;
+ int len;
+ int err;
+
+ buf_desc = &enet->rx_ring.buf_desc[enet->rx_ring.read_idx];
+ ctl = le32_to_cpu(buf_desc->ctl);
+ if (ctl & DMA_CTL_STATUS_OWN)
+ break;
+
+ slot = enet->rx_ring.slots[enet->rx_ring.read_idx];
+
+ /* Provide new buffer before unpinning the old one */
+ err = bcm4908enet_dma_alloc_rx_buf(enet, enet->rx_ring.read_idx);
+ if (err)
+ break;
+
+ if (++enet->rx_ring.read_idx == enet->rx_ring.length)
+ enet->rx_ring.read_idx = 0;
+
+ len = (ctl & DMA_CTL_LEN_DESC_BUFLENGTH) >> DMA_CTL_LEN_DESC_BUFLENGTH_SHIFT;
+
+ if (len < ENET_MTU_MIN ||
+ (ctl & (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) != (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) {
+ enet->netdev->stats.rx_dropped++;
+ break;
+ }
+
+ dma_unmap_single(dev, slot.dma_addr, slot.len, DMA_FROM_DEVICE);
+
+ skb_put(slot.skb, len - 4 + 2);
+ slot.skb->protocol = eth_type_trans(slot.skb, enet->netdev);
+ netif_receive_skb(slot.skb);
+
+ enet->netdev->stats.rx_packets++;
+ enet->netdev->stats.rx_bytes += len;
+ }
+
+ if (handled < weight) {
+ napi_complete_done(napi, handled);
+ bcm4908enet_intrs_on(enet);
+ }
+
+ return handled;
+}
+
+static const struct net_device_ops bcm96xx_netdev_ops = {
+ .ndo_open = bcm4908enet_open,
+ .ndo_stop = bcm4908enet_stop,
+ .ndo_start_xmit = bcm4908enet_start_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
+};
+
+static int bcm4908enet_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct net_device *netdev;
+ struct bcm4908enet *enet;
+ int err;
+
+ netdev = devm_alloc_etherdev(dev, sizeof(*enet));
+ if (!netdev)
+ return -ENOMEM;
+
+ enet = netdev_priv(netdev);
+ enet->dev = dev;
+ enet->netdev = netdev;
+
+ enet->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(enet->base)) {
+ dev_err(dev, "Failed to map registers: %ld\n", PTR_ERR(enet->base));
+ return PTR_ERR(enet->base);
+ }
+
+ netdev->irq = platform_get_irq_byname(pdev, "rx");
+ if (netdev->irq < 0)
+ return netdev->irq;
+
+ dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
+
+ err = bcm4908enet_dma_alloc(enet);
+ if (err)
+ return err;
+
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+ eth_hw_addr_random(netdev);
+ netdev->netdev_ops = &bcm96xx_netdev_ops;
+ netdev->min_mtu = ETH_ZLEN;
+ netdev->mtu = ENET_MTU_MAX;
+ netdev->max_mtu = ENET_MTU_MAX;
+ netif_napi_add(netdev, &enet->napi, bcm4908enet_poll, 64);
+
+ err = register_netdev(netdev);
+ if (err) {
+ bcm4908enet_dma_free(enet);
+ return err;
+ }
+
+ platform_set_drvdata(pdev, enet);
+
+ return 0;
+}
+
+static int bcm4908enet_remove(struct platform_device *pdev)
+{
+ struct bcm4908enet *enet = platform_get_drvdata(pdev);
+
+ unregister_netdev(enet->netdev);
+ netif_napi_del(&enet->napi);
+ bcm4908enet_dma_free(enet);
+
+ return 0;
+}
+
+static const struct of_device_id bcm4908enet_of_match[] = {
+ { .compatible = "brcm,bcm4908enet"},
+ {},
+};
+
+static struct platform_driver bcm4908enet_driver = {
+ .driver = {
+ .name = "bcm4908enet",
+ .of_match_table = bcm4908enet_of_match,
+ },
+ .probe = bcm4908enet_probe,
+ .remove = bcm4908enet_remove,
+};
+module_platform_driver(bcm4908enet_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(of, bcm4908enet_of_match);
diff --git a/drivers/net/ethernet/broadcom/bcm4908enet.h b/drivers/net/ethernet/broadcom/bcm4908enet.h
new file mode 100644
index 000000000000..11aadf0715d3
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bcm4908enet.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __BCM4908ENET_H
+#define __BCM4908ENET_H
+
+#define ENET_CONTROL 0x000
+#define ENET_MIB_CTRL 0x004
+#define ENET_MIB_CTRL_CLR_MIB 0x00000001
+#define ENET_RX_ERR_MASK 0x008
+#define ENET_MIB_MAX_PKT_SIZE 0x00C
+#define ENET_MIB_MAX_PKT_SIZE_VAL 0x00003fff
+#define ENET_DIAG_OUT 0x01c
+#define ENET_ENABLE_DROP_PKT 0x020
+#define ENET_IRQ_ENABLE 0x024
+#define ENET_IRQ_ENABLE_OVFL 0x00000001
+#define ENET_GMAC_STATUS 0x028
+#define ENET_GMAC_STATUS_ETH_SPEED_MASK 0x00000003
+#define ENET_GMAC_STATUS_ETH_SPEED_10 0x00000000
+#define ENET_GMAC_STATUS_ETH_SPEED_100 0x00000001
+#define ENET_GMAC_STATUS_ETH_SPEED_1000 0x00000002
+#define ENET_GMAC_STATUS_HD 0x00000004
+#define ENET_GMAC_STATUS_AUTO_CFG_EN 0x00000008
+#define ENET_GMAC_STATUS_LINK_UP 0x00000010
+#define ENET_IRQ_STATUS 0x02c
+#define ENET_IRQ_STATUS_OVFL 0x00000001
+#define ENET_OVERFLOW_COUNTER 0x030
+#define ENET_FLUSH 0x034
+#define ENET_FLUSH_RXFIFO_FLUSH 0x00000001
+#define ENET_FLUSH_TXFIFO_FLUSH 0x00000002
+#define ENET_RSV_SELECT 0x038
+#define ENET_BP_FORCE 0x03c
+#define ENET_BP_FORCE_FORCE 0x00000001
+#define ENET_DMA_RX_OK_TO_SEND_COUNT 0x040
+#define ENET_DMA_RX_OK_TO_SEND_COUNT_VAL 0x0000000f
+#define ENET_TX_CRC_CTRL 0x044
+#define ENET_MIB 0x200
+#define ENET_UNIMAC 0x400
+#define ENET_DMA 0x800
+#define ENET_DMA_CONTROLLER_CFG 0x800
+#define ENET_DMA_CTRL_CFG_MASTER_EN 0x00000001
+#define ENET_DMA_CTRL_CFG_FLOWC_CH1_EN 0x00000002
+#define ENET_DMA_CTRL_CFG_FLOWC_CH3_EN 0x00000004
+#define ENET_DMA_FLOWCTL_CH1_THRESH_LO 0x804
+#define ENET_DMA_FLOWCTL_CH1_THRESH_HI 0x808
+#define ENET_DMA_FLOWCTL_CH1_ALLOC 0x80c
+#define ENET_DMA_FLOWCTL_CH1_ALLOC_FORCE 0x80000000
+#define ENET_DMA_FLOWCTL_CH3_THRESH_LO 0x810
+#define ENET_DMA_FLOWCTL_CH3_THRESH_HI 0x814
+#define ENET_DMA_FLOWCTL_CH3_ALLOC 0x818
+#define ENET_DMA_FLOWCTL_CH5_THRESH_LO 0x81C
+#define ENET_DMA_FLOWCTL_CH5_THRESH_HI 0x820
+#define ENET_DMA_FLOWCTL_CH5_ALLOC 0x824
+#define ENET_DMA_FLOWCTL_CH7_THRESH_LO 0x828
+#define ENET_DMA_FLOWCTL_CH7_THRESH_HI 0x82C
+#define ENET_DMA_FLOWCTL_CH7_ALLOC 0x830
+#define ENET_DMA_CTRL_CHANNEL_RESET 0x834
+#define ENET_DMA_CTRL_CHANNEL_DEBUG 0x838
+#define ENET_DMA_CTRL_GLOBAL_INTERRUPT_STATUS 0x840
+#define ENET_DMA_CTRL_GLOBAL_INTERRUPT_MASK 0x844
+#define ENET_DMA_CH0_CFG 0xa00 /* RX */
+#define ENET_DMA_CH1_CFG 0xa10 /* TX */
+#define ENET_DMA_CH0_STATE_RAM 0xc00 /* RX */
+#define ENET_DMA_CH1_STATE_RAM 0xc10 /* TX */
+
+#define ENET_DMA_CH_CFG 0x00 /* assorted configuration */
+#define ENET_DMA_CH_CFG_ENABLE 0x00000001 /* set to enable channel */
+#define ENET_DMA_CH_CFG_PKT_HALT 0x00000002 /* idle after an EOP flag is detected */
+#define ENET_DMA_CH_CFG_BURST_HALT 0x00000004 /* idle after finish current memory burst */
+#define ENET_DMA_CH_CFG_INT_STAT 0x04 /* interrupts control and status */
+#define ENET_DMA_CH_CFG_INT_MASK 0x08 /* interrupts mask */
+#define ENET_DMA_CH_CFG_INT_BUFF_DONE 0x00000001 /* buffer done */
+#define ENET_DMA_CH_CFG_INT_DONE 0x00000002 /* packet xfer complete */
+#define ENET_DMA_CH_CFG_INT_NO_DESC 0x00000004 /* no valid descriptors */
+#define ENET_DMA_CH_CFG_INT_RX_ERROR 0x00000008 /* rxdma detect client protocol error */
+#define ENET_DMA_CH_CFG_MAX_BURST 0x0c /* max burst length permitted */
+#define ENET_DMA_CH_CFG_MAX_BURST_DESCSIZE_SEL 0x00040000 /* DMA Descriptor Size Selection */
+#define ENET_DMA_CH_CFG_SIZE 0x10
+
+#define ENET_DMA_CH_STATE_RAM_BASE_DESC_PTR 0x00 /* descriptor ring start address */
+#define ENET_DMA_CH_STATE_RAM_STATE_DATA 0x04 /* state/bytes done/ring offset */
+#define ENET_DMA_CH_STATE_RAM_DESC_LEN_STATUS 0x08 /* buffer descriptor status and len */
+#define ENET_DMA_CH_STATE_RAM_DESC_BASE_BUFPTR 0x0c /* buffer descrpitor current processing */
+#define ENET_DMA_CH_STATE_RAM_SIZE 0x10
+
+#define DMA_CTL_STATUS_APPEND_CRC 0x00000100
+#define DMA_CTL_STATUS_APPEND_BRCM_TAG 0x00000200
+#define DMA_CTL_STATUS_PRIO 0x00000C00 /* Prio for Tx */
+#define DMA_CTL_STATUS_WRAP 0x00001000 /* */
+#define DMA_CTL_STATUS_SOP 0x00002000 /* first buffer in packet */
+#define DMA_CTL_STATUS_EOP 0x00004000 /* last buffer in packet */
+#define DMA_CTL_STATUS_OWN 0x00008000 /* cleared by DMA, set by SW */
+#define DMA_CTL_LEN_DESC_BUFLENGTH 0x0fff0000
+#define DMA_CTL_LEN_DESC_BUFLENGTH_SHIFT 16
+#define DMA_CTL_LEN_DESC_MULTICAST 0x40000000
+#define DMA_CTL_LEN_DESC_USEFPM 0x80000000
+
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index e20a1b3267b9..e9e60a935f40 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -272,7 +272,7 @@ struct hnae3_ring_chain_node {
};
#define HNAE3_IS_TX_RING(node) \
- (((node)->flag & (1 << HNAE3_RING_TYPE_B)) == HNAE3_RING_TYPE_TX)
+ (((node)->flag & 1 << HNAE3_RING_TYPE_B) == HNAE3_RING_TYPE_TX)
/* device specification info from firmware */
struct hnae3_dev_specs {
@@ -292,7 +292,6 @@ struct hnae3_client_ops {
int (*init_instance)(struct hnae3_handle *handle);
void (*uninit_instance)(struct hnae3_handle *handle, bool reset);
void (*link_status_change)(struct hnae3_handle *handle, bool state);
- int (*setup_tc)(struct hnae3_handle *handle, u8 tc);
int (*reset_notify)(struct hnae3_handle *handle,
enum hnae3_reset_notify_type type);
void (*process_hw_error)(struct hnae3_handle *handle,
@@ -776,9 +775,9 @@ struct hnae3_handle {
#define hnae3_get_field(origin, mask, shift) (((origin) & (mask)) >> (shift))
#define hnae3_set_bit(origin, shift, val) \
- hnae3_set_field((origin), (0x1 << (shift)), (shift), (val))
+ hnae3_set_field(origin, 0x1 << (shift), shift, val)
#define hnae3_get_bit(origin, shift) \
- hnae3_get_field((origin), (0x1 << (shift)), (shift))
+ hnae3_get_field(origin, 0x1 << (shift), shift)
#define HNAE3_DBG_TM_NODES "tm_nodes"
#define HNAE3_DBG_TM_PRI "tm_priority"
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 36c7813b5996..818ac2c7c7ea 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -162,7 +162,7 @@ static int hns3_dbg_queue_map(struct hnae3_handle *h)
continue;
dev_info(&h->pdev->dev,
- " %4d %4d %4d\n",
+ " %4d %4u %4d\n",
i, global_qid, priv->ring[i].tqp_vector->vector_irq);
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index cf16d5f31f26..9565b7999426 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -32,7 +32,7 @@
#define CREATE_TRACE_POINTS
#include "hns3_trace.h"
-#define hns3_set_field(origin, shift, val) ((origin) |= ((val) << (shift)))
+#define hns3_set_field(origin, shift, val) ((origin) |= (val) << (shift))
#define hns3_tx_bd_count(S) DIV_ROUND_UP(S, HNS3_MAX_BD_SIZE)
#define hns3_rl_err(fmt, ...) \
@@ -2329,7 +2329,7 @@ static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev,
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
pci_ers_result_t ret;
- dev_info(&pdev->dev, "PCI error detected, state(=%d)!!\n", state);
+ dev_info(&pdev->dev, "PCI error detected, state(=%u)!!\n", state);
if (state == pci_channel_io_perm_failure)
return PCI_ERS_RESULT_DISCONNECT;
@@ -4084,7 +4084,7 @@ out_when_alloc_ring_memory:
return -ENOMEM;
}
-int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
+static void hns3_uninit_all_ring(struct hns3_nic_priv *priv)
{
struct hnae3_handle *h = priv->ae_handle;
int i;
@@ -4093,7 +4093,6 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
hns3_fini_ring(&priv->ring[i]);
hns3_fini_ring(&priv->ring[i + h->kinfo.num_tqps]);
}
- return 0;
}
/* Set mac addr if it is configured. or leave it to the AE driver */
@@ -4321,7 +4320,6 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
{
struct net_device *netdev = handle->kinfo.netdev;
struct hns3_nic_priv *priv = netdev_priv(netdev);
- int ret;
if (netdev->reg_state != NETREG_UNINITIALIZED)
unregister_netdev(netdev);
@@ -4347,9 +4345,7 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
hns3_nic_dealloc_vector_data(priv);
- ret = hns3_uninit_all_ring(priv);
- if (ret)
- netdev_err(netdev, "uninit ring error\n");
+ hns3_uninit_all_ring(priv);
hns3_put_ring_config(priv);
@@ -4378,20 +4374,6 @@ static void hns3_link_status_change(struct hnae3_handle *handle, bool linkup)
}
}
-static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
-{
- struct hnae3_knic_private_info *kinfo = &handle->kinfo;
- struct net_device *ndev = kinfo->netdev;
-
- if (tc > HNAE3_MAX_TC)
- return -EINVAL;
-
- if (!ndev)
- return -ENODEV;
-
- return hns3_nic_set_real_num_queue(ndev);
-}
-
static void hns3_clear_tx_ring(struct hns3_enet_ring *ring)
{
while (ring->next_to_clean != ring->next_to_use) {
@@ -4676,9 +4658,7 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
hns3_nic_dealloc_vector_data(priv);
- ret = hns3_uninit_all_ring(priv);
- if (ret)
- netdev_err(netdev, "uninit ring error\n");
+ hns3_uninit_all_ring(priv);
hns3_put_ring_config(priv);
@@ -4828,7 +4808,6 @@ static const struct hnae3_client_ops client_ops = {
.init_instance = hns3_client_init,
.uninit_instance = hns3_client_uninit,
.link_status_change = hns3_link_status_change,
- .setup_tc = hns3_client_setup_tc,
.reset_notify = hns3_reset_notify,
.process_hw_error = hns3_process_hw_error,
};
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index d70af1d0d554..d069b04ee587 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -554,7 +554,7 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value)
}
#define hns3_read_dev(a, reg) \
- hns3_read_reg((a)->io_base, (reg))
+ hns3_read_reg((a)->io_base, reg)
static inline bool hns3_nic_resetting(struct net_device *netdev)
{
@@ -564,7 +564,7 @@ static inline bool hns3_nic_resetting(struct net_device *netdev)
}
#define hns3_write_dev(a, reg, value) \
- hns3_write_reg((a)->io_base, (reg), (value))
+ hns3_write_reg((a)->io_base, reg, value)
#define ring_to_dev(ring) ((ring)->dev)
@@ -588,15 +588,15 @@ static inline unsigned int hns3_page_order(struct hns3_enet_ring *ring)
/* iterator for handling rings in ring group */
#define hns3_for_each_ring(pos, head) \
- for (pos = (head).ring; pos; pos = pos->next)
+ for (pos = (head).ring; (pos); pos = (pos)->next)
#define hns3_get_handle(ndev) \
(((struct hns3_nic_priv *)netdev_priv(ndev))->ae_handle)
-#define hns3_gl_usec_to_reg(int_gl) (int_gl >> 1)
+#define hns3_gl_usec_to_reg(int_gl) ((int_gl) >> 1)
#define hns3_gl_round_down(int_gl) round_down(int_gl, 2)
-#define hns3_rl_usec_to_reg(int_rl) (int_rl >> 2)
+#define hns3_rl_usec_to_reg(int_rl) ((int_rl) >> 2)
#define hns3_rl_round_down(int_rl) round_down(int_rl, 4)
void hns3_ethtool_set_ops(struct net_device *netdev);
@@ -605,7 +605,6 @@ int hns3_set_channels(struct net_device *netdev,
void hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget);
int hns3_init_all_ring(struct hns3_nic_priv *priv);
-int hns3_uninit_all_ring(struct hns3_nic_priv *priv);
int hns3_nic_reset_all_ring(struct hnae3_handle *h);
void hns3_fini_ring(struct hns3_enet_ring *ring);
netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 79e0a9b14b68..adcec4ea7cb9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -456,7 +456,7 @@ static void *hns3_update_strings(u8 *data, const struct hns3_stats *stats,
data[ETH_GSTRING_LEN - 1] = '\0';
/* first, prepend the prefix string */
- n1 = scnprintf(data, MAX_PREFIX_SIZE, "%s%d_",
+ n1 = scnprintf(data, MAX_PREFIX_SIZE, "%s%u_",
prefix, i);
size_left = (ETH_GSTRING_LEN - 1) - n1;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index e7c915eabc8a..ff52a65b4cff 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -1144,9 +1144,9 @@ static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
}
#define hclge_write_dev(a, reg, value) \
- hclge_write_reg((a)->io_base, (reg), (value))
+ hclge_write_reg((a)->io_base, reg, value)
#define hclge_read_dev(a, reg) \
- hclge_read_reg((a)->io_base, (reg))
+ hclge_read_reg((a)->io_base, reg)
static inline u32 hclge_read_reg(u8 __iomem *base, u32 reg)
{
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index e08d11b8ecf1..5bf5db91d16c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -176,29 +176,6 @@ static int hclge_map_update(struct hclge_dev *hdev)
return hclge_rss_init_hw(hdev);
}
-static int hclge_client_setup_tc(struct hclge_dev *hdev)
-{
- struct hclge_vport *vport = hdev->vport;
- struct hnae3_client *client;
- struct hnae3_handle *handle;
- int ret;
- u32 i;
-
- for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
- handle = &vport[i].nic;
- client = handle->client;
-
- if (!client || !client->ops || !client->ops->setup_tc)
- continue;
-
- ret = client->ops->setup_tc(handle, hdev->tm_info.num_tc);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
static int hclge_notify_down_uinit(struct hclge_dev *hdev)
{
int ret;
@@ -257,10 +234,6 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
if (ret)
goto err_out;
- ret = hclge_client_setup_tc(hdev);
- if (ret)
- goto err_out;
-
ret = hclge_notify_init_up(hdev);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 113efd4ae157..a0a33c02ce25 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -696,17 +696,16 @@ static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev,
u32 qset_mapping[HCLGE_BP_EXT_GRP_NUM];
struct hclge_qs_to_pri_link_cmd *map;
struct hclge_tqp_tx_queue_tc_cmd *tc;
+ u16 group_id, queue_id, qset_id;
enum hclge_opcode_type cmd;
+ u8 grp_num, pri_id, tc_id;
struct hclge_desc desc;
- int queue_id, group_id;
- int tc_id, qset_id;
- int pri_id, ret;
u16 qs_id_l;
u16 qs_id_h;
- u8 grp_num;
+ int ret;
u32 i;
- ret = kstrtouint(cmd_buf, 0, &queue_id);
+ ret = kstrtou16(cmd_buf, 0, &queue_id);
queue_id = (ret != 0) ? 0 : queue_id;
cmd = HCLGE_OPC_TM_NQ_TO_QS_LINK;
@@ -754,7 +753,7 @@ static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev,
tc_id = tc->tc_id & 0x7;
dev_info(&hdev->pdev->dev, "queue_id | qset_id | pri_id | tc_id\n");
- dev_info(&hdev->pdev->dev, "%04d | %04d | %02d | %02d\n",
+ dev_info(&hdev->pdev->dev, "%04u | %04u | %02u | %02u\n",
queue_id, qset_id, pri_id, tc_id);
if (!hnae3_dev_dcb_supported(hdev)) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 9ee55ee0487d..0ca7f1b984bf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -1073,7 +1073,7 @@ static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en)
* This function querys number of mpf and pf buffer descriptors.
*/
static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras,
- int *mpf_bd_num, int *pf_bd_num)
+ u32 *mpf_bd_num, u32 *pf_bd_num)
{
struct device *dev = &hdev->pdev->dev;
u32 mpf_min_bd_num, pf_min_bd_num;
@@ -1102,7 +1102,7 @@ static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras,
*mpf_bd_num = le32_to_cpu(desc_bd.data[0]);
*pf_bd_num = le32_to_cpu(desc_bd.data[1]);
if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) {
- dev_err(dev, "Invalid bd num: mpf(%d), pf(%d)\n",
+ dev_err(dev, "Invalid bd num: mpf(%u), pf(%u)\n",
*mpf_bd_num, *pf_bd_num);
return -EINVAL;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index f5a988498cc3..64a80a5933cb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -24,7 +24,7 @@
#include "hnae3.h"
#define HCLGE_NAME "hclge"
-#define HCLGE_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset))))
+#define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset)))
#define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f))
#define HCLGE_BUF_SIZE_UNIT 256U
@@ -626,7 +626,7 @@ static u8 *hclge_tqps_get_strings(struct hnae3_handle *handle, u8 *data)
for (i = 0; i < kinfo->num_tqps; i++) {
struct hclge_tqp *tqp = container_of(handle->kinfo.tqp[i],
struct hclge_tqp, q);
- snprintf(buff, ETH_GSTRING_LEN, "txq%d_pktnum_rcd",
+ snprintf(buff, ETH_GSTRING_LEN, "txq%u_pktnum_rcd",
tqp->index);
buff = buff + ETH_GSTRING_LEN;
}
@@ -634,7 +634,7 @@ static u8 *hclge_tqps_get_strings(struct hnae3_handle *handle, u8 *data)
for (i = 0; i < kinfo->num_tqps; i++) {
struct hclge_tqp *tqp = container_of(kinfo->tqp[i],
struct hclge_tqp, q);
- snprintf(buff, ETH_GSTRING_LEN, "rxq%d_pktnum_rcd",
+ snprintf(buff, ETH_GSTRING_LEN, "rxq%u_pktnum_rcd",
tqp->index);
buff = buff + ETH_GSTRING_LEN;
}
@@ -928,7 +928,7 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
return 0;
}
-static int hclge_parse_speed(int speed_cmd, int *speed)
+static int hclge_parse_speed(u8 speed_cmd, u32 *speed)
{
switch (speed_cmd) {
case 6:
@@ -5595,7 +5595,7 @@ static int hclge_fd_check_ext_tuple(struct hclge_dev *hdev,
if (fs->m_ext.vlan_tci &&
be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID) {
dev_err(&hdev->pdev->dev,
- "failed to config vlan_tci, invalid vlan_tci: %u, max is %u.\n",
+ "failed to config vlan_tci, invalid vlan_tci: %u, max is %d.\n",
ntohs(fs->h_ext.vlan_tci), VLAN_N_VID - 1);
return -EINVAL;
}
@@ -10831,7 +10831,7 @@ static void hclge_reset_vf_rate(struct hclge_dev *hdev)
}
}
-static int hclge_vf_rate_param_check(struct hclge_dev *hdev, int vf,
+static int hclge_vf_rate_param_check(struct hclge_dev *hdev,
int min_tx_rate, int max_tx_rate)
{
if (min_tx_rate != 0 ||
@@ -10852,7 +10852,7 @@ static int hclge_set_vf_rate(struct hnae3_handle *handle, int vf,
struct hclge_dev *hdev = vport->back;
int ret;
- ret = hclge_vf_rate_param_check(hdev, vf, min_tx_rate, max_tx_rate);
+ ret = hclge_vf_rate_param_check(hdev, min_tx_rate, max_tx_rate);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 33b17a199e18..19d7f28773f3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -46,15 +46,12 @@
#define HCLGE_CMDQ_RX_DEPTH_REG 0x27020
#define HCLGE_CMDQ_RX_TAIL_REG 0x27024
#define HCLGE_CMDQ_RX_HEAD_REG 0x27028
-#define HCLGE_CMDQ_INTR_SRC_REG 0x27100
#define HCLGE_CMDQ_INTR_STS_REG 0x27104
#define HCLGE_CMDQ_INTR_EN_REG 0x27108
#define HCLGE_CMDQ_INTR_GEN_REG 0x2710C
/* bar registers for common func */
#define HCLGE_VECTOR0_OTER_EN_REG 0x20600
-#define HCLGE_RAS_OTHER_STS_REG 0x20B00
-#define HCLGE_FUNC_RESET_STS_REG 0x20C00
#define HCLGE_GRO_EN_REG 0x28000
/* bar registers for rcb */
@@ -728,7 +725,7 @@ struct hclge_vf_vlan_cfg {
* x = (~k) & v
* y = (k ^ ~v) & k
*/
-#define calc_x(x, k, v) ((x) = (~(k) & (v)))
+#define calc_x(x, k, v) (x = ~(k) & (v))
#define calc_y(y, k, v) \
do { \
const typeof(k) _k_ = (k); \
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 754c09ada901..52a373722589 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -56,7 +56,7 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport,
resp_pf_to_vf->msg.resp_status = resp;
} else {
dev_warn(&hdev->pdev->dev,
- "failed to send response to VF, response status %d is out-of-bound\n",
+ "failed to send response to VF, response status %u is out-of-bound\n",
resp);
resp_pf_to_vf->msg.resp_status = EIO;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 906d98e515aa..151afd1f0688 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -41,8 +41,9 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
struct hclge_shaper_ir_para *ir_para,
u32 max_tm_rate)
{
+#define DEFAULT_SHAPER_IR_B 126
#define DIVISOR_CLK (1000 * 8)
-#define DIVISOR_IR_B_126 (126 * DIVISOR_CLK)
+#define DEFAULT_DIVISOR_IR_B (DEFAULT_SHAPER_IR_B * DIVISOR_CLK)
static const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = {
6 * 256, /* Prioriy level */
@@ -69,10 +70,10 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
* ir_calc = ---------------- * 1000
* tick * 1
*/
- ir_calc = (DIVISOR_IR_B_126 + (tick >> 1) - 1) / tick;
+ ir_calc = (DEFAULT_DIVISOR_IR_B + (tick >> 1) - 1) / tick;
if (ir_calc == ir) {
- ir_para->ir_b = 126;
+ ir_para->ir_b = DEFAULT_SHAPER_IR_B;
ir_para->ir_u = 0;
ir_para->ir_s = 0;
@@ -81,7 +82,8 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
/* Increasing the denominator to select ir_s value */
while (ir_calc >= ir && ir) {
ir_s_calc++;
- ir_calc = DIVISOR_IR_B_126 / (tick * (1 << ir_s_calc));
+ ir_calc = DEFAULT_DIVISOR_IR_B /
+ (tick * (1 << ir_s_calc));
}
ir_para->ir_b = (ir * tick * (1 << ir_s_calc) +
@@ -92,12 +94,12 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
while (ir_calc < ir) {
ir_u_calc++;
- numerator = DIVISOR_IR_B_126 * (1 << ir_u_calc);
+ numerator = DEFAULT_DIVISOR_IR_B * (1 << ir_u_calc);
ir_calc = (numerator + (tick >> 1)) / tick;
}
if (ir_calc == ir) {
- ir_para->ir_b = 126;
+ ir_para->ir_b = DEFAULT_SHAPER_IR_B;
} else {
u32 denominator = DIVISOR_CLK * (1 << --ir_u_calc);
ir_para->ir_b = (ir * tick + (denominator >> 1)) /
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index d33cb04acbef..b25d76023af0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -17,7 +17,7 @@
/* SP or DWRR */
#define HCLGE_TM_TX_SCHD_DWRR_MSK BIT(0)
-#define HCLGE_TM_TX_SCHD_SP_MSK (0xFE)
+#define HCLGE_TM_TX_SCHD_SP_MSK 0xFE
#define HCLGE_ETHER_MAX_RATE 100000
@@ -214,8 +214,8 @@ struct hclge_pri_shaper_para {
(HCLGE_TM_SHAP_##string##_MSK), \
(HCLGE_TM_SHAP_##string##_LSH), val)
#define hclge_tm_get_field(src, string) \
- hnae3_get_field((src), (HCLGE_TM_SHAP_##string##_MSK), \
- (HCLGE_TM_SHAP_##string##_LSH))
+ hnae3_get_field((src), HCLGE_TM_SHAP_##string##_MSK, \
+ HCLGE_TM_SHAP_##string##_LSH)
int hclge_tm_schd_init(struct hclge_dev *hdev);
int hclge_tm_vport_map_update(struct hclge_dev *hdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
index ac2864a7ce8d..8a37a22a176b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
@@ -216,8 +216,8 @@ struct hclgevf_rss_input_tuple_cmd {
#define HCLGEVF_RSS_CFG_TBL_SIZE 16
struct hclgevf_rss_indirection_table_cmd {
- u16 start_table_index;
- u16 rss_set_bitmap;
+ __le16 start_table_index;
+ __le16 rss_set_bitmap;
u8 rsv[4];
u8 rss_result[HCLGEVF_RSS_CFG_TBL_SIZE];
};
@@ -229,7 +229,7 @@ struct hclgevf_rss_indirection_table_cmd {
#define HCLGEVF_RSS_TC_VALID_B 15
#define HCLGEVF_MAX_TC_NUM 8
struct hclgevf_rss_tc_mode_cmd {
- u16 rss_tc_mode[HCLGEVF_MAX_TC_NUM];
+ __le16 rss_tc_mode[HCLGEVF_MAX_TC_NUM];
u8 rsv[8];
};
@@ -278,7 +278,6 @@ struct hclgevf_cfg_tx_queue_pointer_cmd {
#define HCLGEVF_NIC_CMQ_DESC_NUM 1024
#define HCLGEVF_NIC_CMQ_DESC_NUM_S 3
-#define HCLGEVF_NIC_CMDQ_INT_SRC_REG 0x27100
#define HCLGEVF_QUERY_DEV_SPECS_BD_NUM 4
@@ -315,9 +314,9 @@ static inline u32 hclgevf_read_reg(u8 __iomem *base, u32 reg)
}
#define hclgevf_write_dev(a, reg, value) \
- hclgevf_write_reg((a)->io_base, (reg), (value))
+ hclgevf_write_reg((a)->io_base, reg, value)
#define hclgevf_read_dev(a, reg) \
- hclgevf_read_reg((a)->io_base, (reg))
+ hclgevf_read_reg((a)->io_base, reg)
#define HCLGEVF_SEND_SYNC(flag) \
((flag) & HCLGEVF_CMD_FLAG_NO_INTR)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index cdb1131ba239..ece31693e624 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -180,7 +180,7 @@ static u8 *hclgevf_tqps_get_strings(struct hnae3_handle *handle, u8 *data)
for (i = 0; i < kinfo->num_tqps; i++) {
struct hclgevf_tqp *tqp = container_of(kinfo->tqp[i],
struct hclgevf_tqp, q);
- snprintf(buff, ETH_GSTRING_LEN, "txq%d_pktnum_rcd",
+ snprintf(buff, ETH_GSTRING_LEN, "txq%u_pktnum_rcd",
tqp->index);
buff += ETH_GSTRING_LEN;
}
@@ -188,7 +188,7 @@ static u8 *hclgevf_tqps_get_strings(struct hnae3_handle *handle, u8 *data)
for (i = 0; i < kinfo->num_tqps; i++) {
struct hclgevf_tqp *tqp = container_of(kinfo->tqp[i],
struct hclgevf_tqp, q);
- snprintf(buff, ETH_GSTRING_LEN, "rxq%d_pktnum_rcd",
+ snprintf(buff, ETH_GSTRING_LEN, "rxq%u_pktnum_rcd",
tqp->index);
buff += ETH_GSTRING_LEN;
}
@@ -658,8 +658,9 @@ static int hclgevf_set_rss_indir_table(struct hclgevf_dev *hdev)
for (i = 0; i < rss_cfg_tbl_num; i++) {
hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_INDIR_TABLE,
false);
- req->start_table_index = i * HCLGEVF_RSS_CFG_TBL_SIZE;
- req->rss_set_bitmap = HCLGEVF_RSS_SET_BITMAP_MSK;
+ req->start_table_index =
+ cpu_to_le16(i * HCLGEVF_RSS_CFG_TBL_SIZE);
+ req->rss_set_bitmap = cpu_to_le16(HCLGEVF_RSS_SET_BITMAP_MSK);
for (j = 0; j < HCLGEVF_RSS_CFG_TBL_SIZE; j++)
req->rss_result[j] =
indir[i * HCLGEVF_RSS_CFG_TBL_SIZE + j];
@@ -700,12 +701,16 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size)
hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_TC_MODE, false);
for (i = 0; i < HCLGEVF_MAX_TC_NUM; i++) {
- hnae3_set_bit(req->rss_tc_mode[i], HCLGEVF_RSS_TC_VALID_B,
+ u16 mode = 0;
+
+ hnae3_set_bit(mode, HCLGEVF_RSS_TC_VALID_B,
(tc_valid[i] & 0x1));
- hnae3_set_field(req->rss_tc_mode[i], HCLGEVF_RSS_TC_SIZE_M,
+ hnae3_set_field(mode, HCLGEVF_RSS_TC_SIZE_M,
HCLGEVF_RSS_TC_SIZE_S, tc_size[i]);
- hnae3_set_field(req->rss_tc_mode[i], HCLGEVF_RSS_TC_OFFSET_M,
+ hnae3_set_field(mode, HCLGEVF_RSS_TC_OFFSET_M,
HCLGEVF_RSS_TC_OFFSET_S, tc_offset[i]);
+
+ req->rss_tc_mode[i] = cpu_to_le16(mode);
}
status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
if (status)
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 6da4f43f2348..73da4f71f530 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -24,6 +24,7 @@ ice-y := ice_main.o \
ice_flow.o \
ice_devlink.o \
ice_fw_update.o \
+ ice_lag.o \
ice_ethtool.o
ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index fca428c879ec..dae8280ce17c 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -39,6 +39,7 @@
#include <net/devlink.h>
#include <net/ipv6.h>
#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include <net/geneve.h>
#include <net/gre.h>
#include <net/udp_tunnel.h>
@@ -55,6 +56,7 @@
#include "ice_fdir.h"
#include "ice_xsk.h"
#include "ice_arfs.h"
+#include "ice_lag.h"
#define ICE_BAR0 0
#define ICE_REQ_DESC_MULTIPLE 32
@@ -326,9 +328,11 @@ struct ice_vsi {
struct ice_ring **xdp_rings; /* XDP ring array */
u16 num_xdp_txq; /* Used XDP queues */
u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
- struct xsk_buff_pool **xsk_pools;
- u16 num_xsk_pools_used;
- u16 num_xsk_pools;
+
+ /* setup back reference, to which aggregator node this VSI
+ * corresponds to
+ */
+ struct ice_agg_node *agg_node;
} ____cacheline_internodealigned_in_smp;
/* struct that defines an interrupt vector */
@@ -377,6 +381,13 @@ enum ice_pf_flags {
ICE_PF_FLAGS_NBITS /* must be last */
};
+struct ice_agg_node {
+ u32 agg_id;
+#define ICE_MAX_VSIS_IN_AGG_NODE 64
+ u32 num_vsis;
+ u8 valid;
+};
+
struct ice_pf {
struct pci_dev *pdev;
@@ -455,6 +466,15 @@ struct ice_pf {
__le64 nvm_phy_type_lo; /* NVM PHY type low */
__le64 nvm_phy_type_hi; /* NVM PHY type high */
struct ice_link_default_override_tlv link_dflt_override;
+ struct ice_lag *lag; /* Link Aggregation information */
+
+#define ICE_INVALID_AGG_NODE_ID 0
+#define ICE_PF_AGG_NODE_ID_START 1
+#define ICE_MAX_PF_AGG_NODES 32
+ struct ice_agg_node pf_agg_node[ICE_MAX_PF_AGG_NODES];
+#define ICE_VF_AGG_NODE_ID_START 65
+#define ICE_MAX_VF_AGG_NODES 32
+ struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES];
};
struct ice_netdev_priv {
@@ -517,17 +537,15 @@ static inline void ice_set_ring_xdp(struct ice_ring *ring)
*/
static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_ring *ring)
{
- struct xsk_buff_pool **pools = ring->vsi->xsk_pools;
u16 qid = ring->q_index;
if (ice_ring_is_xdp(ring))
qid -= ring->vsi->num_xdp_txq;
- if (qid >= ring->vsi->num_xsk_pools || !pools || !pools[qid] ||
- !ice_is_xdp_ena_vsi(ring->vsi))
+ if (!ice_is_xdp_ena_vsi(ring->vsi))
return NULL;
- return pools[qid];
+ return xsk_get_pool_from_qid(ring->vsi->netdev, qid);
}
/**
@@ -557,11 +575,31 @@ static inline struct ice_vsi *ice_get_ctrl_vsi(struct ice_pf *pf)
return pf->vsi[pf->ctrl_vsi_idx];
}
+/**
+ * ice_set_sriov_cap - enable SRIOV in PF flags
+ * @pf: PF struct
+ */
+static inline void ice_set_sriov_cap(struct ice_pf *pf)
+{
+ if (pf->hw.func_caps.common_cap.sr_iov_1_1)
+ set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+}
+
+/**
+ * ice_clear_sriov_cap - disable SRIOV in PF flags
+ * @pf: PF struct
+ */
+static inline void ice_clear_sriov_cap(struct ice_pf *pf)
+{
+ clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+}
+
#define ICE_FD_STAT_CTR_BLOCK_COUNT 256
#define ICE_FD_STAT_PF_IDX(base_idx) \
((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT)
#define ICE_FD_SB_STAT_IDX(base_idx) ICE_FD_STAT_PF_IDX(base_idx)
+bool netif_is_ice(struct net_device *dev);
int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
int ice_vsi_open_ctrl(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index a51470b68d54..80186589153b 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -695,6 +695,18 @@ struct ice_aqc_sched_elem_cmd {
__le32 addr_low;
};
+struct ice_aqc_txsched_move_grp_info_hdr {
+ __le32 src_parent_teid;
+ __le32 dest_parent_teid;
+ __le16 num_elems;
+ __le16 reserved;
+};
+
+struct ice_aqc_move_elem {
+ struct ice_aqc_txsched_move_grp_info_hdr hdr;
+ __le32 teid[];
+};
+
struct ice_aqc_elem_info_bw {
__le16 bw_profile_idx;
__le16 bw_alloc;
@@ -1528,6 +1540,16 @@ struct ice_aqc_lldp_stop_start_specific_agent {
u8 reserved[15];
};
+/* LLDP Filter Control (direct 0x0A0A) */
+struct ice_aqc_lldp_filter_ctrl {
+ u8 cmd_flags;
+#define ICE_AQC_LLDP_FILTER_ACTION_ADD 0x0
+#define ICE_AQC_LLDP_FILTER_ACTION_DELETE 0x1
+ u8 reserved1;
+ __le16 vsi_num;
+ u8 reserved2[12];
+};
+
/* Get/Set RSS key (indirect 0x0B04/0x0B02) */
struct ice_aqc_get_set_rss_key {
#define ICE_AQC_GSET_RSS_KEY_VSI_VALID BIT(15)
@@ -1851,6 +1873,7 @@ struct ice_aq_desc {
struct ice_aqc_lldp_start lldp_start;
struct ice_aqc_lldp_set_local_mib lldp_set_mib;
struct ice_aqc_lldp_stop_start_specific_agent lldp_agent_ctrl;
+ struct ice_aqc_lldp_filter_ctrl lldp_filter_ctrl;
struct ice_aqc_get_set_rss_lut get_set_rss_lut;
struct ice_aqc_get_set_rss_key get_set_rss_key;
struct ice_aqc_add_txqs add_txqs;
@@ -1950,6 +1973,7 @@ enum ice_adminq_opc {
ice_aqc_opc_add_sched_elems = 0x0401,
ice_aqc_opc_cfg_sched_elems = 0x0403,
ice_aqc_opc_get_sched_elems = 0x0404,
+ ice_aqc_opc_move_sched_elems = 0x0408,
ice_aqc_opc_suspend_sched_elems = 0x0409,
ice_aqc_opc_resume_sched_elems = 0x040A,
ice_aqc_opc_query_port_ets = 0x040E,
@@ -1991,6 +2015,7 @@ enum ice_adminq_opc {
ice_aqc_opc_get_cee_dcb_cfg = 0x0A07,
ice_aqc_opc_lldp_set_local_mib = 0x0A08,
ice_aqc_opc_lldp_stop_start_specific_agent = 0x0A09,
+ ice_aqc_opc_lldp_filter_ctrl = 0x0A0A,
/* RSS commands */
ice_aqc_opc_set_rss_key = 0x0B02,
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 607d33d05a0c..3d9475e222cd 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -110,7 +110,7 @@ ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size,
if (status)
return status;
- resp = (struct ice_aqc_manage_mac_read_resp *)buf;
+ resp = buf;
flags = le16_to_cpu(cmd->flags) & ICE_AQC_MAN_MAC_READ_M;
if (!(flags & ICE_AQC_MAN_MAC_LAN_ADDR_VALID)) {
@@ -907,6 +907,7 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
ice_debug(hw, ICE_DBG_SCHED, "Failed to get scheduler allocated resources\n");
goto err_unroll_alloc;
}
+ ice_sched_get_psm_clk_freq(hw);
/* Initialize port_info struct with scheduler data */
status = ice_sched_init_port(hw->port_info);
@@ -1979,7 +1980,7 @@ ice_parse_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
struct ice_aqc_list_caps_elem *cap_resp;
u32 i;
- cap_resp = (struct ice_aqc_list_caps_elem *)buf;
+ cap_resp = buf;
memset(func_p, 0, sizeof(*func_p));
@@ -2109,7 +2110,7 @@ ice_parse_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
struct ice_aqc_list_caps_elem *cap_resp;
u32 i;
- cap_resp = (struct ice_aqc_list_caps_elem *)buf;
+ cap_resp = buf;
memset(dev_p, 0, sizeof(*dev_p));
@@ -4078,6 +4079,7 @@ static enum ice_status ice_replay_pre_init(struct ice_hw *hw)
for (i = 0; i < ICE_SW_LKUP_LAST; i++)
list_replace_init(&sw->recp_list[i].filt_rules,
&sw->recp_list[i].filt_replay_rules);
+ ice_sched_replay_agg_vsi_preinit(hw);
return 0;
}
@@ -4109,6 +4111,8 @@ enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle)
return status;
/* Replay per VSI all filters */
status = ice_replay_vsi_all_fltr(hw, vsi_handle);
+ if (!status)
+ status = ice_replay_vsi_agg(hw, vsi_handle);
return status;
}
@@ -4122,6 +4126,7 @@ void ice_replay_post(struct ice_hw *hw)
{
/* Delete old entries from replay filter list head */
ice_rm_all_sw_replay_rule_info(hw);
+ ice_sched_replay_agg(hw);
}
/**
@@ -4366,3 +4371,50 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
}
+
+/**
+ * ice_fw_supports_lldp_fltr - check NVM version supports lldp_fltr_ctrl
+ * @hw: pointer to HW struct
+ */
+bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw)
+{
+ if (hw->mac_type != ICE_MAC_E810)
+ return false;
+
+ if (hw->api_maj_ver == ICE_FW_API_LLDP_FLTR_MAJ) {
+ if (hw->api_min_ver > ICE_FW_API_LLDP_FLTR_MIN)
+ return true;
+ if (hw->api_min_ver == ICE_FW_API_LLDP_FLTR_MIN &&
+ hw->api_patch >= ICE_FW_API_LLDP_FLTR_PATCH)
+ return true;
+ } else if (hw->api_maj_ver > ICE_FW_API_LLDP_FLTR_MAJ) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * ice_lldp_fltr_add_remove - add or remove a LLDP Rx switch filter
+ * @hw: pointer to HW struct
+ * @vsi_num: absolute HW index for VSI
+ * @add: boolean for if adding or removing a filter
+ */
+enum ice_status
+ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add)
+{
+ struct ice_aqc_lldp_filter_ctrl *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.lldp_filter_ctrl;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_filter_ctrl);
+
+ if (add)
+ cmd->cmd_flags = ICE_AQC_LLDP_FILTER_ACTION_ADD;
+ else
+ cmd->cmd_flags = ICE_AQC_LLDP_FILTER_ACTION_DELETE;
+
+ cmd->vsi_num = cpu_to_le16(vsi_num);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 3ebb973878c7..baf4064fcbfe 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -175,4 +175,7 @@ ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
enum ice_status
ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
struct ice_sq_cd *cd);
+bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw);
+enum ice_status
+ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add);
#endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index 4db12d1f5808..b2d8a5932b1d 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -838,7 +838,7 @@ static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
*/
static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len)
{
- struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc;
+ struct ice_aq_desc *cq_desc = desc;
u16 len;
if (!IS_ENABLED(CONFIG_DYNAMIC_DEBUG) &&
@@ -868,7 +868,7 @@ static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len)
if (buf_len < len)
len = buf_len;
- ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, (u8 *)buf, len);
+ ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, buf, len);
}
}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index 2a3147ee0bbb..e42727941ef5 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -850,9 +850,9 @@ ice_get_ieee_or_cee_dcb_cfg(struct ice_port_info *pi, u8 dcbx_mode)
return ICE_ERR_PARAM;
if (dcbx_mode == ICE_DCBX_MODE_IEEE)
- dcbx_cfg = &pi->local_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
else if (dcbx_mode == ICE_DCBX_MODE_CEE)
- dcbx_cfg = &pi->desired_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.desired_dcbx_cfg;
/* Get Local DCB Config in case of ICE_DCBX_MODE_IEEE
* or get CEE DCB Desired Config in case of ICE_DCBX_MODE_CEE
@@ -863,7 +863,7 @@ ice_get_ieee_or_cee_dcb_cfg(struct ice_port_info *pi, u8 dcbx_mode)
goto out;
/* Get Remote DCB Config */
- dcbx_cfg = &pi->remote_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.remote_dcbx_cfg;
ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbx_cfg);
/* Don't treat ENOENT as an error for Remote MIBs */
@@ -892,14 +892,14 @@ enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi)
ret = ice_aq_get_cee_dcb_cfg(pi->hw, &cee_cfg, NULL);
if (!ret) {
/* CEE mode */
- dcbx_cfg = &pi->local_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_CEE;
dcbx_cfg->tlv_status = le32_to_cpu(cee_cfg.tlv_status);
ice_cee_to_dcb_cfg(&cee_cfg, dcbx_cfg);
ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_CEE);
} else if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
/* CEE mode not enabled try querying IEEE data */
- dcbx_cfg = &pi->local_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_IEEE);
}
@@ -916,26 +916,26 @@ enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi)
*/
enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
{
- struct ice_port_info *pi = hw->port_info;
+ struct ice_qos_cfg *qos_cfg = &hw->port_info->qos_cfg;
enum ice_status ret = 0;
if (!hw->func_caps.common_cap.dcb)
return ICE_ERR_NOT_SUPPORTED;
- pi->is_sw_lldp = true;
+ qos_cfg->is_sw_lldp = true;
/* Get DCBX status */
- pi->dcbx_status = ice_get_dcbx_status(hw);
+ qos_cfg->dcbx_status = ice_get_dcbx_status(hw);
- if (pi->dcbx_status == ICE_DCBX_STATUS_DONE ||
- pi->dcbx_status == ICE_DCBX_STATUS_IN_PROGRESS ||
- pi->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
+ if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DONE ||
+ qos_cfg->dcbx_status == ICE_DCBX_STATUS_IN_PROGRESS ||
+ qos_cfg->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
/* Get current DCBX configuration */
- ret = ice_get_dcb_cfg(pi);
+ ret = ice_get_dcb_cfg(hw->port_info);
if (ret)
return ret;
- pi->is_sw_lldp = false;
- } else if (pi->dcbx_status == ICE_DCBX_STATUS_DIS) {
+ qos_cfg->is_sw_lldp = false;
+ } else if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DIS) {
return ICE_ERR_NOT_READY;
}
@@ -943,7 +943,7 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
if (enable_mib_change) {
ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL);
if (ret)
- pi->is_sw_lldp = true;
+ qos_cfg->is_sw_lldp = true;
}
return ret;
@@ -958,21 +958,21 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
*/
enum ice_status ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib)
{
- struct ice_port_info *pi = hw->port_info;
+ struct ice_qos_cfg *qos_cfg = &hw->port_info->qos_cfg;
enum ice_status ret;
if (!hw->func_caps.common_cap.dcb)
return ICE_ERR_NOT_SUPPORTED;
/* Get DCBX status */
- pi->dcbx_status = ice_get_dcbx_status(hw);
+ qos_cfg->dcbx_status = ice_get_dcbx_status(hw);
- if (pi->dcbx_status == ICE_DCBX_STATUS_DIS)
+ if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DIS)
return ICE_ERR_NOT_READY;
ret = ice_aq_cfg_lldp_mib_change(hw, ena_mib, NULL);
if (!ret)
- pi->is_sw_lldp = !ena_mib;
+ qos_cfg->is_sw_lldp = !ena_mib;
return ret;
}
@@ -1270,7 +1270,7 @@ enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi)
hw = pi->hw;
/* update the HW local config */
- dcbcfg = &pi->local_dcbx_cfg;
+ dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
/* Allocate the LLDPDU */
lldpmib = devm_kzalloc(ice_hw_to_dev(hw), ICE_LLDPDU_SIZE, GFP_KERNEL);
if (!lldpmib)
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 36abd6b7280c..1e8f71ffc8ce 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -28,7 +28,7 @@ void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc)
if (netdev_set_num_tc(netdev, vsi->tc_cfg.numtc))
return;
- dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+ dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
ice_for_each_traffic_class(i)
if (vsi->tc_cfg.ena_tc & BIT(i))
@@ -134,7 +134,7 @@ static u8 ice_dcb_get_mode(struct ice_port_info *port_info, bool host)
else
mode = DCB_CAP_DCBX_LLD_MANAGED;
- if (port_info->local_dcbx_cfg.dcbx_mode & ICE_DCBX_MODE_CEE)
+ if (port_info->qos_cfg.local_dcbx_cfg.dcbx_mode & ICE_DCBX_MODE_CEE)
return mode | DCB_CAP_DCBX_VER_CEE;
else
return mode | DCB_CAP_DCBX_VER_IEEE;
@@ -277,10 +277,10 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
int ret = ICE_DCB_NO_HW_CHG;
struct ice_vsi *pf_vsi;
- curr_cfg = &pf->hw.port_info->local_dcbx_cfg;
+ curr_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
/* FW does not care if change happened */
- if (!pf->hw.port_info->is_sw_lldp)
+ if (!pf->hw.port_info->qos_cfg.is_sw_lldp)
ret = ICE_DCB_HW_CHG_RST;
/* Enable DCB tagging only when more than one TC */
@@ -327,7 +327,7 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
/* Only send new config to HW if we are in SW LLDP mode. Otherwise,
* the new config came from the HW in the first place.
*/
- if (pf->hw.port_info->is_sw_lldp) {
+ if (pf->hw.port_info->qos_cfg.is_sw_lldp) {
ret = ice_set_dcb_cfg(pf->hw.port_info);
if (ret) {
dev_err(dev, "Set DCB Config failed\n");
@@ -360,7 +360,7 @@ free_cfg:
*/
static void ice_cfg_etsrec_defaults(struct ice_port_info *pi)
{
- struct ice_dcbx_cfg *dcbcfg = &pi->local_dcbx_cfg;
+ struct ice_dcbx_cfg *dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
u8 i;
/* Ensure ETS recommended DCB configuration is not already set */
@@ -446,7 +446,7 @@ void ice_dcb_rebuild(struct ice_pf *pf)
mutex_lock(&pf->tc_mutex);
- if (!pf->hw.port_info->is_sw_lldp)
+ if (!pf->hw.port_info->qos_cfg.is_sw_lldp)
ice_cfg_etsrec_defaults(pf->hw.port_info);
ret = ice_set_dcb_cfg(pf->hw.port_info);
@@ -455,9 +455,9 @@ void ice_dcb_rebuild(struct ice_pf *pf)
goto dcb_error;
}
- if (!pf->hw.port_info->is_sw_lldp) {
+ if (!pf->hw.port_info->qos_cfg.is_sw_lldp) {
ret = ice_cfg_lldp_mib_change(&pf->hw, true);
- if (ret && !pf->hw.port_info->is_sw_lldp) {
+ if (ret && !pf->hw.port_info->qos_cfg.is_sw_lldp) {
dev_err(dev, "Failed to register for MIB changes\n");
goto dcb_error;
}
@@ -510,11 +510,12 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
int ret = 0;
pi = pf->hw.port_info;
- newcfg = kmemdup(&pi->local_dcbx_cfg, sizeof(*newcfg), GFP_KERNEL);
+ newcfg = kmemdup(&pi->qos_cfg.local_dcbx_cfg, sizeof(*newcfg),
+ GFP_KERNEL);
if (!newcfg)
return -ENOMEM;
- memset(&pi->local_dcbx_cfg, 0, sizeof(*newcfg));
+ memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*newcfg));
dev_info(ice_pf_to_dev(pf), "Configuring initial DCB values\n");
if (ice_pf_dcb_cfg(pf, newcfg, locked))
@@ -545,7 +546,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked)
if (!dcbcfg)
return -ENOMEM;
- memset(&pi->local_dcbx_cfg, 0, sizeof(*dcbcfg));
+ memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*dcbcfg));
dcbcfg->etscfg.willing = ets_willing ? 1 : 0;
dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc;
@@ -608,7 +609,7 @@ static bool ice_dcb_tc_contig(u8 *prio_table)
*/
static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
{
- struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+ struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
struct device *dev = ice_pf_to_dev(pf);
int ret;
@@ -638,7 +639,7 @@ static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
*/
void ice_pf_dcb_recfg(struct ice_pf *pf)
{
- struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+ struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
u8 tc_map = 0;
int v, ret;
@@ -691,7 +692,7 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
port_info = hw->port_info;
err = ice_init_dcb(hw, false);
- if (err && !port_info->is_sw_lldp) {
+ if (err && !port_info->qos_cfg.is_sw_lldp) {
dev_err(dev, "Error initializing DCB %d\n", err);
goto dcb_init_err;
}
@@ -858,7 +859,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
/* Update the remote cached instance and return */
ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
- &pi->remote_dcbx_cfg);
+ &pi->qos_cfg.remote_dcbx_cfg);
if (ret) {
dev_err(dev, "Failed to get remote DCB config\n");
return;
@@ -868,10 +869,11 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
mutex_lock(&pf->tc_mutex);
/* store the old configuration */
- tmp_dcbx_cfg = pf->hw.port_info->local_dcbx_cfg;
+ tmp_dcbx_cfg = pf->hw.port_info->qos_cfg.local_dcbx_cfg;
/* Reset the old DCBX configuration data */
- memset(&pi->local_dcbx_cfg, 0, sizeof(pi->local_dcbx_cfg));
+ memset(&pi->qos_cfg.local_dcbx_cfg, 0,
+ sizeof(pi->qos_cfg.local_dcbx_cfg));
/* Get updated DCBX data from firmware */
ret = ice_get_dcb_cfg(pf->hw.port_info);
@@ -881,7 +883,8 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
}
/* No change detected in DCBX configs */
- if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) {
+ if (!memcmp(&tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg,
+ sizeof(tmp_dcbx_cfg))) {
dev_dbg(dev, "No change detected in DCBX configuration.\n");
goto out;
}
@@ -889,13 +892,13 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
pf->dcbx_cap = ice_dcb_get_mode(pi, false);
need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg,
- &pi->local_dcbx_cfg);
- ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->local_dcbx_cfg);
+ &pi->qos_cfg.local_dcbx_cfg);
+ ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg);
if (!need_reconfig)
goto out;
/* Enable DCB tagging only when more than one TC */
- if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) {
+ if (ice_dcb_get_num_tc(&pi->qos_cfg.local_dcbx_cfg) > 1) {
dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
set_bit(ICE_FLAG_DCB_ENA, pf->flags);
} else {
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
index 87f91b750d59..fcfefad00d1c 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
@@ -34,12 +34,10 @@ static void ice_dcbnl_devreset(struct net_device *netdev)
static int ice_dcbnl_getets(struct net_device *netdev, struct ieee_ets *ets)
{
struct ice_dcbx_cfg *dcbxcfg;
- struct ice_port_info *pi;
struct ice_pf *pf;
pf = ice_netdev_to_pf(netdev);
- pi = pf->hw.port_info;
- dcbxcfg = &pi->local_dcbx_cfg;
+ dcbxcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
ets->willing = dcbxcfg->etscfg.willing;
ets->ets_cap = dcbxcfg->etscfg.maxtcs;
@@ -74,7 +72,7 @@ static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets)
!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
return -EINVAL;
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
mutex_lock(&pf->tc_mutex);
@@ -159,6 +157,7 @@ static u8 ice_dcbnl_getdcbx(struct net_device *netdev)
static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode)
{
struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_qos_cfg *qos_cfg;
/* No support for LLD_MANAGED modes or CEE+IEEE */
if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
@@ -171,10 +170,11 @@ static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode)
return ICE_DCB_NO_HW_CHG;
pf->dcbx_cap = mode;
+ qos_cfg = &pf->hw.port_info->qos_cfg;
if (mode & DCB_CAP_DCBX_VER_CEE)
- pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE;
+ qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE;
else
- pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE;
+ qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE;
dev_info(ice_pf_to_dev(pf), "DCBx mode = 0x%x\n", mode);
return ICE_DCB_HW_CHG_RST;
@@ -225,7 +225,7 @@ static int ice_dcbnl_getpfc(struct net_device *netdev, struct ieee_pfc *pfc)
struct ice_dcbx_cfg *dcbxcfg;
int i;
- dcbxcfg = &pi->local_dcbx_cfg;
+ dcbxcfg = &pi->qos_cfg.local_dcbx_cfg;
pfc->pfc_cap = dcbxcfg->pfc.pfccap;
pfc->pfc_en = dcbxcfg->pfc.pfcena;
pfc->mbc = dcbxcfg->pfc.mbc;
@@ -256,7 +256,7 @@ static int ice_dcbnl_setpfc(struct net_device *netdev, struct ieee_pfc *pfc)
mutex_lock(&pf->tc_mutex);
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
if (pfc->pfc_cap)
new_cfg->pfc.pfccap = pfc->pfc_cap;
@@ -293,9 +293,9 @@ ice_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, u8 *setting)
if (prio >= ICE_MAX_USER_PRIORITY)
return;
- *setting = (pi->local_dcbx_cfg.pfc.pfcena >> prio) & 0x1;
+ *setting = (pi->qos_cfg.local_dcbx_cfg.pfc.pfcena >> prio) & 0x1;
dev_dbg(ice_pf_to_dev(pf), "Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n",
- prio, *setting, pi->local_dcbx_cfg.pfc.pfcena);
+ prio, *setting, pi->qos_cfg.local_dcbx_cfg.pfc.pfcena);
}
/**
@@ -316,7 +316,7 @@ static void ice_dcbnl_set_pfc_cfg(struct net_device *netdev, int prio, u8 set)
if (prio >= ICE_MAX_USER_PRIORITY)
return;
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
if (set)
@@ -338,7 +338,7 @@ static u8 ice_dcbnl_getpfcstate(struct net_device *netdev)
struct ice_port_info *pi = pf->hw.port_info;
/* Return enabled if any UP enabled for PFC */
- if (pi->local_dcbx_cfg.pfc.pfcena)
+ if (pi->qos_cfg.local_dcbx_cfg.pfc.pfcena)
return 1;
return 0;
@@ -378,8 +378,8 @@ static u8 ice_dcbnl_setstate(struct net_device *netdev, u8 state)
if (state) {
set_bit(ICE_FLAG_DCB_ENA, pf->flags);
- memcpy(&pf->hw.port_info->desired_dcbx_cfg,
- &pf->hw.port_info->local_dcbx_cfg,
+ memcpy(&pf->hw.port_info->qos_cfg.desired_dcbx_cfg,
+ &pf->hw.port_info->qos_cfg.local_dcbx_cfg,
sizeof(struct ice_dcbx_cfg));
} else {
clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
@@ -413,7 +413,7 @@ ice_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio,
if (prio >= ICE_MAX_USER_PRIORITY)
return;
- *pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio];
+ *pgid = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[prio];
dev_dbg(ice_pf_to_dev(pf), "Get PG config prio=%d tc=%d\n", prio,
*pgid);
}
@@ -444,7 +444,7 @@ ice_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
if (tc >= ICE_MAX_TRAFFIC_CLASS)
return;
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
/* prio_type, bwg_id and bw_pct per UP are not supported */
@@ -474,7 +474,7 @@ ice_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 *bw_pct)
if (pgid >= ICE_MAX_TRAFFIC_CLASS)
return;
- *bw_pct = pi->local_dcbx_cfg.etscfg.tcbwtable[pgid];
+ *bw_pct = pi->qos_cfg.local_dcbx_cfg.etscfg.tcbwtable[pgid];
dev_dbg(ice_pf_to_dev(pf), "Get PG BW config tc=%d bw_pct=%d\n",
pgid, *bw_pct);
}
@@ -498,7 +498,7 @@ ice_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 bw_pct)
if (pgid >= ICE_MAX_TRAFFIC_CLASS)
return;
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
new_cfg->etscfg.tcbwtable[pgid] = bw_pct;
}
@@ -528,7 +528,7 @@ ice_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int prio,
if (prio >= ICE_MAX_USER_PRIORITY)
return;
- *pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio];
+ *pgid = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[prio];
}
/**
@@ -699,9 +699,9 @@ static int ice_dcbnl_setapp(struct net_device *netdev, struct dcb_app *app)
mutex_lock(&pf->tc_mutex);
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
- old_cfg = &pf->hw.port_info->local_dcbx_cfg;
+ old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
if (old_cfg->numapps == ICE_DCBX_MAX_APPS) {
ret = -EINVAL;
@@ -751,7 +751,7 @@ static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app)
return -EINVAL;
mutex_lock(&pf->tc_mutex);
- old_cfg = &pf->hw.port_info->local_dcbx_cfg;
+ old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
if (old_cfg->numapps <= 1)
goto delapp_out;
@@ -760,7 +760,7 @@ static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app)
if (ret)
goto delapp_out;
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
for (i = 1; i < new_cfg->numapps; i++) {
if (app->selector == new_cfg->app[i].selector &&
@@ -813,7 +813,7 @@ static u8 ice_dcbnl_cee_set_all(struct net_device *netdev)
!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
return ICE_DCB_NO_HW_CHG;
- new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
mutex_lock(&pf->tc_mutex);
@@ -884,7 +884,7 @@ void ice_dcbnl_set_all(struct ice_vsi *vsi)
if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags))
return;
- dcbxcfg = &pi->local_dcbx_cfg;
+ dcbxcfg = &pi->qos_cfg.local_dcbx_cfg;
for (i = 0; i < dcbxcfg->numapps; i++) {
u8 prio, tc_map;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index e01b7e34da5e..5636c9b23896 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -1242,6 +1242,11 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
enum ice_status status;
bool dcbx_agent_status;
+ /* Remove rule to direct LLDP packets to default VSI.
+ * The FW LLDP engine will now be consuming them.
+ */
+ ice_cfg_sw_lldp(vsi, false, false);
+
/* AQ command to start FW LLDP agent will return an
* error if the agent is already started
*/
@@ -1270,11 +1275,6 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
if (status)
dev_dbg(dev, "Fail to init DCB\n");
- /* Remove rule to direct LLDP packets to default VSI.
- * The FW LLDP engine will now be consuming them.
- */
- ice_cfg_sw_lldp(vsi, false, false);
-
/* Register for MIB change events */
status = ice_cfg_lldp_mib_change(&pf->hw, true);
if (status)
@@ -2979,7 +2979,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
pause->rx_pause = 0;
pause->tx_pause = 0;
- dcbx_cfg = &pi->local_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
@@ -3031,7 +3031,7 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
pi = vsi->port_info;
hw_link_info = &pi->phy.link_info;
- dcbx_cfg = &pi->local_dcbx_cfg;
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
/* Changing the port's flow control is not supported if this isn't the
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index cf5b717b9293..5e1fd30c0a0f 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -2727,7 +2727,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
case ICE_SID_XLT1_RSS:
case ICE_SID_XLT1_ACL:
case ICE_SID_XLT1_PE:
- xlt1 = (struct ice_xlt1_section *)sect;
+ xlt1 = sect;
src = xlt1->value;
sect_len = le16_to_cpu(xlt1->count) *
sizeof(*hw->blk[block_id].xlt1.t);
@@ -2740,7 +2740,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
case ICE_SID_XLT2_RSS:
case ICE_SID_XLT2_ACL:
case ICE_SID_XLT2_PE:
- xlt2 = (struct ice_xlt2_section *)sect;
+ xlt2 = sect;
src = (__force u8 *)xlt2->value;
sect_len = le16_to_cpu(xlt2->count) *
sizeof(*hw->blk[block_id].xlt2.t);
@@ -2753,7 +2753,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
case ICE_SID_PROFID_TCAM_RSS:
case ICE_SID_PROFID_TCAM_ACL:
case ICE_SID_PROFID_TCAM_PE:
- pid = (struct ice_prof_id_section *)sect;
+ pid = sect;
src = (u8 *)pid->entry;
sect_len = le16_to_cpu(pid->count) *
sizeof(*hw->blk[block_id].prof.t);
@@ -2766,7 +2766,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
case ICE_SID_PROFID_REDIR_RSS:
case ICE_SID_PROFID_REDIR_ACL:
case ICE_SID_PROFID_REDIR_PE:
- pr = (struct ice_prof_redir_section *)sect;
+ pr = sect;
src = pr->redir_value;
sect_len = le16_to_cpu(pr->count) *
sizeof(*hw->blk[block_id].prof_redir.t);
@@ -2779,7 +2779,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
case ICE_SID_FLD_VEC_RSS:
case ICE_SID_FLD_VEC_ACL:
case ICE_SID_FLD_VEC_PE:
- es = (struct ice_sw_fv_section *)sect;
+ es = sect;
src = (u8 *)es->fv;
sect_len = (u32)(le16_to_cpu(es->count) *
hw->blk[block_id].es.fvw) *
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 90abc8612a6a..093a1818a392 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -86,6 +86,9 @@
#define QRXFLXP_CNTXT_RXDID_PRIO_S 8
#define QRXFLXP_CNTXT_RXDID_PRIO_M ICE_M(0x7, 8)
#define QRXFLXP_CNTXT_TS_M BIT(11)
+#define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S 4
+#define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M ICE_M(0x3, 4)
+#define GLGEN_CLKSTAT_SRC 0x000B826C
#define GLGEN_RSTAT 0x000B8188
#define GLGEN_RSTAT_DEVSTATE_M ICE_M(0x3, 0)
#define GLGEN_RSTCTL 0x000B8180
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
new file mode 100644
index 000000000000..4599fc3b4ed8
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+/* Link Aggregation code */
+
+#include "ice.h"
+#include "ice_lag.h"
+
+/**
+ * ice_lag_nop_handler - no-op Rx handler to disable LAG
+ * @pskb: pointer to skb pointer
+ */
+rx_handler_result_t ice_lag_nop_handler(struct sk_buff __always_unused **pskb)
+{
+ return RX_HANDLER_PASS;
+}
+
+/**
+ * ice_lag_set_primary - set PF LAG state as Primary
+ * @lag: LAG info struct
+ */
+static void ice_lag_set_primary(struct ice_lag *lag)
+{
+ struct ice_pf *pf = lag->pf;
+
+ if (!pf)
+ return;
+
+ if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_BACKUP) {
+ dev_warn(ice_pf_to_dev(pf), "%s: Attempt to be Primary, but incompatible state.\n",
+ netdev_name(lag->netdev));
+ return;
+ }
+
+ lag->role = ICE_LAG_PRIMARY;
+}
+
+/**
+ * ice_lag_set_backup - set PF LAG state to Backup
+ * @lag: LAG info struct
+ */
+static void ice_lag_set_backup(struct ice_lag *lag)
+{
+ struct ice_pf *pf = lag->pf;
+
+ if (!pf)
+ return;
+
+ if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_PRIMARY) {
+ dev_dbg(ice_pf_to_dev(pf), "%s: Attempt to be Backup, but incompatible state\n",
+ netdev_name(lag->netdev));
+ return;
+ }
+
+ lag->role = ICE_LAG_BACKUP;
+}
+
+/**
+ * ice_display_lag_info - print LAG info
+ * @lag: LAG info struct
+ */
+static void ice_display_lag_info(struct ice_lag *lag)
+{
+ const char *name, *peer, *upper, *role, *bonded, *master;
+ struct device *dev = &lag->pf->pdev->dev;
+
+ name = lag->netdev ? netdev_name(lag->netdev) : "unset";
+ peer = lag->peer_netdev ? netdev_name(lag->peer_netdev) : "unset";
+ upper = lag->upper_netdev ? netdev_name(lag->upper_netdev) : "unset";
+ master = lag->master ? "TRUE" : "FALSE";
+ bonded = lag->bonded ? "BONDED" : "UNBONDED";
+
+ switch (lag->role) {
+ case ICE_LAG_NONE:
+ role = "NONE";
+ break;
+ case ICE_LAG_PRIMARY:
+ role = "PRIMARY";
+ break;
+ case ICE_LAG_BACKUP:
+ role = "BACKUP";
+ break;
+ case ICE_LAG_UNSET:
+ role = "UNSET";
+ break;
+ default:
+ role = "ERROR";
+ }
+
+ dev_dbg(dev, "%s %s, peer:%s, upper:%s, role:%s, master:%s\n", name,
+ bonded, peer, upper, role, master);
+}
+
+/**
+ * ice_lag_info_event - handle NETDEV_BONDING_INFO event
+ * @lag: LAG info struct
+ * @ptr: opaque data pointer
+ *
+ * ptr is to be cast to (netdev_notifier_bonding_info *)
+ */
+static void ice_lag_info_event(struct ice_lag *lag, void *ptr)
+{
+ struct net_device *event_netdev, *netdev_tmp;
+ struct netdev_notifier_bonding_info *info;
+ struct netdev_bonding_info *bonding_info;
+ const char *lag_netdev_name;
+
+ event_netdev = netdev_notifier_info_to_dev(ptr);
+ info = ptr;
+ lag_netdev_name = netdev_name(lag->netdev);
+ bonding_info = &info->bonding_info;
+
+ if (event_netdev != lag->netdev || !lag->bonded || !lag->upper_netdev)
+ return;
+
+ if (bonding_info->master.bond_mode != BOND_MODE_ACTIVEBACKUP) {
+ netdev_dbg(lag->netdev, "Bonding event recv, but mode not active/backup\n");
+ goto lag_out;
+ }
+
+ if (strcmp(bonding_info->slave.slave_name, lag_netdev_name)) {
+ netdev_dbg(lag->netdev, "Bonding event recv, but slave info not for us\n");
+ goto lag_out;
+ }
+
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(lag->upper_netdev, netdev_tmp) {
+ if (!netif_is_ice(netdev_tmp))
+ continue;
+
+ if (netdev_tmp && netdev_tmp != lag->netdev &&
+ lag->peer_netdev != netdev_tmp) {
+ dev_hold(netdev_tmp);
+ lag->peer_netdev = netdev_tmp;
+ }
+ }
+ rcu_read_unlock();
+
+ if (bonding_info->slave.state)
+ ice_lag_set_backup(lag);
+ else
+ ice_lag_set_primary(lag);
+
+lag_out:
+ ice_display_lag_info(lag);
+}
+
+/**
+ * ice_lag_link - handle LAG link event
+ * @lag: LAG info struct
+ * @info: info from the netdev notifier
+ */
+static void
+ice_lag_link(struct ice_lag *lag, struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *netdev_tmp, *upper = info->upper_dev;
+ struct ice_pf *pf = lag->pf;
+ int peers = 0;
+
+ if (lag->bonded)
+ dev_warn(ice_pf_to_dev(pf), "%s Already part of a bond\n",
+ netdev_name(lag->netdev));
+
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper, netdev_tmp)
+ peers++;
+ rcu_read_unlock();
+
+ if (lag->upper_netdev != upper) {
+ dev_hold(upper);
+ lag->upper_netdev = upper;
+ }
+
+ ice_clear_sriov_cap(pf);
+
+ lag->bonded = true;
+ lag->role = ICE_LAG_UNSET;
+
+ /* if this is the first element in an LAG mark as master */
+ lag->master = !!(peers == 1);
+}
+
+/**
+ * ice_lag_unlink - handle unlink event
+ * @lag: LAG info struct
+ * @info: info from netdev notification
+ */
+static void
+ice_lag_unlink(struct ice_lag *lag,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *netdev_tmp, *upper = info->upper_dev;
+ struct ice_pf *pf = lag->pf;
+ bool found = false;
+
+ if (!lag->bonded) {
+ netdev_dbg(lag->netdev, "bonding unlink event on non-LAG netdev\n");
+ return;
+ }
+
+ /* determine if we are in the new LAG config or not */
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper, netdev_tmp) {
+ if (netdev_tmp == lag->netdev) {
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (found)
+ return;
+
+ if (lag->upper_netdev) {
+ dev_put(lag->upper_netdev);
+ lag->upper_netdev = NULL;
+ }
+
+ if (lag->peer_netdev) {
+ dev_put(lag->peer_netdev);
+ lag->peer_netdev = NULL;
+ }
+
+ ice_set_sriov_cap(pf);
+ lag->bonded = false;
+ lag->role = ICE_LAG_NONE;
+}
+
+/**
+ * ice_lag_changeupper_event - handle LAG changeupper event
+ * @lag: LAG info struct
+ * @ptr: opaque pointer data
+ *
+ * ptr is to be cast into netdev_notifier_changeupper_info
+ */
+static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr)
+{
+ struct netdev_notifier_changeupper_info *info;
+ struct net_device *netdev;
+
+ info = ptr;
+ netdev = netdev_notifier_info_to_dev(ptr);
+
+ /* not for this netdev */
+ if (netdev != lag->netdev)
+ return;
+
+ if (!info->upper_dev) {
+ netdev_dbg(netdev, "changeupper rcvd, but no upper defined\n");
+ return;
+ }
+
+ netdev_dbg(netdev, "bonding %s\n", info->linking ? "LINK" : "UNLINK");
+
+ if (!netif_is_lag_master(info->upper_dev)) {
+ netdev_dbg(netdev, "changeupper rcvd, but not master. bail\n");
+ return;
+ }
+
+ if (info->linking)
+ ice_lag_link(lag, info);
+ else
+ ice_lag_unlink(lag, info);
+
+ ice_display_lag_info(lag);
+}
+
+/**
+ * ice_lag_changelower_event - handle LAG changelower event
+ * @lag: LAG info struct
+ * @ptr: opaque data pointer
+ *
+ * ptr to be cast to netdev_notifier_changelowerstate_info
+ */
+static void ice_lag_changelower_event(struct ice_lag *lag, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ if (netdev != lag->netdev)
+ return;
+
+ netdev_dbg(netdev, "bonding info\n");
+
+ if (!netif_is_lag_port(netdev))
+ netdev_dbg(netdev, "CHANGELOWER rcvd, but netdev not in LAG. Bail\n");
+}
+
+/**
+ * ice_lag_event_handler - handle LAG events from netdev
+ * @notif_blk: notifier block registered by this netdev
+ * @event: event type
+ * @ptr: opaque data containing notifier event
+ */
+static int
+ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event,
+ void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct ice_lag *lag;
+
+ lag = container_of(notif_blk, struct ice_lag, notif_block);
+
+ if (!lag->netdev)
+ return NOTIFY_DONE;
+
+ /* Check that the netdev is in the working namespace */
+ if (!net_eq(dev_net(netdev), &init_net))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ ice_lag_changeupper_event(lag, ptr);
+ break;
+ case NETDEV_CHANGELOWERSTATE:
+ ice_lag_changelower_event(lag, ptr);
+ break;
+ case NETDEV_BONDING_INFO:
+ ice_lag_info_event(lag, ptr);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * ice_register_lag_handler - register LAG handler on netdev
+ * @lag: LAG struct
+ */
+static int ice_register_lag_handler(struct ice_lag *lag)
+{
+ struct device *dev = ice_pf_to_dev(lag->pf);
+ struct notifier_block *notif_blk;
+
+ notif_blk = &lag->notif_block;
+
+ if (!notif_blk->notifier_call) {
+ notif_blk->notifier_call = ice_lag_event_handler;
+ if (register_netdevice_notifier(notif_blk)) {
+ notif_blk->notifier_call = NULL;
+ dev_err(dev, "FAIL register LAG event handler!\n");
+ return -EINVAL;
+ }
+ dev_dbg(dev, "LAG event handler registered\n");
+ }
+ return 0;
+}
+
+/**
+ * ice_unregister_lag_handler - unregister LAG handler on netdev
+ * @lag: LAG struct
+ */
+static void ice_unregister_lag_handler(struct ice_lag *lag)
+{
+ struct device *dev = ice_pf_to_dev(lag->pf);
+ struct notifier_block *notif_blk;
+
+ notif_blk = &lag->notif_block;
+ if (notif_blk->notifier_call) {
+ unregister_netdevice_notifier(notif_blk);
+ dev_dbg(dev, "LAG event handler unregistered\n");
+ }
+}
+
+/**
+ * ice_init_lag - initialize support for LAG
+ * @pf: PF struct
+ *
+ * Alloc memory for LAG structs and initialize the elements.
+ * Memory will be freed in ice_deinit_lag
+ */
+int ice_init_lag(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_lag *lag;
+ struct ice_vsi *vsi;
+ int err;
+
+ pf->lag = kzalloc(sizeof(*lag), GFP_KERNEL);
+ if (!pf->lag)
+ return -ENOMEM;
+ lag = pf->lag;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi) {
+ dev_err(dev, "couldn't get main vsi, link aggregation init fail\n");
+ err = -EIO;
+ goto lag_error;
+ }
+
+ lag->pf = pf;
+ lag->netdev = vsi->netdev;
+ lag->role = ICE_LAG_NONE;
+ lag->bonded = false;
+ lag->peer_netdev = NULL;
+ lag->upper_netdev = NULL;
+ lag->notif_block.notifier_call = NULL;
+
+ err = ice_register_lag_handler(lag);
+ if (err) {
+ dev_warn(dev, "INIT LAG: Failed to register event handler\n");
+ goto lag_error;
+ }
+
+ ice_display_lag_info(lag);
+
+ dev_dbg(dev, "INIT LAG complete\n");
+ return 0;
+
+lag_error:
+ kfree(lag);
+ pf->lag = NULL;
+ return err;
+}
+
+/**
+ * ice_deinit_lag - Clean up LAG
+ * @pf: PF struct
+ *
+ * Clean up kernel LAG info and free memory
+ * This function is meant to only be called on driver remove/shutdown
+ */
+void ice_deinit_lag(struct ice_pf *pf)
+{
+ struct ice_lag *lag;
+
+ lag = pf->lag;
+
+ if (!lag)
+ return;
+
+ if (lag->pf)
+ ice_unregister_lag_handler(lag);
+
+ if (lag->upper_netdev)
+ dev_put(lag->upper_netdev);
+
+ if (lag->peer_netdev)
+ dev_put(lag->peer_netdev);
+
+ kfree(lag);
+
+ pf->lag = NULL;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h
new file mode 100644
index 000000000000..c2e3688dd8fd
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lag.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+#ifndef _ICE_LAG_H_
+#define _ICE_LAG_H_
+
+#include <linux/netdevice.h>
+
+/* LAG roles for netdev */
+enum ice_lag_role {
+ ICE_LAG_NONE,
+ ICE_LAG_PRIMARY,
+ ICE_LAG_BACKUP,
+ ICE_LAG_UNSET
+};
+
+struct ice_pf;
+
+/* LAG info struct */
+struct ice_lag {
+ struct ice_pf *pf; /* backlink to PF struct */
+ struct net_device *netdev; /* this PF's netdev */
+ struct net_device *peer_netdev;
+ struct net_device *upper_netdev; /* upper bonding netdev */
+ struct notifier_block notif_block;
+ u8 bonded:1; /* currently bonded */
+ u8 master:1; /* this is a master */
+ u8 handler:1; /* did we register a rx_netdev_handler */
+ /* each thing blocking bonding will increment this value by one.
+ * If this value is zero, then bonding is allowed.
+ */
+ u16 dis_lag;
+ u8 role;
+};
+
+int ice_init_lag(struct ice_pf *pf);
+void ice_deinit_lag(struct ice_pf *pf);
+rx_handler_result_t ice_lag_nop_handler(struct sk_buff **pskb);
+
+/**
+ * ice_disable_lag - increment LAG disable count
+ * @lag: LAG struct
+ */
+static inline void ice_disable_lag(struct ice_lag *lag)
+{
+ /* If LAG this PF is not already disabled, disable it */
+ rtnl_lock();
+ if (!netdev_is_rx_handler_busy(lag->netdev)) {
+ if (!netdev_rx_handler_register(lag->netdev,
+ ice_lag_nop_handler,
+ NULL))
+ lag->handler = true;
+ }
+ rtnl_unlock();
+ lag->dis_lag++;
+}
+
+/**
+ * ice_enable_lag - decrement disable count for a PF
+ * @lag: LAG struct
+ *
+ * Decrement the disable counter for a port, and if that count reaches
+ * zero, then remove the no-op Rx handler from that netdev
+ */
+static inline void ice_enable_lag(struct ice_lag *lag)
+{
+ if (lag->dis_lag)
+ lag->dis_lag--;
+ if (!lag->dis_lag && lag->handler) {
+ rtnl_lock();
+ netdev_rx_handler_unregister(lag->netdev);
+ rtnl_unlock();
+ lag->handler = false;
+ }
+}
+
+/**
+ * ice_is_lag_dis - is LAG disabled
+ * @lag: LAG struct
+ *
+ * Return true if bonding is disabled
+ */
+static inline bool ice_is_lag_dis(struct ice_lag *lag)
+{
+ return !!(lag->dis_lag);
+}
+#endif /* _ICE_LAG_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index ad9c22a1b97a..8d4e2ad4328d 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2078,7 +2078,7 @@ err_out:
static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
{
- struct ice_dcbx_cfg *cfg = &vsi->port_info->local_dcbx_cfg;
+ struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg);
vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg);
@@ -2145,11 +2145,18 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
dev = ice_pf_to_dev(pf);
eth_fltr = create ? ice_fltr_add_eth : ice_fltr_remove_eth;
- if (tx)
+ if (tx) {
status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_TX,
ICE_DROP_PACKET);
- else
- status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX, ICE_FWD_TO_VSI);
+ } else {
+ if (ice_fw_supports_lldp_fltr_ctrl(&pf->hw)) {
+ status = ice_lldp_fltr_add_remove(&pf->hw, vsi->vsi_num,
+ create);
+ } else {
+ status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX,
+ ICE_FWD_TO_VSI);
+ }
+ }
if (status)
dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n",
@@ -2158,6 +2165,126 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
}
/**
+ * ice_set_agg_vsi - sets up scheduler aggregator node and move VSI into it
+ * @vsi: pointer to the VSI
+ *
+ * This function will allocate new scheduler aggregator now if needed and will
+ * move specified VSI into it.
+ */
+static void ice_set_agg_vsi(struct ice_vsi *vsi)
+{
+ struct device *dev = ice_pf_to_dev(vsi->back);
+ struct ice_agg_node *agg_node_iter = NULL;
+ u32 agg_id = ICE_INVALID_AGG_NODE_ID;
+ struct ice_agg_node *agg_node = NULL;
+ int node_offset, max_agg_nodes = 0;
+ struct ice_port_info *port_info;
+ struct ice_pf *pf = vsi->back;
+ u32 agg_node_id_start = 0;
+ enum ice_status status;
+
+ /* create (as needed) scheduler aggregator node and move VSI into
+ * corresponding aggregator node
+ * - PF aggregator node to contains VSIs of type _PF and _CTRL
+ * - VF aggregator nodes will contain VF VSI
+ */
+ port_info = pf->hw.port_info;
+ if (!port_info)
+ return;
+
+ switch (vsi->type) {
+ case ICE_VSI_CTRL:
+ case ICE_VSI_LB:
+ case ICE_VSI_PF:
+ max_agg_nodes = ICE_MAX_PF_AGG_NODES;
+ agg_node_id_start = ICE_PF_AGG_NODE_ID_START;
+ agg_node_iter = &pf->pf_agg_node[0];
+ break;
+ case ICE_VSI_VF:
+ /* user can create 'n' VFs on a given PF, but since max children
+ * per aggregator node can be only 64. Following code handles
+ * aggregator(s) for VF VSIs, either selects a agg_node which
+ * was already created provided num_vsis < 64, otherwise
+ * select next available node, which will be created
+ */
+ max_agg_nodes = ICE_MAX_VF_AGG_NODES;
+ agg_node_id_start = ICE_VF_AGG_NODE_ID_START;
+ agg_node_iter = &pf->vf_agg_node[0];
+ break;
+ default:
+ /* other VSI type, handle later if needed */
+ dev_dbg(dev, "unexpected VSI type %s\n",
+ ice_vsi_type_str(vsi->type));
+ return;
+ }
+
+ /* find the appropriate aggregator node */
+ for (node_offset = 0; node_offset < max_agg_nodes; node_offset++) {
+ /* see if we can find space in previously created
+ * node if num_vsis < 64, otherwise skip
+ */
+ if (agg_node_iter->num_vsis &&
+ agg_node_iter->num_vsis == ICE_MAX_VSIS_IN_AGG_NODE) {
+ agg_node_iter++;
+ continue;
+ }
+
+ if (agg_node_iter->valid &&
+ agg_node_iter->agg_id != ICE_INVALID_AGG_NODE_ID) {
+ agg_id = agg_node_iter->agg_id;
+ agg_node = agg_node_iter;
+ break;
+ }
+
+ /* find unclaimed agg_id */
+ if (agg_node_iter->agg_id == ICE_INVALID_AGG_NODE_ID) {
+ agg_id = node_offset + agg_node_id_start;
+ agg_node = agg_node_iter;
+ break;
+ }
+ /* move to next agg_node */
+ agg_node_iter++;
+ }
+
+ if (!agg_node)
+ return;
+
+ /* if selected aggregator node was not created, create it */
+ if (!agg_node->valid) {
+ status = ice_cfg_agg(port_info, agg_id, ICE_AGG_TYPE_AGG,
+ (u8)vsi->tc_cfg.ena_tc);
+ if (status) {
+ dev_err(dev, "unable to create aggregator node with agg_id %u\n",
+ agg_id);
+ return;
+ }
+ /* aggregator node is created, store the neeeded info */
+ agg_node->valid = true;
+ agg_node->agg_id = agg_id;
+ }
+
+ /* move VSI to corresponding aggregator node */
+ status = ice_move_vsi_to_agg(port_info, agg_id, vsi->idx,
+ (u8)vsi->tc_cfg.ena_tc);
+ if (status) {
+ dev_err(dev, "unable to move VSI idx %u into aggregator %u node",
+ vsi->idx, agg_id);
+ return;
+ }
+
+ /* keep active children count for aggregator node */
+ agg_node->num_vsis++;
+
+ /* cache the 'agg_id' in VSI, so that after reset - VSI will be moved
+ * to aggregator node
+ */
+ vsi->agg_node = agg_node;
+ dev_dbg(dev, "successfully moved VSI idx %u tc_bitmap 0x%x) into aggregator node %d which has num_vsis %u\n",
+ vsi->idx, vsi->tc_cfg.ena_tc, vsi->agg_node->agg_id,
+ vsi->agg_node->num_vsis);
+}
+
+/**
* ice_vsi_setup - Set up a VSI by a given type
* @pf: board private structure
* @pi: pointer to the port_info instance
@@ -2327,6 +2454,8 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
ice_cfg_sw_lldp(vsi, true, true);
}
+ if (!vsi->agg_node)
+ ice_set_agg_vsi(vsi);
return vsi;
unroll_clear_rings:
@@ -2342,6 +2471,8 @@ unroll_vsi_init:
unroll_get_qs:
ice_vsi_put_qs(vsi);
unroll_vsi_alloc:
+ if (vsi_type == ICE_VSI_VF)
+ ice_enable_lag(pf->lag);
ice_vsi_clear(vsi);
return NULL;
@@ -2669,6 +2800,9 @@ int ice_vsi_release(struct ice_vsi *vsi)
vsi->netdev = NULL;
}
+ if (vsi->type == ICE_VSI_VF &&
+ vsi->agg_node && vsi->agg_node->valid)
+ vsi->agg_node->num_vsis--;
ice_vsi_clear_rings(vsi);
ice_vsi_put_qs(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 98cd44a3ccf7..813ec6b8ac23 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -44,6 +44,11 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
static void ice_vsi_release_all(struct ice_pf *pf);
+bool netif_is_ice(struct net_device *dev)
+{
+ return dev && (dev->netdev_ops == &ice_netdev_ops);
+}
+
/**
* ice_get_tx_pending - returns number of Tx descriptors not processed
* @ring: the ring of descriptors
@@ -430,11 +435,19 @@ static void ice_sync_fltr_subtask(struct ice_pf *pf)
*/
static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
{
+ int node;
int v;
ice_for_each_vsi(pf, v)
if (pf->vsi[v])
ice_dis_vsi(pf->vsi[v], locked);
+
+ for (node = 0; node < ICE_MAX_PF_AGG_NODES; node++)
+ pf->pf_agg_node[node].num_vsis = 0;
+
+ for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++)
+ pf->vf_agg_node[node].num_vsis = 0;
+
}
/**
@@ -2476,6 +2489,22 @@ free_qmap:
}
/**
+ * ice_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI
+ * @vsi: VSI to schedule napi on
+ */
+static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi)
+{
+ int i;
+
+ ice_for_each_rxq(vsi, i) {
+ struct ice_ring *rx_ring = vsi->rx_rings[i];
+
+ if (rx_ring->xsk_pool)
+ napi_schedule(&rx_ring->q_vector->napi);
+ }
+}
+
+/**
* ice_xdp_setup_prog - Add or remove XDP eBPF program
* @vsi: VSI to setup XDP for
* @prog: XDP program
@@ -2519,16 +2548,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
if (if_running)
ret = ice_up(vsi);
- if (!ret && prog && vsi->xsk_pools) {
- int i;
-
- ice_for_each_rxq(vsi, i) {
- struct ice_ring *rx_ring = vsi->rx_rings[i];
-
- if (rx_ring->xsk_pool)
- napi_schedule(&rx_ring->q_vector->napi);
- }
- }
+ if (!ret && prog)
+ ice_vsi_rx_napi_schedule(vsi);
return (ret || xdp_ring_err) ? -ENOMEM : 0;
}
@@ -3370,28 +3391,20 @@ static int ice_init_pf(struct ice_pf *pf)
*/
static int ice_ena_msix_range(struct ice_pf *pf)
{
+ int v_left, v_actual, v_other, v_budget = 0;
struct device *dev = ice_pf_to_dev(pf);
- int v_left, v_actual, v_budget = 0;
int needed, err, i;
v_left = pf->hw.func_caps.common_cap.num_msix_vectors;
- /* reserve one vector for miscellaneous handler */
- needed = 1;
- if (v_left < needed)
- goto no_hw_vecs_left_err;
- v_budget += needed;
- v_left -= needed;
-
- /* reserve vectors for LAN traffic */
- needed = min_t(int, num_online_cpus(), v_left);
+ /* reserve for LAN miscellaneous handler */
+ needed = ICE_MIN_LAN_OICR_MSIX;
if (v_left < needed)
goto no_hw_vecs_left_err;
- pf->num_lan_msix = needed;
v_budget += needed;
v_left -= needed;
- /* reserve one vector for flow director */
+ /* reserve for flow director */
if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
needed = ICE_FDIR_MSIX;
if (v_left < needed)
@@ -3400,9 +3413,19 @@ static int ice_ena_msix_range(struct ice_pf *pf)
v_left -= needed;
}
+ /* total used for non-traffic vectors */
+ v_other = v_budget;
+
+ /* reserve vectors for LAN traffic */
+ needed = min_t(int, num_online_cpus(), v_left);
+ if (v_left < needed)
+ goto no_hw_vecs_left_err;
+ pf->num_lan_msix = needed;
+ v_budget += needed;
+ v_left -= needed;
+
pf->msix_entries = devm_kcalloc(dev, v_budget,
sizeof(*pf->msix_entries), GFP_KERNEL);
-
if (!pf->msix_entries) {
err = -ENOMEM;
goto exit_err;
@@ -3414,7 +3437,6 @@ static int ice_ena_msix_range(struct ice_pf *pf)
/* actually reserve the vectors */
v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries,
ICE_MIN_MSIX, v_budget);
-
if (v_actual < 0) {
dev_err(dev, "unable to reserve MSI-X vectors\n");
err = v_actual;
@@ -3431,7 +3453,16 @@ static int ice_ena_msix_range(struct ice_pf *pf)
err = -ERANGE;
goto msix_err;
} else {
- pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX;
+ int v_traffic = v_actual - v_other;
+
+ if (v_actual == ICE_MIN_MSIX ||
+ v_traffic < ICE_MIN_LAN_TXRX_MSIX)
+ pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX;
+ else
+ pf->num_lan_msix = v_traffic;
+
+ dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n",
+ pf->num_lan_msix);
}
}
@@ -4227,6 +4258,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
ice_cfg_lldp_mib_change(&pf->hw, true);
}
+ if (ice_init_lag(pf))
+ dev_warn(dev, "Failed to init link aggregation support\n");
+
/* print PCI link speed and width */
pcie_print_link_status(pf->pdev);
@@ -4349,6 +4383,7 @@ static void ice_remove(struct pci_dev *pdev)
ice_aq_cancel_waiting_tasks(pf);
mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
+ ice_deinit_lag(pf);
if (!ice_is_safe_mode(pf))
ice_remove_arfs(pf);
ice_setup_mc_magic_wake(pf);
@@ -6152,7 +6187,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
err = ice_down(vsi);
if (err) {
- netdev_err(netdev, "change MTU if_up err %d\n", err);
+ netdev_err(netdev, "change MTU if_down err %d\n", err);
return err;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index f0912e44d4ad..2403cb38b93c 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -431,6 +431,27 @@ ice_aq_cfg_sched_elems(struct ice_hw *hw, u16 elems_req,
}
/**
+ * ice_aq_move_sched_elems - move scheduler elements
+ * @hw: pointer to the HW struct
+ * @grps_req: number of groups to move
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_movd: returns total number of groups moved
+ * @cd: pointer to command details structure or NULL
+ *
+ * Move scheduling elements (0x0408)
+ */
+static enum ice_status
+ice_aq_move_sched_elems(struct ice_hw *hw, u16 grps_req,
+ struct ice_aqc_move_elem *buf, u16 buf_size,
+ u16 *grps_movd, struct ice_sq_cd *cd)
+{
+ return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_move_sched_elems,
+ grps_req, (void *)buf, buf_size,
+ grps_movd, cd);
+}
+
+/**
* ice_aq_suspend_sched_elems - suspend scheduler elements
* @hw: pointer to the HW struct
* @elems_req: number of elements to suspend
@@ -1022,6 +1043,28 @@ static u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
}
/**
+ * ice_sched_get_agg_layer - get the current aggregator layer number
+ * @hw: pointer to the HW struct
+ *
+ * This function returns the current aggregator layer number
+ */
+static u8 ice_sched_get_agg_layer(struct ice_hw *hw)
+{
+ /* Num Layers aggregator layer
+ * 9 4
+ * 7 or less sw_entry_point_layer
+ */
+ /* calculate the aggregator layer based on number of layers. */
+ if (hw->num_tx_sched_layers > ICE_AGG_LAYER_OFFSET + 1) {
+ u8 layer = hw->num_tx_sched_layers - ICE_AGG_LAYER_OFFSET;
+
+ if (layer > hw->sw_entry_point_layer)
+ return layer;
+ }
+ return hw->sw_entry_point_layer;
+}
+
+/**
* ice_rm_dflt_leaf_node - remove the default leaf node in the tree
* @pi: port information structure
*
@@ -1239,6 +1282,46 @@ sched_query_out:
}
/**
+ * ice_sched_get_psm_clk_freq - determine the PSM clock frequency
+ * @hw: pointer to the HW struct
+ *
+ * Determine the PSM clock frequency and store in HW struct
+ */
+void ice_sched_get_psm_clk_freq(struct ice_hw *hw)
+{
+ u32 val, clk_src;
+
+ val = rd32(hw, GLGEN_CLKSTAT_SRC);
+ clk_src = (val & GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M) >>
+ GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S;
+
+#define PSM_CLK_SRC_367_MHZ 0x0
+#define PSM_CLK_SRC_416_MHZ 0x1
+#define PSM_CLK_SRC_446_MHZ 0x2
+#define PSM_CLK_SRC_390_MHZ 0x3
+
+ switch (clk_src) {
+ case PSM_CLK_SRC_367_MHZ:
+ hw->psm_clk_freq = ICE_PSM_CLK_367MHZ_IN_HZ;
+ break;
+ case PSM_CLK_SRC_416_MHZ:
+ hw->psm_clk_freq = ICE_PSM_CLK_416MHZ_IN_HZ;
+ break;
+ case PSM_CLK_SRC_446_MHZ:
+ hw->psm_clk_freq = ICE_PSM_CLK_446MHZ_IN_HZ;
+ break;
+ case PSM_CLK_SRC_390_MHZ:
+ hw->psm_clk_freq = ICE_PSM_CLK_390MHZ_IN_HZ;
+ break;
+ default:
+ ice_debug(hw, ICE_DBG_SCHED, "PSM clk_src unexpected %u\n",
+ clk_src);
+ /* fall back to a safe default */
+ hw->psm_clk_freq = ICE_PSM_CLK_446MHZ_IN_HZ;
+ }
+}
+
+/**
* ice_sched_find_node_in_subtree - Find node in part of base node subtree
* @hw: pointer to the HW struct
* @base: pointer to the base node
@@ -1364,7 +1447,7 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
/**
* ice_sched_get_vsi_node - Get a VSI node based on VSI ID
- * @hw: pointer to the HW struct
+ * @pi: pointer to the port information structure
* @tc_node: pointer to the TC node
* @vsi_handle: software VSI handle
*
@@ -1372,14 +1455,14 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
* TC branch
*/
static struct ice_sched_node *
-ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
+ice_sched_get_vsi_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
u16 vsi_handle)
{
struct ice_sched_node *node;
u8 vsi_layer;
- vsi_layer = ice_sched_get_vsi_layer(hw);
- node = ice_sched_get_first_node(hw->port_info, tc_node, vsi_layer);
+ vsi_layer = ice_sched_get_vsi_layer(pi->hw);
+ node = ice_sched_get_first_node(pi, tc_node, vsi_layer);
/* Check whether it already exists */
while (node) {
@@ -1392,6 +1475,38 @@ ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
}
/**
+ * ice_sched_get_agg_node - Get an aggregator node based on aggregator ID
+ * @pi: pointer to the port information structure
+ * @tc_node: pointer to the TC node
+ * @agg_id: aggregator ID
+ *
+ * This function retrieves an aggregator node for a given aggregator ID from
+ * a given TC branch
+ */
+static struct ice_sched_node *
+ice_sched_get_agg_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+ u32 agg_id)
+{
+ struct ice_sched_node *node;
+ struct ice_hw *hw = pi->hw;
+ u8 agg_layer;
+
+ if (!hw)
+ return NULL;
+ agg_layer = ice_sched_get_agg_layer(hw);
+ node = ice_sched_get_first_node(pi, tc_node, agg_layer);
+
+ /* Check whether it already exists */
+ while (node) {
+ if (node->agg_id == agg_id)
+ return node;
+ node = node->sibling;
+ }
+
+ return node;
+}
+
+/**
* ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
* @hw: pointer to the HW struct
* @num_qs: number of queues
@@ -1444,7 +1559,7 @@ ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
qgl = ice_sched_get_qgrp_layer(hw);
vsil = ice_sched_get_vsi_layer(hw);
- parent = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
+ parent = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
for (i = vsil + 1; i <= qgl; i++) {
if (!parent)
return ICE_ERR_CFG;
@@ -1477,7 +1592,7 @@ ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
/**
* ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
- * @hw: pointer to the HW struct
+ * @pi: pointer to the port info structure
* @tc_node: pointer to TC node
* @num_nodes: pointer to num nodes array
*
@@ -1486,15 +1601,15 @@ ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
* layers
*/
static void
-ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
+ice_sched_calc_vsi_support_nodes(struct ice_port_info *pi,
struct ice_sched_node *tc_node, u16 *num_nodes)
{
struct ice_sched_node *node;
u8 vsil;
int i;
- vsil = ice_sched_get_vsi_layer(hw);
- for (i = vsil; i >= hw->sw_entry_point_layer; i--)
+ vsil = ice_sched_get_vsi_layer(pi->hw);
+ for (i = vsil; i >= pi->hw->sw_entry_point_layer; i--)
/* Add intermediate nodes if TC has no children and
* need at least one node for VSI
*/
@@ -1504,11 +1619,10 @@ ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
/* If intermediate nodes are reached max children
* then add a new one.
*/
- node = ice_sched_get_first_node(hw->port_info, tc_node,
- (u8)i);
+ node = ice_sched_get_first_node(pi, tc_node, (u8)i);
/* scan all the siblings */
while (node) {
- if (node->num_children < hw->max_children[i])
+ if (node->num_children < pi->hw->max_children[i])
break;
node = node->sibling;
}
@@ -1588,14 +1702,13 @@ ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
{
u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
struct ice_sched_node *tc_node;
- struct ice_hw *hw = pi->hw;
tc_node = ice_sched_get_tc_node(pi, tc);
if (!tc_node)
return ICE_ERR_PARAM;
/* calculate number of supported nodes needed for this VSI */
- ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);
+ ice_sched_calc_vsi_support_nodes(pi, tc_node, num_nodes);
/* add VSI supported nodes to TC subtree */
return ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
@@ -1628,7 +1741,7 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
if (!tc_node)
return ICE_ERR_CFG;
- vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
if (!vsi_node)
return ICE_ERR_CFG;
@@ -1691,7 +1804,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
if (!vsi_ctx)
return ICE_ERR_PARAM;
- vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
/* suspend the VSI if TC is not enabled */
if (!enable) {
@@ -1712,7 +1825,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
if (status)
return status;
- vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
if (!vsi_node)
return ICE_ERR_CFG;
@@ -1821,7 +1934,7 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
if (!tc_node)
continue;
- vsi_node = ice_sched_get_vsi_node(pi->hw, tc_node, vsi_handle);
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
if (!vsi_node)
continue;
@@ -1874,6 +1987,720 @@ enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
}
/**
+ * ice_get_agg_info - get the aggregator ID
+ * @hw: pointer to the hardware structure
+ * @agg_id: aggregator ID
+ *
+ * This function validates aggregator ID. The function returns info if
+ * aggregator ID is present in list otherwise it returns null.
+ */
+static struct ice_sched_agg_info *
+ice_get_agg_info(struct ice_hw *hw, u32 agg_id)
+{
+ struct ice_sched_agg_info *agg_info;
+
+ list_for_each_entry(agg_info, &hw->agg_list, list_entry)
+ if (agg_info->agg_id == agg_id)
+ return agg_info;
+
+ return NULL;
+}
+
+/**
+ * ice_sched_get_free_vsi_parent - Find a free parent node in aggregator subtree
+ * @hw: pointer to the HW struct
+ * @node: pointer to a child node
+ * @num_nodes: num nodes count array
+ *
+ * This function walks through the aggregator subtree to find a free parent
+ * node
+ */
+static struct ice_sched_node *
+ice_sched_get_free_vsi_parent(struct ice_hw *hw, struct ice_sched_node *node,
+ u16 *num_nodes)
+{
+ u8 l = node->tx_sched_layer;
+ u8 vsil, i;
+
+ vsil = ice_sched_get_vsi_layer(hw);
+
+ /* Is it VSI parent layer ? */
+ if (l == vsil - 1)
+ return (node->num_children < hw->max_children[l]) ? node : NULL;
+
+ /* We have intermediate nodes. Let's walk through the subtree. If the
+ * intermediate node has space to add a new node then clear the count
+ */
+ if (node->num_children < hw->max_children[l])
+ num_nodes[l] = 0;
+ /* The below recursive call is intentional and wouldn't go more than
+ * 2 or 3 iterations.
+ */
+
+ for (i = 0; i < node->num_children; i++) {
+ struct ice_sched_node *parent;
+
+ parent = ice_sched_get_free_vsi_parent(hw, node->children[i],
+ num_nodes);
+ if (parent)
+ return parent;
+ }
+
+ return NULL;
+}
+
+/**
+ * ice_sched_update_parent - update the new parent in SW DB
+ * @new_parent: pointer to a new parent node
+ * @node: pointer to a child node
+ *
+ * This function removes the child from the old parent and adds it to a new
+ * parent
+ */
+static void
+ice_sched_update_parent(struct ice_sched_node *new_parent,
+ struct ice_sched_node *node)
+{
+ struct ice_sched_node *old_parent;
+ u8 i, j;
+
+ old_parent = node->parent;
+
+ /* update the old parent children */
+ for (i = 0; i < old_parent->num_children; i++)
+ if (old_parent->children[i] == node) {
+ for (j = i + 1; j < old_parent->num_children; j++)
+ old_parent->children[j - 1] =
+ old_parent->children[j];
+ old_parent->num_children--;
+ break;
+ }
+
+ /* now move the node to a new parent */
+ new_parent->children[new_parent->num_children++] = node;
+ node->parent = new_parent;
+ node->info.parent_teid = new_parent->info.node_teid;
+}
+
+/**
+ * ice_sched_move_nodes - move child nodes to a given parent
+ * @pi: port information structure
+ * @parent: pointer to parent node
+ * @num_items: number of child nodes to be moved
+ * @list: pointer to child node teids
+ *
+ * This function move the child nodes to a given parent.
+ */
+static enum ice_status
+ice_sched_move_nodes(struct ice_port_info *pi, struct ice_sched_node *parent,
+ u16 num_items, u32 *list)
+{
+ struct ice_aqc_move_elem *buf;
+ struct ice_sched_node *node;
+ enum ice_status status = 0;
+ u16 i, grps_movd = 0;
+ struct ice_hw *hw;
+ u16 buf_len;
+
+ hw = pi->hw;
+
+ if (!parent || !num_items)
+ return ICE_ERR_PARAM;
+
+ /* Does parent have enough space */
+ if (parent->num_children + num_items >
+ hw->max_children[parent->tx_sched_layer])
+ return ICE_ERR_AQ_FULL;
+
+ buf_len = struct_size(buf, teid, 1);
+ buf = kzalloc(buf_len, GFP_KERNEL);
+ if (!buf)
+ return ICE_ERR_NO_MEMORY;
+
+ for (i = 0; i < num_items; i++) {
+ node = ice_sched_find_node_by_teid(pi->root, list[i]);
+ if (!node) {
+ status = ICE_ERR_PARAM;
+ goto move_err_exit;
+ }
+
+ buf->hdr.src_parent_teid = node->info.parent_teid;
+ buf->hdr.dest_parent_teid = parent->info.node_teid;
+ buf->teid[0] = node->info.node_teid;
+ buf->hdr.num_elems = cpu_to_le16(1);
+ status = ice_aq_move_sched_elems(hw, 1, buf, buf_len,
+ &grps_movd, NULL);
+ if (status && grps_movd != 1) {
+ status = ICE_ERR_CFG;
+ goto move_err_exit;
+ }
+
+ /* update the SW DB */
+ ice_sched_update_parent(parent, node);
+ }
+
+move_err_exit:
+ kfree(buf);
+ return status;
+}
+
+/**
+ * ice_sched_move_vsi_to_agg - move VSI to aggregator node
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @agg_id: aggregator ID
+ * @tc: TC number
+ *
+ * This function moves a VSI to an aggregator node or its subtree.
+ * Intermediate nodes may be created if required.
+ */
+static enum ice_status
+ice_sched_move_vsi_to_agg(struct ice_port_info *pi, u16 vsi_handle, u32 agg_id,
+ u8 tc)
+{
+ struct ice_sched_node *vsi_node, *agg_node, *tc_node, *parent;
+ u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+ u32 first_node_teid, vsi_teid;
+ enum ice_status status;
+ u16 num_nodes_added;
+ u8 aggl, vsil, i;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return ICE_ERR_CFG;
+
+ agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id);
+ if (!agg_node)
+ return ICE_ERR_DOES_NOT_EXIST;
+
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+ if (!vsi_node)
+ return ICE_ERR_DOES_NOT_EXIST;
+
+ /* Is this VSI already part of given aggregator? */
+ if (ice_sched_find_node_in_subtree(pi->hw, agg_node, vsi_node))
+ return 0;
+
+ aggl = ice_sched_get_agg_layer(pi->hw);
+ vsil = ice_sched_get_vsi_layer(pi->hw);
+
+ /* set intermediate node count to 1 between aggregator and VSI layers */
+ for (i = aggl + 1; i < vsil; i++)
+ num_nodes[i] = 1;
+
+ /* Check if the aggregator subtree has any free node to add the VSI */
+ for (i = 0; i < agg_node->num_children; i++) {
+ parent = ice_sched_get_free_vsi_parent(pi->hw,
+ agg_node->children[i],
+ num_nodes);
+ if (parent)
+ goto move_nodes;
+ }
+
+ /* add new nodes */
+ parent = agg_node;
+ for (i = aggl + 1; i < vsil; i++) {
+ status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
+ num_nodes[i],
+ &first_node_teid,
+ &num_nodes_added);
+ if (status || num_nodes[i] != num_nodes_added)
+ return ICE_ERR_CFG;
+
+ /* The newly added node can be a new parent for the next
+ * layer nodes
+ */
+ if (num_nodes_added)
+ parent = ice_sched_find_node_by_teid(tc_node,
+ first_node_teid);
+ else
+ parent = parent->children[0];
+
+ if (!parent)
+ return ICE_ERR_CFG;
+ }
+
+move_nodes:
+ vsi_teid = le32_to_cpu(vsi_node->info.node_teid);
+ return ice_sched_move_nodes(pi, parent, 1, &vsi_teid);
+}
+
+/**
+ * ice_move_all_vsi_to_dflt_agg - move all VSI(s) to default aggregator
+ * @pi: port information structure
+ * @agg_info: aggregator info
+ * @tc: traffic class number
+ * @rm_vsi_info: true or false
+ *
+ * This function move all the VSI(s) to the default aggregator and delete
+ * aggregator VSI info based on passed in boolean parameter rm_vsi_info. The
+ * caller holds the scheduler lock.
+ */
+static enum ice_status
+ice_move_all_vsi_to_dflt_agg(struct ice_port_info *pi,
+ struct ice_sched_agg_info *agg_info, u8 tc,
+ bool rm_vsi_info)
+{
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_sched_agg_vsi_info *tmp;
+ enum ice_status status = 0;
+
+ list_for_each_entry_safe(agg_vsi_info, tmp, &agg_info->agg_vsi_list,
+ list_entry) {
+ u16 vsi_handle = agg_vsi_info->vsi_handle;
+
+ /* Move VSI to default aggregator */
+ if (!ice_is_tc_ena(agg_vsi_info->tc_bitmap[0], tc))
+ continue;
+
+ status = ice_sched_move_vsi_to_agg(pi, vsi_handle,
+ ICE_DFLT_AGG_ID, tc);
+ if (status)
+ break;
+
+ clear_bit(tc, agg_vsi_info->tc_bitmap);
+ if (rm_vsi_info && !agg_vsi_info->tc_bitmap[0]) {
+ list_del(&agg_vsi_info->list_entry);
+ devm_kfree(ice_hw_to_dev(pi->hw), agg_vsi_info);
+ }
+ }
+
+ return status;
+}
+
+/**
+ * ice_sched_is_agg_inuse - check whether the aggregator is in use or not
+ * @pi: port information structure
+ * @node: node pointer
+ *
+ * This function checks whether the aggregator is attached with any VSI or not.
+ */
+static bool
+ice_sched_is_agg_inuse(struct ice_port_info *pi, struct ice_sched_node *node)
+{
+ u8 vsil, i;
+
+ vsil = ice_sched_get_vsi_layer(pi->hw);
+ if (node->tx_sched_layer < vsil - 1) {
+ for (i = 0; i < node->num_children; i++)
+ if (ice_sched_is_agg_inuse(pi, node->children[i]))
+ return true;
+ return false;
+ } else {
+ return node->num_children ? true : false;
+ }
+}
+
+/**
+ * ice_sched_rm_agg_cfg - remove the aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @tc: TC number
+ *
+ * This function removes the aggregator node and intermediate nodes if any
+ * from the given TC
+ */
+static enum ice_status
+ice_sched_rm_agg_cfg(struct ice_port_info *pi, u32 agg_id, u8 tc)
+{
+ struct ice_sched_node *tc_node, *agg_node;
+ struct ice_hw *hw = pi->hw;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return ICE_ERR_CFG;
+
+ agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id);
+ if (!agg_node)
+ return ICE_ERR_DOES_NOT_EXIST;
+
+ /* Can't remove the aggregator node if it has children */
+ if (ice_sched_is_agg_inuse(pi, agg_node))
+ return ICE_ERR_IN_USE;
+
+ /* need to remove the whole subtree if aggregator node is the
+ * only child.
+ */
+ while (agg_node->tx_sched_layer > hw->sw_entry_point_layer) {
+ struct ice_sched_node *parent = agg_node->parent;
+
+ if (!parent)
+ return ICE_ERR_CFG;
+
+ if (parent->num_children > 1)
+ break;
+
+ agg_node = parent;
+ }
+
+ ice_free_sched_node(pi, agg_node);
+ return 0;
+}
+
+/**
+ * ice_rm_agg_cfg_tc - remove aggregator configuration for TC
+ * @pi: port information structure
+ * @agg_info: aggregator ID
+ * @tc: TC number
+ * @rm_vsi_info: bool value true or false
+ *
+ * This function removes aggregator reference to VSI of given TC. It removes
+ * the aggregator configuration completely for requested TC. The caller needs
+ * to hold the scheduler lock.
+ */
+static enum ice_status
+ice_rm_agg_cfg_tc(struct ice_port_info *pi, struct ice_sched_agg_info *agg_info,
+ u8 tc, bool rm_vsi_info)
+{
+ enum ice_status status = 0;
+
+ /* If nothing to remove - return success */
+ if (!ice_is_tc_ena(agg_info->tc_bitmap[0], tc))
+ goto exit_rm_agg_cfg_tc;
+
+ status = ice_move_all_vsi_to_dflt_agg(pi, agg_info, tc, rm_vsi_info);
+ if (status)
+ goto exit_rm_agg_cfg_tc;
+
+ /* Delete aggregator node(s) */
+ status = ice_sched_rm_agg_cfg(pi, agg_info->agg_id, tc);
+ if (status)
+ goto exit_rm_agg_cfg_tc;
+
+ clear_bit(tc, agg_info->tc_bitmap);
+exit_rm_agg_cfg_tc:
+ return status;
+}
+
+/**
+ * ice_save_agg_tc_bitmap - save aggregator TC bitmap
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @tc_bitmap: 8 bits TC bitmap
+ *
+ * Save aggregator TC bitmap. This function needs to be called with scheduler
+ * lock held.
+ */
+static enum ice_status
+ice_save_agg_tc_bitmap(struct ice_port_info *pi, u32 agg_id,
+ unsigned long *tc_bitmap)
+{
+ struct ice_sched_agg_info *agg_info;
+
+ agg_info = ice_get_agg_info(pi->hw, agg_id);
+ if (!agg_info)
+ return ICE_ERR_PARAM;
+ bitmap_copy(agg_info->replay_tc_bitmap, tc_bitmap,
+ ICE_MAX_TRAFFIC_CLASS);
+ return 0;
+}
+
+/**
+ * ice_sched_add_agg_cfg - create an aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @tc: TC number
+ *
+ * This function creates an aggregator node and intermediate nodes if required
+ * for the given TC
+ */
+static enum ice_status
+ice_sched_add_agg_cfg(struct ice_port_info *pi, u32 agg_id, u8 tc)
+{
+ struct ice_sched_node *parent, *agg_node, *tc_node;
+ u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+ enum ice_status status = 0;
+ struct ice_hw *hw = pi->hw;
+ u32 first_node_teid;
+ u16 num_nodes_added;
+ u8 i, aggl;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return ICE_ERR_CFG;
+
+ agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id);
+ /* Does Agg node already exist ? */
+ if (agg_node)
+ return status;
+
+ aggl = ice_sched_get_agg_layer(hw);
+
+ /* need one node in Agg layer */
+ num_nodes[aggl] = 1;
+
+ /* Check whether the intermediate nodes have space to add the
+ * new aggregator. If they are full, then SW needs to allocate a new
+ * intermediate node on those layers
+ */
+ for (i = hw->sw_entry_point_layer; i < aggl; i++) {
+ parent = ice_sched_get_first_node(pi, tc_node, i);
+
+ /* scan all the siblings */
+ while (parent) {
+ if (parent->num_children < hw->max_children[i])
+ break;
+ parent = parent->sibling;
+ }
+
+ /* all the nodes are full, reserve one for this layer */
+ if (!parent)
+ num_nodes[i]++;
+ }
+
+ /* add the aggregator node */
+ parent = tc_node;
+ for (i = hw->sw_entry_point_layer; i <= aggl; i++) {
+ if (!parent)
+ return ICE_ERR_CFG;
+
+ status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
+ num_nodes[i],
+ &first_node_teid,
+ &num_nodes_added);
+ if (status || num_nodes[i] != num_nodes_added)
+ return ICE_ERR_CFG;
+
+ /* The newly added node can be a new parent for the next
+ * layer nodes
+ */
+ if (num_nodes_added) {
+ parent = ice_sched_find_node_by_teid(tc_node,
+ first_node_teid);
+ /* register aggregator ID with the aggregator node */
+ if (parent && i == aggl)
+ parent->agg_id = agg_id;
+ } else {
+ parent = parent->children[0];
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_sched_cfg_agg - configure aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @agg_type: aggregator type queue, VSI, or aggregator group
+ * @tc_bitmap: bits TC bitmap
+ *
+ * It registers a unique aggregator node into scheduler services. It
+ * allows a user to register with a unique ID to track it's resources.
+ * The aggregator type determines if this is a queue group, VSI group
+ * or aggregator group. It then creates the aggregator node(s) for requested
+ * TC(s) or removes an existing aggregator node including its configuration
+ * if indicated via tc_bitmap. Call ice_rm_agg_cfg to release aggregator
+ * resources and remove aggregator ID.
+ * This function needs to be called with scheduler lock held.
+ */
+static enum ice_status
+ice_sched_cfg_agg(struct ice_port_info *pi, u32 agg_id,
+ enum ice_agg_type agg_type, unsigned long *tc_bitmap)
+{
+ struct ice_sched_agg_info *agg_info;
+ enum ice_status status = 0;
+ struct ice_hw *hw = pi->hw;
+ u8 tc;
+
+ agg_info = ice_get_agg_info(hw, agg_id);
+ if (!agg_info) {
+ /* Create new entry for new aggregator ID */
+ agg_info = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*agg_info),
+ GFP_KERNEL);
+ if (!agg_info)
+ return ICE_ERR_NO_MEMORY;
+
+ agg_info->agg_id = agg_id;
+ agg_info->agg_type = agg_type;
+ agg_info->tc_bitmap[0] = 0;
+
+ /* Initialize the aggregator VSI list head */
+ INIT_LIST_HEAD(&agg_info->agg_vsi_list);
+
+ /* Add new entry in aggregator list */
+ list_add(&agg_info->list_entry, &hw->agg_list);
+ }
+ /* Create aggregator node(s) for requested TC(s) */
+ ice_for_each_traffic_class(tc) {
+ if (!ice_is_tc_ena(*tc_bitmap, tc)) {
+ /* Delete aggregator cfg TC if it exists previously */
+ status = ice_rm_agg_cfg_tc(pi, agg_info, tc, false);
+ if (status)
+ break;
+ continue;
+ }
+
+ /* Check if aggregator node for TC already exists */
+ if (ice_is_tc_ena(agg_info->tc_bitmap[0], tc))
+ continue;
+
+ /* Create new aggregator node for TC */
+ status = ice_sched_add_agg_cfg(pi, agg_id, tc);
+ if (status)
+ break;
+
+ /* Save aggregator node's TC information */
+ set_bit(tc, agg_info->tc_bitmap);
+ }
+
+ return status;
+}
+
+/**
+ * ice_cfg_agg - config aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @agg_type: aggregator type queue, VSI, or aggregator group
+ * @tc_bitmap: bits TC bitmap
+ *
+ * This function configures aggregator node(s).
+ */
+enum ice_status
+ice_cfg_agg(struct ice_port_info *pi, u32 agg_id, enum ice_agg_type agg_type,
+ u8 tc_bitmap)
+{
+ unsigned long bitmap = tc_bitmap;
+ enum ice_status status;
+
+ mutex_lock(&pi->sched_lock);
+ status = ice_sched_cfg_agg(pi, agg_id, agg_type,
+ (unsigned long *)&bitmap);
+ if (!status)
+ status = ice_save_agg_tc_bitmap(pi, agg_id,
+ (unsigned long *)&bitmap);
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
+ * ice_get_agg_vsi_info - get the aggregator ID
+ * @agg_info: aggregator info
+ * @vsi_handle: software VSI handle
+ *
+ * The function returns aggregator VSI info based on VSI handle. This function
+ * needs to be called with scheduler lock held.
+ */
+static struct ice_sched_agg_vsi_info *
+ice_get_agg_vsi_info(struct ice_sched_agg_info *agg_info, u16 vsi_handle)
+{
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+
+ list_for_each_entry(agg_vsi_info, &agg_info->agg_vsi_list, list_entry)
+ if (agg_vsi_info->vsi_handle == vsi_handle)
+ return agg_vsi_info;
+
+ return NULL;
+}
+
+/**
+ * ice_get_vsi_agg_info - get the aggregator info of VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: Sw VSI handle
+ *
+ * The function returns aggregator info of VSI represented via vsi_handle. The
+ * VSI has in this case a different aggregator than the default one. This
+ * function needs to be called with scheduler lock held.
+ */
+static struct ice_sched_agg_info *
+ice_get_vsi_agg_info(struct ice_hw *hw, u16 vsi_handle)
+{
+ struct ice_sched_agg_info *agg_info;
+
+ list_for_each_entry(agg_info, &hw->agg_list, list_entry) {
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+
+ agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+ if (agg_vsi_info)
+ return agg_info;
+ }
+ return NULL;
+}
+
+/**
+ * ice_save_agg_vsi_tc_bitmap - save aggregator VSI TC bitmap
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap of enabled TC(s)
+ *
+ * Save VSI to aggregator TC bitmap. This function needs to call with scheduler
+ * lock held.
+ */
+static enum ice_status
+ice_save_agg_vsi_tc_bitmap(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle,
+ unsigned long *tc_bitmap)
+{
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_sched_agg_info *agg_info;
+
+ agg_info = ice_get_agg_info(pi->hw, agg_id);
+ if (!agg_info)
+ return ICE_ERR_PARAM;
+ /* check if entry already exist */
+ agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+ if (!agg_vsi_info)
+ return ICE_ERR_PARAM;
+ bitmap_copy(agg_vsi_info->replay_tc_bitmap, tc_bitmap,
+ ICE_MAX_TRAFFIC_CLASS);
+ return 0;
+}
+
+/**
+ * ice_sched_assoc_vsi_to_agg - associate/move VSI to new/default aggregator
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap of enabled TC(s)
+ *
+ * This function moves VSI to a new or default aggregator node. If VSI is
+ * already associated to the aggregator node then no operation is performed on
+ * the tree. This function needs to be called with scheduler lock held.
+ */
+static enum ice_status
+ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id,
+ u16 vsi_handle, unsigned long *tc_bitmap)
+{
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_sched_agg_info *agg_info;
+ enum ice_status status = 0;
+ struct ice_hw *hw = pi->hw;
+ u8 tc;
+
+ if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+ return ICE_ERR_PARAM;
+ agg_info = ice_get_agg_info(hw, agg_id);
+ if (!agg_info)
+ return ICE_ERR_PARAM;
+ /* check if entry already exist */
+ agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+ if (!agg_vsi_info) {
+ /* Create new entry for VSI under aggregator list */
+ agg_vsi_info = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(*agg_vsi_info), GFP_KERNEL);
+ if (!agg_vsi_info)
+ return ICE_ERR_PARAM;
+
+ /* add VSI ID into the aggregator list */
+ agg_vsi_info->vsi_handle = vsi_handle;
+ list_add(&agg_vsi_info->list_entry, &agg_info->agg_vsi_list);
+ }
+ /* Move VSI node to new aggregator node for requested TC(s) */
+ ice_for_each_traffic_class(tc) {
+ if (!ice_is_tc_ena(*tc_bitmap, tc))
+ continue;
+
+ /* Move VSI to new aggregator */
+ status = ice_sched_move_vsi_to_agg(pi, vsi_handle, agg_id, tc);
+ if (status)
+ break;
+
+ set_bit(tc, agg_vsi_info->tc_bitmap);
+ }
+ return status;
+}
+
+/**
* ice_sched_rm_unused_rl_prof - remove unused RL profile
* @pi: port information structure
*
@@ -1955,7 +2782,6 @@ ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node,
{
struct ice_aqc_txsched_elem_data buf;
struct ice_aqc_txsched_elem *data;
- enum ice_status status;
buf = node->info;
data = &buf.data;
@@ -1970,7 +2796,32 @@ ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node,
}
/* Configure element */
- status = ice_sched_update_elem(hw, node, &buf);
+ return ice_sched_update_elem(hw, node, &buf);
+}
+
+/**
+ * ice_move_vsi_to_agg - moves VSI to new or default aggregator
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap of enabled TC(s)
+ *
+ * Move or associate VSI to a new or default aggregator node.
+ */
+enum ice_status
+ice_move_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle,
+ u8 tc_bitmap)
+{
+ unsigned long bitmap = tc_bitmap;
+ enum ice_status status;
+
+ mutex_lock(&pi->sched_lock);
+ status = ice_sched_assoc_vsi_to_agg(pi, agg_id, vsi_handle,
+ (unsigned long *)&bitmap);
+ if (!status)
+ status = ice_save_agg_vsi_tc_bitmap(pi, agg_id, vsi_handle,
+ (unsigned long *)&bitmap);
+ mutex_unlock(&pi->sched_lock);
return status;
}
@@ -2045,11 +2896,12 @@ static void ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
/**
* ice_sched_calc_wakeup - calculate RL profile wakeup parameter
+ * @hw: pointer to the HW struct
* @bw: bandwidth in Kbps
*
* This function calculates the wakeup parameter of RL profile.
*/
-static u16 ice_sched_calc_wakeup(s32 bw)
+static u16 ice_sched_calc_wakeup(struct ice_hw *hw, s32 bw)
{
s64 bytes_per_sec, wakeup_int, wakeup_a, wakeup_b, wakeup_f;
s32 wakeup_f_int;
@@ -2057,7 +2909,7 @@ static u16 ice_sched_calc_wakeup(s32 bw)
/* Get the wakeup integer value */
bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
- wakeup_int = div64_long(ICE_RL_PROF_FREQUENCY, bytes_per_sec);
+ wakeup_int = div64_long(hw->psm_clk_freq, bytes_per_sec);
if (wakeup_int > 63) {
wakeup = (u16)((1 << 15) | wakeup_int);
} else {
@@ -2066,8 +2918,7 @@ static u16 ice_sched_calc_wakeup(s32 bw)
*/
wakeup_b = (s64)ICE_RL_PROF_MULTIPLIER * wakeup_int;
wakeup_a = div64_long((s64)ICE_RL_PROF_MULTIPLIER *
- ICE_RL_PROF_FREQUENCY,
- bytes_per_sec);
+ hw->psm_clk_freq, bytes_per_sec);
/* Get Fraction value */
wakeup_f = wakeup_a - wakeup_b;
@@ -2087,13 +2938,15 @@ static u16 ice_sched_calc_wakeup(s32 bw)
/**
* ice_sched_bw_to_rl_profile - convert BW to profile parameters
+ * @hw: pointer to the HW struct
* @bw: bandwidth in Kbps
* @profile: profile parameters to return
*
* This function converts the BW to profile structure format.
*/
static enum ice_status
-ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile)
+ice_sched_bw_to_rl_profile(struct ice_hw *hw, u32 bw,
+ struct ice_aqc_rl_profile_elem *profile)
{
enum ice_status status = ICE_ERR_PARAM;
s64 bytes_per_sec, ts_rate, mv_tmp;
@@ -2113,7 +2966,7 @@ ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile)
for (i = 0; i < 64; i++) {
u64 pow_result = BIT_ULL(i);
- ts_rate = div64_long((s64)ICE_RL_PROF_FREQUENCY,
+ ts_rate = div64_long((s64)hw->psm_clk_freq,
pow_result * ICE_RL_PROF_TS_MULTIPLIER);
if (ts_rate <= 0)
continue;
@@ -2137,7 +2990,7 @@ ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile)
if (found) {
u16 wm;
- wm = ice_sched_calc_wakeup(bw);
+ wm = ice_sched_calc_wakeup(hw, bw);
profile->rl_multiply = cpu_to_le16(mv);
profile->wake_up_calc = cpu_to_le16(wm);
profile->rl_encode = cpu_to_le16(encode);
@@ -2206,7 +3059,7 @@ ice_sched_add_rl_profile(struct ice_port_info *pi,
if (!rl_prof_elem)
return NULL;
- status = ice_sched_bw_to_rl_profile(bw, &rl_prof_elem->profile);
+ status = ice_sched_bw_to_rl_profile(hw, bw, &rl_prof_elem->profile);
if (status)
goto exit_add_rl_prof;
@@ -2941,6 +3794,156 @@ ice_sched_replay_node_bw(struct ice_hw *hw, struct ice_sched_node *node,
}
/**
+ * ice_sched_get_ena_tc_bitmap - get enabled TC bitmap
+ * @pi: port info struct
+ * @tc_bitmap: 8 bits TC bitmap to check
+ * @ena_tc_bitmap: 8 bits enabled TC bitmap to return
+ *
+ * This function returns enabled TC bitmap in variable ena_tc_bitmap. Some TCs
+ * may be missing, it returns enabled TCs. This function needs to be called with
+ * scheduler lock held.
+ */
+static void
+ice_sched_get_ena_tc_bitmap(struct ice_port_info *pi,
+ unsigned long *tc_bitmap,
+ unsigned long *ena_tc_bitmap)
+{
+ u8 tc;
+
+ /* Some TC(s) may be missing after reset, adjust for replay */
+ ice_for_each_traffic_class(tc)
+ if (ice_is_tc_ena(*tc_bitmap, tc) &&
+ (ice_sched_get_tc_node(pi, tc)))
+ set_bit(tc, ena_tc_bitmap);
+}
+
+/**
+ * ice_sched_replay_agg - recreate aggregator node(s)
+ * @hw: pointer to the HW struct
+ *
+ * This function recreate aggregator type nodes which are not replayed earlier.
+ * It also replay aggregator BW information. These aggregator nodes are not
+ * associated with VSI type node yet.
+ */
+void ice_sched_replay_agg(struct ice_hw *hw)
+{
+ struct ice_port_info *pi = hw->port_info;
+ struct ice_sched_agg_info *agg_info;
+
+ mutex_lock(&pi->sched_lock);
+ list_for_each_entry(agg_info, &hw->agg_list, list_entry)
+ /* replay aggregator (re-create aggregator node) */
+ if (!bitmap_equal(agg_info->tc_bitmap, agg_info->replay_tc_bitmap,
+ ICE_MAX_TRAFFIC_CLASS)) {
+ DECLARE_BITMAP(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ enum ice_status status;
+
+ bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ ice_sched_get_ena_tc_bitmap(pi,
+ agg_info->replay_tc_bitmap,
+ replay_bitmap);
+ status = ice_sched_cfg_agg(hw->port_info,
+ agg_info->agg_id,
+ ICE_AGG_TYPE_AGG,
+ replay_bitmap);
+ if (status) {
+ dev_info(ice_hw_to_dev(hw),
+ "Replay agg id[%d] failed\n",
+ agg_info->agg_id);
+ /* Move on to next one */
+ continue;
+ }
+ }
+ mutex_unlock(&pi->sched_lock);
+}
+
+/**
+ * ice_sched_replay_agg_vsi_preinit - Agg/VSI replay pre initialization
+ * @hw: pointer to the HW struct
+ *
+ * This function initialize aggregator(s) TC bitmap to zero. A required
+ * preinit step for replaying aggregators.
+ */
+void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw)
+{
+ struct ice_port_info *pi = hw->port_info;
+ struct ice_sched_agg_info *agg_info;
+
+ mutex_lock(&pi->sched_lock);
+ list_for_each_entry(agg_info, &hw->agg_list, list_entry) {
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+
+ agg_info->tc_bitmap[0] = 0;
+ list_for_each_entry(agg_vsi_info, &agg_info->agg_vsi_list,
+ list_entry)
+ agg_vsi_info->tc_bitmap[0] = 0;
+ }
+ mutex_unlock(&pi->sched_lock);
+}
+
+/**
+ * ice_sched_replay_vsi_agg - replay aggregator & VSI to aggregator node(s)
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ *
+ * This function replays aggregator node, VSI to aggregator type nodes, and
+ * their node bandwidth information. This function needs to be called with
+ * scheduler lock held.
+ */
+static enum ice_status
+ice_sched_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle)
+{
+ DECLARE_BITMAP(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_port_info *pi = hw->port_info;
+ struct ice_sched_agg_info *agg_info;
+ enum ice_status status;
+
+ bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return ICE_ERR_PARAM;
+ agg_info = ice_get_vsi_agg_info(hw, vsi_handle);
+ if (!agg_info)
+ return 0; /* Not present in list - default Agg case */
+ agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+ if (!agg_vsi_info)
+ return 0; /* Not present in list - default Agg case */
+ ice_sched_get_ena_tc_bitmap(pi, agg_info->replay_tc_bitmap,
+ replay_bitmap);
+ /* Replay aggregator node associated to vsi_handle */
+ status = ice_sched_cfg_agg(hw->port_info, agg_info->agg_id,
+ ICE_AGG_TYPE_AGG, replay_bitmap);
+ if (status)
+ return status;
+
+ bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ ice_sched_get_ena_tc_bitmap(pi, agg_vsi_info->replay_tc_bitmap,
+ replay_bitmap);
+ /* Move this VSI (vsi_handle) to above aggregator */
+ return ice_sched_assoc_vsi_to_agg(pi, agg_info->agg_id, vsi_handle,
+ replay_bitmap);
+}
+
+/**
+ * ice_replay_vsi_agg - replay VSI to aggregator node
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ *
+ * This function replays association of VSI to aggregator type nodes, and
+ * node bandwidth information.
+ */
+enum ice_status ice_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle)
+{
+ struct ice_port_info *pi = hw->port_info;
+ enum ice_status status;
+
+ mutex_lock(&pi->sched_lock);
+ status = ice_sched_replay_vsi_agg(hw, vsi_handle);
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
* ice_sched_replay_q_bw - replay queue type node BW
* @pi: port information structure
* @q_ctx: queue context structure
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
index 0e55ae0d446f..9beef8f0ec76 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.h
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -8,6 +8,7 @@
#define ICE_QGRP_LAYER_OFFSET 2
#define ICE_VSI_LAYER_OFFSET 4
+#define ICE_AGG_LAYER_OFFSET 6
#define ICE_SCHED_INVAL_LAYER_NUM 0xFF
/* Burst size is a 12 bits register that is configured while creating the RL
* profile(s). MSB is a granularity bit and tells the granularity type
@@ -23,12 +24,16 @@
((BIT(11) - 1) * 64) /* In Bytes */
#define ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY ICE_MAX_BURST_SIZE_ALLOWED
-#define ICE_RL_PROF_FREQUENCY 446000000
#define ICE_RL_PROF_ACCURACY_BYTES 128
#define ICE_RL_PROF_MULTIPLIER 10000
#define ICE_RL_PROF_TS_MULTIPLIER 32
#define ICE_RL_PROF_FRACTION 512
+#define ICE_PSM_CLK_367MHZ_IN_HZ 367647059
+#define ICE_PSM_CLK_416MHZ_IN_HZ 416666667
+#define ICE_PSM_CLK_446MHZ_IN_HZ 446428571
+#define ICE_PSM_CLK_390MHZ_IN_HZ 390625000
+
/* BW rate limit profile parameters list entry along
* with bandwidth maintained per layer in port info
*/
@@ -43,6 +48,8 @@ struct ice_sched_agg_vsi_info {
struct list_head list_entry;
DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
u16 vsi_handle;
+ /* save aggregator VSI TC bitmap */
+ DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
};
struct ice_sched_agg_info {
@@ -51,6 +58,8 @@ struct ice_sched_agg_info {
DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
u32 agg_id;
enum ice_agg_type agg_type;
+ /* save aggregator TC bitmap */
+ DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
};
/* FW AQ command calls */
@@ -60,6 +69,8 @@ ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
u16 *elems_ret, struct ice_sq_cd *cd);
enum ice_status ice_sched_init_port(struct ice_port_info *pi);
enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw);
+void ice_sched_get_psm_clk_freq(struct ice_hw *hw);
+
void ice_sched_clear_port(struct ice_port_info *pi);
void ice_sched_cleanup_all(struct ice_hw *hw);
void ice_sched_clear_agg(struct ice_hw *hw);
@@ -78,6 +89,14 @@ enum ice_status
ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
u8 owner, bool enable);
enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle);
+
+/* Tx scheduler rate limiter functions */
+enum ice_status
+ice_cfg_agg(struct ice_port_info *pi, u32 agg_id,
+ enum ice_agg_type agg_type, u8 tc_bitmap);
+enum ice_status
+ice_move_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle,
+ u8 tc_bitmap);
enum ice_status
ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
u16 q_handle, enum ice_rl_type rl_type, u32 bw);
@@ -85,6 +104,9 @@ enum ice_status
ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
u16 q_handle, enum ice_rl_type rl_type);
enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes);
+void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw);
+void ice_sched_replay_agg(struct ice_hw *hw);
+enum ice_status ice_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle);
enum ice_status
ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx);
#endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index c33612132ddf..67c965a3f5d2 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -603,7 +603,7 @@ enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw)
}
} while (req_desc && !status);
- devm_kfree(ice_hw_to_dev(hw), (void *)rbuf);
+ devm_kfree(ice_hw_to_dev(hw), rbuf);
return status;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 8ca63c6a6ba4..580419813bb2 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1497,22 +1497,11 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
struct ice_vsi *vsi = q_vector->vsi;
u32 itr_val;
- /* when exiting WB_ON_ITR lets set a low ITR value and trigger
- * interrupts to expire right away in case we have more work ready to go
- * already
+ /* when exiting WB_ON_ITR just reset the countdown and let ITR
+ * resume it's normal "interrupts-enabled" path
*/
- if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) {
- itr_val = ice_buildreg_itr(rx->itr_idx, ICE_WB_ON_ITR_USECS);
- wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
- /* set target back to last user set value */
- rx->target_itr = rx->itr_setting;
- /* set current to what we just wrote and dynamic if needed */
- rx->current_itr = ICE_WB_ON_ITR_USECS |
- (rx->itr_setting & ICE_ITR_DYNAMIC);
- /* allow normal interrupt flow to start */
+ if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE)
q_vector->itr_countdown = 0;
- return;
- }
/* This will do nothing if dynamic updates are not enabled */
ice_update_itr(q_vector, tx);
@@ -1552,10 +1541,8 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
q_vector->itr_countdown--;
}
- if (!test_bit(__ICE_DOWN, q_vector->vsi->state))
- wr32(&q_vector->vsi->back->hw,
- GLINT_DYN_CTL(q_vector->reg_idx),
- itr_val);
+ if (!test_bit(__ICE_DOWN, vsi->state))
+ wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
}
/**
@@ -1565,30 +1552,29 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
* We need to tell hardware to write-back completed descriptors even when
* interrupts are disabled. Descriptors will be written back on cache line
* boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR
- * descriptors may not be written back if they don't fill a cache line until the
- * next interrupt.
+ * descriptors may not be written back if they don't fill a cache line until
+ * the next interrupt.
*
- * This sets the write-back frequency to 2 microseconds as that is the minimum
- * value that's not 0 due to ITR granularity. Also, set the INTENA_MSK bit to
- * make sure hardware knows we aren't meddling with the INTENA_M bit.
+ * This sets the write-back frequency to whatever was set previously for the
+ * ITR indices. Also, set the INTENA_MSK bit to make sure hardware knows we
+ * aren't meddling with the INTENA_M bit.
*/
static void ice_set_wb_on_itr(struct ice_q_vector *q_vector)
{
struct ice_vsi *vsi = q_vector->vsi;
- /* already in WB_ON_ITR mode no need to change it */
+ /* already in wb_on_itr mode no need to change it */
if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE)
return;
- if (q_vector->num_ring_rx)
- wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
- ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS,
- ICE_RX_ITR));
-
- if (q_vector->num_ring_tx)
- wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
- ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS,
- ICE_TX_ITR));
+ /* use previously set ITR values for all of the ITR indices by
+ * specifying ICE_ITR_NONE, which will vary in adaptive (AIM) mode and
+ * be static in non-adaptive mode (user configured)
+ */
+ wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
+ ((ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) &
+ GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M |
+ GLINT_DYN_CTL_WB_ON_ITR_M);
q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE;
}
@@ -1655,8 +1641,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
}
/* If work not completed, return budget and polling will return */
- if (!clean_complete)
+ if (!clean_complete) {
+ /* Set the writeback on ITR so partial completions of
+ * cache-lines will still continue even if we're polling.
+ */
+ ice_set_wb_on_itr(q_vector);
return budget;
+ }
/* Exit the polling mode, but don't re-enable interrupts if stack might
* poll us due to busy-polling
@@ -2413,7 +2404,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
/* allow CONTROL frames egress from main VSI if FW LLDP disabled */
if (unlikely(skb->priority == TC_PRIO_CONTROL &&
vsi->type == ICE_VSI_PF &&
- vsi->port_info->is_sw_lldp))
+ vsi->port_info->qos_cfg.is_sw_lldp))
offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
ICE_TX_CTX_DESC_SWTCH_UPLINK <<
ICE_TXD_CTX_QW1_CMD_S);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index ff1a1cbd078e..db56a0c8bfe1 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -240,7 +240,6 @@ enum ice_rx_dtype {
#define ICE_DFLT_INTRL 0
#define ICE_MAX_INTRL 236
-#define ICE_WB_ON_ITR_USECS 2
#define ICE_IN_WB_ON_ITR_MODE 255
/* Sets WB_ON_ITR and assumes INTENA bit is already cleared, which allows
* setting the MSK_M bit to tell hardware to ignore the INTENA_M bit. Also,
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index a98800a91045..a6cb0c35748c 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -403,7 +403,11 @@ struct ice_link_default_override_tlv {
#define ice_for_each_traffic_class(_i) \
for ((_i) = 0; (_i) < ICE_MAX_TRAFFIC_CLASS; (_i)++)
+/* ICE_DFLT_AGG_ID means that all new VM(s)/VSI node connects
+ * to driver defined policy for default aggregator
+ */
#define ICE_INVAL_TEID 0xFFFFFFFF
+#define ICE_DFLT_AGG_ID 0
struct ice_sched_node {
struct ice_sched_node *parent;
@@ -552,6 +556,14 @@ struct ice_dcbx_cfg {
#define ICE_DCBX_APPS_NON_WILLING 0x1
};
+struct ice_qos_cfg {
+ struct ice_dcbx_cfg local_dcbx_cfg; /* Oper/Local Cfg */
+ struct ice_dcbx_cfg desired_dcbx_cfg; /* CEE Desired Cfg */
+ struct ice_dcbx_cfg remote_dcbx_cfg; /* Peer Cfg */
+ u8 dcbx_status : 3; /* see ICE_DCBX_STATUS_DIS */
+ u8 is_sw_lldp : 1;
+};
+
struct ice_port_info {
struct ice_sched_node *root; /* Root Node per Port */
struct ice_hw *hw; /* back pointer to HW instance */
@@ -575,13 +587,7 @@ struct ice_port_info {
sib_head[ICE_MAX_TRAFFIC_CLASS][ICE_AQC_TOPO_MAX_LEVEL_NUM];
/* List contain profile ID(s) and other params per layer */
struct list_head rl_prof_list[ICE_AQC_TOPO_MAX_LEVEL_NUM];
- struct ice_dcbx_cfg local_dcbx_cfg; /* Oper/Local Cfg */
- /* DCBX info */
- struct ice_dcbx_cfg remote_dcbx_cfg; /* Peer Cfg */
- struct ice_dcbx_cfg desired_dcbx_cfg; /* CEE Desired Cfg */
- /* LLDP/DCBX Status */
- u8 dcbx_status:3; /* see ICE_DCBX_STATUS_DIS */
- u8 is_sw_lldp:1;
+ struct ice_qos_cfg qos_cfg;
u8 is_vf:1;
};
@@ -614,6 +620,8 @@ struct ice_hw {
void *back;
struct ice_aqc_layer_props *layer_info;
struct ice_port_info *port_info;
+ /* PSM clock frequency for calculating RL profile params */
+ u32 psm_clk_freq;
u64 debug_mask; /* bitmap for debug mask */
enum ice_mac_type mac_type;
@@ -902,4 +910,9 @@ struct ice_hw_port_stats {
/* Hash redirection LUT for VSI - maximum array size */
#define ICE_VSIQF_HLUT_ARRAY_SIZE ((VSIQF_HLUT_MAX_INDEX + 1) * 4)
+/* AQ API version for LLDP_FILTER_CONTROL */
+#define ICE_FW_API_LLDP_FLTR_MAJ 1
+#define ICE_FW_API_LLDP_FLTR_MIN 7
+#define ICE_FW_API_LLDP_FLTR_PATCH 1
+
#endif /* _ICE_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index ec7f6c64132e..bf5fd812ea0e 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1057,11 +1057,45 @@ static void ice_vf_pre_vsi_rebuild(struct ice_vf *vf)
}
/**
+ * ice_vf_rebuild_aggregator_node_cfg - rebuild aggregator node config
+ * @vsi: Pointer to VSI
+ *
+ * This function moves VSI into corresponding scheduler aggregator node
+ * based on cached value of "aggregator node info" per VSI
+ */
+static void ice_vf_rebuild_aggregator_node_cfg(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ enum ice_status status;
+ struct device *dev;
+
+ if (!vsi->agg_node)
+ return;
+
+ dev = ice_pf_to_dev(pf);
+ if (vsi->agg_node->num_vsis == ICE_MAX_VSIS_IN_AGG_NODE) {
+ dev_dbg(dev,
+ "agg_id %u already has reached max_num_vsis %u\n",
+ vsi->agg_node->agg_id, vsi->agg_node->num_vsis);
+ return;
+ }
+
+ status = ice_move_vsi_to_agg(pf->hw.port_info, vsi->agg_node->agg_id,
+ vsi->idx, vsi->tc_cfg.ena_tc);
+ if (status)
+ dev_dbg(dev, "unable to move VSI idx %u into aggregator %u node",
+ vsi->idx, vsi->agg_node->agg_id);
+ else
+ vsi->agg_node->num_vsis++;
+}
+
+/**
* ice_vf_rebuild_host_cfg - host admin configuration is persistent across reset
* @vf: VF to rebuild host configuration on
*/
static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
{
+ struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
struct device *dev = ice_pf_to_dev(vf->pf);
ice_vf_set_host_trust_cfg(vf);
@@ -1073,6 +1107,8 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
if (ice_vf_rebuild_host_vlan_cfg(vf))
dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n",
vf->vf_id);
+ /* rebuild aggregator node config for main VF VSI */
+ ice_vf_rebuild_aggregator_node_cfg(vsi);
}
/**
@@ -1677,6 +1713,8 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (!num_vfs) {
if (!pci_vfs_assigned(pdev)) {
ice_free_vfs(pf);
+ if (pf->lag)
+ ice_enable_lag(pf->lag);
return 0;
}
@@ -1688,6 +1726,8 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (err)
return err;
+ if (pf->lag)
+ ice_disable_lag(pf->lag);
return num_vfs;
}
@@ -2312,12 +2352,12 @@ bool ice_is_any_vf_in_promisc(struct ice_pf *pf)
static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
{
enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ bool rm_promisc, alluni = false, allmulti = false;
struct virtchnl_promisc_info *info =
(struct virtchnl_promisc_info *)msg;
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
struct device *dev;
- bool rm_promisc;
int ret = 0;
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
@@ -2344,8 +2384,13 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
goto error_param;
}
- rm_promisc = !(info->flags & FLAG_VF_UNICAST_PROMISC) &&
- !(info->flags & FLAG_VF_MULTICAST_PROMISC);
+ if (info->flags & FLAG_VF_UNICAST_PROMISC)
+ alluni = true;
+
+ if (info->flags & FLAG_VF_MULTICAST_PROMISC)
+ allmulti = true;
+
+ rm_promisc = !allmulti && !alluni;
if (vsi->num_vlan || vf->port_vlan_info) {
struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
@@ -2399,12 +2444,12 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
enum ice_status status;
u8 promisc_m;
- if (info->flags & FLAG_VF_UNICAST_PROMISC) {
+ if (alluni) {
if (vf->port_vlan_info || vsi->num_vlan)
promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
else
promisc_m = ICE_UCAST_PROMISC_BITS;
- } else if (info->flags & FLAG_VF_MULTICAST_PROMISC) {
+ } else if (allmulti) {
if (vf->port_vlan_info || vsi->num_vlan)
promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
else
@@ -2432,15 +2477,16 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
}
}
- if (info->flags & FLAG_VF_MULTICAST_PROMISC)
- set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
- else
- clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+ if (allmulti &&
+ !test_and_set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
+ dev_info(dev, "VF %u successfully set multicast promiscuous mode\n", vf->vf_id);
+ else if (!allmulti && test_and_clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
+ dev_info(dev, "VF %u successfully unset multicast promiscuous mode\n", vf->vf_id);
- if (info->flags & FLAG_VF_UNICAST_PROMISC)
- set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
- else
- clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+ if (alluni && !test_and_set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states))
+ dev_info(dev, "VF %u successfully set unicast promiscuous mode\n", vf->vf_id);
+ else if (!alluni && test_and_clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states))
+ dev_info(dev, "VF %u successfully unset unicast promiscuous mode\n", vf->vf_id);
error_param:
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 1782146db644..875fa0cbef56 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -260,45 +260,6 @@ free_buf:
}
/**
- * ice_xsk_alloc_pools - allocate a buffer pool for an XDP socket
- * @vsi: VSI to allocate the buffer pool on
- *
- * Returns 0 on success, negative on error
- */
-static int ice_xsk_alloc_pools(struct ice_vsi *vsi)
-{
- if (vsi->xsk_pools)
- return 0;
-
- vsi->xsk_pools = kcalloc(vsi->num_xsk_pools, sizeof(*vsi->xsk_pools),
- GFP_KERNEL);
-
- if (!vsi->xsk_pools) {
- vsi->num_xsk_pools = 0;
- return -ENOMEM;
- }
-
- return 0;
-}
-
-/**
- * ice_xsk_remove_pool - Remove an buffer pool for a certain ring/qid
- * @vsi: VSI from which the VSI will be removed
- * @qid: Ring/qid associated with the buffer pool
- */
-static void ice_xsk_remove_pool(struct ice_vsi *vsi, u16 qid)
-{
- vsi->xsk_pools[qid] = NULL;
- vsi->num_xsk_pools_used--;
-
- if (vsi->num_xsk_pools_used == 0) {
- kfree(vsi->xsk_pools);
- vsi->xsk_pools = NULL;
- vsi->num_xsk_pools = 0;
- }
-}
-
-/**
* ice_xsk_pool_disable - disable a buffer pool region
* @vsi: Current VSI
* @qid: queue ID
@@ -307,12 +268,12 @@ static void ice_xsk_remove_pool(struct ice_vsi *vsi, u16 qid)
*/
static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
{
- if (!vsi->xsk_pools || qid >= vsi->num_xsk_pools ||
- !vsi->xsk_pools[qid])
+ struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid);
+
+ if (!pool)
return -EINVAL;
- xsk_pool_dma_unmap(vsi->xsk_pools[qid], ICE_RX_DMA_ATTR);
- ice_xsk_remove_pool(vsi, qid);
+ xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR);
return 0;
}
@@ -333,22 +294,11 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
if (vsi->type != ICE_VSI_PF)
return -EINVAL;
- if (!vsi->num_xsk_pools)
- vsi->num_xsk_pools = min_t(u16, vsi->num_rxq, vsi->num_txq);
- if (qid >= vsi->num_xsk_pools)
+ if (qid >= vsi->netdev->real_num_rx_queues ||
+ qid >= vsi->netdev->real_num_tx_queues)
return -EINVAL;
- err = ice_xsk_alloc_pools(vsi);
- if (err)
- return err;
-
- if (vsi->xsk_pools && vsi->xsk_pools[qid])
- return -EBUSY;
-
- vsi->xsk_pools[qid] = pool;
- vsi->num_xsk_pools_used++;
-
- err = xsk_pool_dma_map(vsi->xsk_pools[qid], ice_pf_to_dev(vsi->back),
+ err = xsk_pool_dma_map(pool, ice_pf_to_dev(vsi->back),
ICE_RX_DMA_ATTR);
if (err)
return err;
@@ -842,11 +792,8 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
{
int i;
- if (!vsi->xsk_pools)
- return false;
-
- for (i = 0; i < vsi->num_xsk_pools; i++) {
- if (vsi->xsk_pools[i])
+ ice_for_each_rxq(vsi, i) {
+ if (xsk_get_pool_from_qid(vsi->netdev, i))
return true;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index cf111e73f81e..9ce90841f92d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -4943,6 +4943,25 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
}
static void
+mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
+ struct fib_entry_notifier_info *fen_info)
+{
+ u32 *p_dst = (u32 *) &fen_info->dst;
+ struct fib_rt_info fri;
+
+ fri.fi = fen_info->fi;
+ fri.tb_id = fen_info->tb_id;
+ fri.dst = cpu_to_be32(*p_dst);
+ fri.dst_len = fen_info->dst_len;
+ fri.tos = fen_info->tos;
+ fri.type = fen_info->type;
+ fri.offload = false;
+ fri.trap = false;
+ fri.offload_failed = true;
+ fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
+}
+
+static void
mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
@@ -4963,6 +4982,7 @@ mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
fri.type = fib4_entry->type;
fri.offload = should_offload;
fri.trap = !should_offload;
+ fri.offload_failed = false;
fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}
@@ -4985,11 +5005,36 @@ mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
fri.type = fib4_entry->type;
fri.offload = false;
fri.trap = false;
+ fri.offload_failed = false;
fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}
#if IS_ENABLED(CONFIG_IPV6)
static void
+mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
+ struct fib6_info **rt_arr,
+ unsigned int nrt6)
+{
+ int i;
+
+ /* In IPv6 a multipath route is represented using multiple routes, so
+ * we need to set the flags on all of them.
+ */
+ for (i = 0; i < nrt6; i++)
+ fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), rt_arr[i],
+ false, false, true);
+}
+#else
+static void
+mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
+ struct fib6_info **rt_arr,
+ unsigned int nrt6)
+{
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void
mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
@@ -5006,7 +5051,7 @@ mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
common);
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
- should_offload, !should_offload);
+ should_offload, !should_offload, false);
}
#else
static void
@@ -5028,7 +5073,7 @@ mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
common);
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
- false, false);
+ false, false, false);
}
#else
static void
@@ -7021,6 +7066,8 @@ static void mlxsw_sp_router_fib4_event_process(struct mlxsw_sp *mlxsw_sp,
if (err) {
mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
mlxsw_sp_router_fib_abort(mlxsw_sp);
+ mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp,
+ &fib_event->fen_info);
}
fib_info_put(fib_event->fen_info.fi);
break;
@@ -7042,6 +7089,7 @@ static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
struct mlxsw_sp_fib_event *fib_event)
{
+ struct mlxsw_sp_fib6_event *fib6_event = &fib_event->fib6_event;
int err;
mlxsw_sp_span_respin(mlxsw_sp);
@@ -7053,6 +7101,9 @@ static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp,
if (err) {
mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
mlxsw_sp_router_fib_abort(mlxsw_sp);
+ mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
+ fib6_event->rt_arr,
+ fib6_event->nrt6);
}
mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
break;
@@ -7062,6 +7113,9 @@ static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp,
if (err) {
mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
mlxsw_sp_router_fib_abort(mlxsw_sp);
+ mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
+ fib6_event->rt_arr,
+ fib6_event->nrt6);
}
mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
break;
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 816af1f55e2c..dbeb29fa16e8 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -1012,23 +1012,25 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,
nsim_dev->fw_update_status = true;
nsim_dev->fw_update_overwrite_mask = 0;
- nsim_dev->fib_data = nsim_fib_create(devlink, extack);
- if (IS_ERR(nsim_dev->fib_data))
- return PTR_ERR(nsim_dev->fib_data);
-
nsim_devlink_param_load_driverinit_values(devlink);
err = nsim_dev_dummy_region_init(nsim_dev, devlink);
if (err)
- goto err_fib_destroy;
+ return err;
err = nsim_dev_traps_init(devlink);
if (err)
goto err_dummy_region_exit;
+ nsim_dev->fib_data = nsim_fib_create(devlink, extack);
+ if (IS_ERR(nsim_dev->fib_data)) {
+ err = PTR_ERR(nsim_dev->fib_data);
+ goto err_traps_exit;
+ }
+
err = nsim_dev_health_init(nsim_dev, devlink);
if (err)
- goto err_traps_exit;
+ goto err_fib_destroy;
err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
if (err)
@@ -1043,12 +1045,12 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,
err_health_exit:
nsim_dev_health_exit(nsim_dev);
+err_fib_destroy:
+ nsim_fib_destroy(devlink, nsim_dev->fib_data);
err_traps_exit:
nsim_dev_traps_exit(devlink);
err_dummy_region_exit:
nsim_dev_dummy_region_exit(nsim_dev);
-err_fib_destroy:
- nsim_fib_destroy(devlink, nsim_dev->fib_data);
return err;
}
@@ -1080,15 +1082,9 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
if (err)
goto err_devlink_free;
- nsim_dev->fib_data = nsim_fib_create(devlink, NULL);
- if (IS_ERR(nsim_dev->fib_data)) {
- err = PTR_ERR(nsim_dev->fib_data);
- goto err_resources_unregister;
- }
-
err = devlink_register(devlink, &nsim_bus_dev->dev);
if (err)
- goto err_fib_destroy;
+ goto err_resources_unregister;
err = devlink_params_register(devlink, nsim_devlink_params,
ARRAY_SIZE(nsim_devlink_params));
@@ -1108,9 +1104,15 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
if (err)
goto err_traps_exit;
+ nsim_dev->fib_data = nsim_fib_create(devlink, NULL);
+ if (IS_ERR(nsim_dev->fib_data)) {
+ err = PTR_ERR(nsim_dev->fib_data);
+ goto err_debugfs_exit;
+ }
+
err = nsim_dev_health_init(nsim_dev, devlink);
if (err)
- goto err_debugfs_exit;
+ goto err_fib_destroy;
err = nsim_bpf_dev_init(nsim_dev);
if (err)
@@ -1128,6 +1130,8 @@ err_bpf_dev_exit:
nsim_bpf_dev_exit(nsim_dev);
err_health_exit:
nsim_dev_health_exit(nsim_dev);
+err_fib_destroy:
+ nsim_fib_destroy(devlink, nsim_dev->fib_data);
err_debugfs_exit:
nsim_dev_debugfs_exit(nsim_dev);
err_traps_exit:
@@ -1139,8 +1143,6 @@ err_params_unregister:
ARRAY_SIZE(nsim_devlink_params));
err_dl_unregister:
devlink_unregister(devlink);
-err_fib_destroy:
- nsim_fib_destroy(devlink, nsim_dev->fib_data);
err_resources_unregister:
devlink_resources_unregister(devlink, NULL);
err_devlink_free:
@@ -1157,10 +1159,10 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev)
debugfs_remove(nsim_dev->take_snapshot);
nsim_dev_port_del_all(nsim_dev);
nsim_dev_health_exit(nsim_dev);
+ nsim_fib_destroy(devlink, nsim_dev->fib_data);
nsim_dev_traps_exit(devlink);
nsim_dev_dummy_region_exit(nsim_dev);
mutex_destroy(&nsim_dev->port_list_lock);
- nsim_fib_destroy(devlink, nsim_dev->fib_data);
}
void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev)
diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index 1779146926a5..46fb414f7ca6 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -26,6 +26,7 @@
#include <net/fib_rules.h>
#include <net/net_namespace.h>
#include <net/nexthop.h>
+#include <linux/debugfs.h>
#include "netdevsim.h"
@@ -53,6 +54,8 @@ struct nsim_fib_data {
struct work_struct fib_event_work;
struct list_head fib_event_queue;
spinlock_t fib_event_queue_lock; /* Protects fib event queue list */
+ struct dentry *ddir;
+ bool fail_route_offload;
};
struct nsim_fib_rt_key {
@@ -303,6 +306,25 @@ nsim_fib4_rt_lookup(struct rhashtable *fib_rt_ht,
return container_of(fib_rt, struct nsim_fib4_rt, common);
}
+static void
+nsim_fib4_rt_offload_failed_flag_set(struct net *net,
+ struct fib_entry_notifier_info *fen_info)
+{
+ u32 *p_dst = (u32 *)&fen_info->dst;
+ struct fib_rt_info fri;
+
+ fri.fi = fen_info->fi;
+ fri.tb_id = fen_info->tb_id;
+ fri.dst = cpu_to_be32(*p_dst);
+ fri.dst_len = fen_info->dst_len;
+ fri.tos = fen_info->tos;
+ fri.type = fen_info->type;
+ fri.offload = false;
+ fri.trap = false;
+ fri.offload_failed = true;
+ fib_alias_hw_flags_set(net, &fri);
+}
+
static void nsim_fib4_rt_hw_flags_set(struct net *net,
const struct nsim_fib4_rt *fib4_rt,
bool trap)
@@ -319,6 +341,7 @@ static void nsim_fib4_rt_hw_flags_set(struct net *net,
fri.type = fib4_rt->type;
fri.offload = false;
fri.trap = trap;
+ fri.offload_failed = false;
fib_alias_hw_flags_set(net, &fri);
}
@@ -383,6 +406,15 @@ static int nsim_fib4_rt_insert(struct nsim_fib_data *data,
struct nsim_fib4_rt *fib4_rt, *fib4_rt_old;
int err;
+ if (data->fail_route_offload) {
+ /* For testing purposes, user set debugfs fail_route_offload
+ * value to true. Simulate hardware programming latency and then
+ * fail.
+ */
+ msleep(1);
+ return -EINVAL;
+ }
+
fib4_rt = nsim_fib4_rt_create(data, fen_info);
if (!fib4_rt)
return -ENOMEM;
@@ -405,7 +437,7 @@ static void nsim_fib4_rt_remove(struct nsim_fib_data *data,
struct nsim_fib4_rt *fib4_rt;
fib4_rt = nsim_fib4_rt_lookup(&data->fib_rt_ht, fen_info);
- if (WARN_ON_ONCE(!fib4_rt))
+ if (!fib4_rt)
return;
rhashtable_remove_fast(&data->fib_rt_ht, &fib4_rt->common.ht_node,
@@ -422,6 +454,11 @@ static int nsim_fib4_event(struct nsim_fib_data *data,
switch (event) {
case FIB_EVENT_ENTRY_REPLACE:
err = nsim_fib4_rt_insert(data, fen_info);
+ if (err) {
+ struct net *net = devlink_net(data->devlink);
+
+ nsim_fib4_rt_offload_failed_flag_set(net, fen_info);
+ }
break;
case FIB_EVENT_ENTRY_DEL:
nsim_fib4_rt_remove(data, fen_info);
@@ -481,7 +518,7 @@ static void nsim_fib6_rt_nh_del(struct nsim_fib6_rt *fib6_rt,
struct nsim_fib6_rt_nh *fib6_rt_nh;
fib6_rt_nh = nsim_fib6_rt_nh_find(fib6_rt, rt);
- if (WARN_ON_ONCE(!fib6_rt_nh))
+ if (!fib6_rt_nh)
return;
fib6_rt->nhs--;
@@ -563,8 +600,17 @@ static int nsim_fib6_rt_append(struct nsim_fib_data *data,
struct nsim_fib6_rt *fib6_rt;
int i, err;
+ if (data->fail_route_offload) {
+ /* For testing purposes, user set debugfs fail_route_offload
+ * value to true. Simulate hardware programming latency and then
+ * fail.
+ */
+ msleep(1);
+ return -EINVAL;
+ }
+
fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt);
- if (WARN_ON_ONCE(!fib6_rt))
+ if (!fib6_rt)
return -EINVAL;
for (i = 0; i < fib6_event->nrt6; i++) {
@@ -586,6 +632,26 @@ err_fib6_rt_nh_del:
}
#if IS_ENABLED(CONFIG_IPV6)
+static void nsim_fib6_rt_offload_failed_flag_set(struct nsim_fib_data *data,
+ struct fib6_info **rt_arr,
+ unsigned int nrt6)
+
+{
+ struct net *net = devlink_net(data->devlink);
+ int i;
+
+ for (i = 0; i < nrt6; i++)
+ fib6_info_hw_flags_set(net, rt_arr[i], false, false, true);
+}
+#else
+static void nsim_fib6_rt_offload_failed_flag_set(struct nsim_fib_data *data,
+ struct fib6_info **rt_arr,
+ unsigned int nrt6)
+{
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data,
const struct nsim_fib6_rt *fib6_rt,
bool trap)
@@ -594,7 +660,7 @@ static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data,
struct nsim_fib6_rt_nh *fib6_rt_nh;
list_for_each_entry(fib6_rt_nh, &fib6_rt->nh_list, list)
- fib6_info_hw_flags_set(net, fib6_rt_nh->rt, false, trap);
+ fib6_info_hw_flags_set(net, fib6_rt_nh->rt, false, trap, false);
}
#else
static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data,
@@ -666,6 +732,15 @@ static int nsim_fib6_rt_insert(struct nsim_fib_data *data,
struct nsim_fib6_rt *fib6_rt, *fib6_rt_old;
int err;
+ if (data->fail_route_offload) {
+ /* For testing purposes, user set debugfs fail_route_offload
+ * value to true. Simulate hardware programming latency and then
+ * fail.
+ */
+ msleep(1);
+ return -EINVAL;
+ }
+
fib6_rt = nsim_fib6_rt_create(data, fib6_event->rt_arr,
fib6_event->nrt6);
if (IS_ERR(fib6_rt))
@@ -763,7 +838,7 @@ static int nsim_fib6_event(struct nsim_fib_data *data,
struct nsim_fib6_event *fib6_event,
unsigned long event)
{
- int err = 0;
+ int err;
if (fib6_event->rt_arr[0]->fib6_src.plen)
return 0;
@@ -771,9 +846,13 @@ static int nsim_fib6_event(struct nsim_fib_data *data,
switch (event) {
case FIB_EVENT_ENTRY_REPLACE:
err = nsim_fib6_rt_insert(data, fib6_event);
+ if (err)
+ goto err_rt_offload_failed_flag_set;
break;
case FIB_EVENT_ENTRY_APPEND:
err = nsim_fib6_rt_append(data, fib6_event);
+ if (err)
+ goto err_rt_offload_failed_flag_set;
break;
case FIB_EVENT_ENTRY_DEL:
nsim_fib6_rt_remove(data, fib6_event);
@@ -782,6 +861,11 @@ static int nsim_fib6_event(struct nsim_fib_data *data,
break;
}
+ return 0;
+
+err_rt_offload_failed_flag_set:
+ nsim_fib6_rt_offload_failed_flag_set(data, fib6_event->rt_arr,
+ fib6_event->nrt6);
return err;
}
@@ -1289,10 +1373,29 @@ static void nsim_fib_event_work(struct work_struct *work)
mutex_unlock(&data->fib_lock);
}
+static int
+nsim_fib_debugfs_init(struct nsim_fib_data *data, struct nsim_dev *nsim_dev)
+{
+ data->ddir = debugfs_create_dir("fib", nsim_dev->ddir);
+ if (IS_ERR(data->ddir))
+ return PTR_ERR(data->ddir);
+
+ data->fail_route_offload = false;
+ debugfs_create_bool("fail_route_offload", 0600, data->ddir,
+ &data->fail_route_offload);
+ return 0;
+}
+
+static void nsim_fib_debugfs_exit(struct nsim_fib_data *data)
+{
+ debugfs_remove_recursive(data->ddir);
+}
+
struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
struct netlink_ext_ack *extack)
{
struct nsim_fib_data *data;
+ struct nsim_dev *nsim_dev;
int err;
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -1300,10 +1403,15 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
return ERR_PTR(-ENOMEM);
data->devlink = devlink;
- err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params);
+ nsim_dev = devlink_priv(devlink);
+ err = nsim_fib_debugfs_init(data, nsim_dev);
if (err)
goto err_data_free;
+ err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params);
+ if (err)
+ goto err_debugfs_exit;
+
mutex_init(&data->fib_lock);
INIT_LIST_HEAD(&data->fib_rt_list);
err = rhashtable_init(&data->fib_rt_ht, &nsim_fib_rt_ht_params);
@@ -1364,6 +1472,8 @@ err_rhashtable_nexthop_destroy:
rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free,
data);
mutex_destroy(&data->fib_lock);
+err_debugfs_exit:
+ nsim_fib_debugfs_exit(data);
err_data_free:
kfree(data);
return ERR_PTR(err);
@@ -1391,5 +1501,6 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data)
WARN_ON_ONCE(!list_empty(&data->fib_event_queue));
WARN_ON_ONCE(!list_empty(&data->fib_rt_list));
mutex_destroy(&data->fib_lock);
+ nsim_fib_debugfs_exit(data);
kfree(data);
}
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 8a4ec3222168..3142ba768313 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -361,96 +361,6 @@ static int bcm54811_config_init(struct phy_device *phydev)
return err;
}
-static int bcm5482_config_init(struct phy_device *phydev)
-{
- int err, reg;
-
- err = bcm54xx_config_init(phydev);
-
- if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX) {
- /*
- * Enable secondary SerDes and its use as an LED source
- */
- reg = bcm_phy_read_shadow(phydev, BCM5482_SHD_SSD);
- bcm_phy_write_shadow(phydev, BCM5482_SHD_SSD,
- reg |
- BCM5482_SHD_SSD_LEDM |
- BCM5482_SHD_SSD_EN);
-
- /*
- * Enable SGMII slave mode and auto-detection
- */
- reg = BCM5482_SSD_SGMII_SLAVE | MII_BCM54XX_EXP_SEL_SSD;
- err = bcm_phy_read_exp(phydev, reg);
- if (err < 0)
- return err;
- err = bcm_phy_write_exp(phydev, reg, err |
- BCM5482_SSD_SGMII_SLAVE_EN |
- BCM5482_SSD_SGMII_SLAVE_AD);
- if (err < 0)
- return err;
-
- /*
- * Disable secondary SerDes powerdown
- */
- reg = BCM5482_SSD_1000BX_CTL | MII_BCM54XX_EXP_SEL_SSD;
- err = bcm_phy_read_exp(phydev, reg);
- if (err < 0)
- return err;
- err = bcm_phy_write_exp(phydev, reg,
- err & ~BCM5482_SSD_1000BX_CTL_PWRDOWN);
- if (err < 0)
- return err;
-
- /*
- * Select 1000BASE-X register set (primary SerDes)
- */
- reg = bcm_phy_read_shadow(phydev, BCM54XX_SHD_MODE);
- bcm_phy_write_shadow(phydev, BCM54XX_SHD_MODE,
- reg | BCM54XX_SHD_MODE_1000BX);
-
- /*
- * LED1=ACTIVITYLED, LED3=LINKSPD[2]
- * (Use LED1 as secondary SerDes ACTIVITY LED)
- */
- bcm_phy_write_shadow(phydev, BCM5482_SHD_LEDS1,
- BCM5482_SHD_LEDS1_LED1(BCM_LED_SRC_ACTIVITYLED) |
- BCM5482_SHD_LEDS1_LED3(BCM_LED_SRC_LINKSPD2));
-
- /*
- * Auto-negotiation doesn't seem to work quite right
- * in this mode, so we disable it and force it to the
- * right speed/duplex setting. Only 'link status'
- * is important.
- */
- phydev->autoneg = AUTONEG_DISABLE;
- phydev->speed = SPEED_1000;
- phydev->duplex = DUPLEX_FULL;
- }
-
- return err;
-}
-
-static int bcm5482_read_status(struct phy_device *phydev)
-{
- int err;
-
- err = genphy_read_status(phydev);
-
- if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX) {
- /*
- * Only link status matters for 1000Base-X mode, so force
- * 1000 Mbit/s full-duplex status
- */
- if (phydev->link) {
- phydev->speed = SPEED_1000;
- phydev->duplex = DUPLEX_FULL;
- }
- }
-
- return err;
-}
-
static int bcm5481_config_aneg(struct phy_device *phydev)
{
struct device_node *np = phydev->mdio.dev.of_node;
@@ -800,8 +710,7 @@ static struct phy_driver broadcom_drivers[] = {
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM5482",
/* PHY_GBIT_FEATURES */
- .config_init = bcm5482_config_init,
- .read_status = bcm5482_read_status,
+ .config_init = bcm54xx_config_init,
.config_intr = bcm_phy_config_intr,
.handle_interrupt = bcm_phy_handle_interrupt,
}, {
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 620052c023a5..b523aa37ebf0 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -2852,7 +2852,6 @@ static struct phy_driver marvell_drivers[] = {
.probe = marvell_probe,
.config_init = m88e1145_config_init,
.config_aneg = m88e1101_config_aneg,
- .read_status = genphy_read_status,
.config_intr = marvell_config_intr,
.handle_interrupt = marvell_handle_interrupt,
.resume = genphy_resume,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 39c7c786a912..494abf608b8f 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -1368,7 +1368,6 @@ static struct phy_driver ksphy_driver[] = {
.driver_data = &ksz9021_type,
.probe = kszphy_probe,
.config_init = ksz9131_config_init,
- .read_status = genphy_read_status,
.config_intr = kszphy_config_intr,
.handle_interrupt = kszphy_handle_interrupt,
.get_sset_count = kszphy_get_sset_count,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e9e7ada07ea1..1340327f7abf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -347,6 +347,7 @@ struct napi_struct {
struct list_head dev_list;
struct hlist_node napi_hash_node;
unsigned int napi_id;
+ struct task_struct *thread;
};
enum {
@@ -358,6 +359,7 @@ enum {
NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */
NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/
+ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
};
enum {
@@ -369,6 +371,7 @@ enum {
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL),
+ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
};
enum gro_result {
@@ -494,6 +497,8 @@ static inline bool napi_complete(struct napi_struct *n)
return napi_complete_done(n, 0);
}
+int dev_set_threaded(struct net_device *dev, bool threaded);
+
/**
* napi_disable - prevent NAPI from scheduling
* @n: NAPI context
@@ -503,20 +508,7 @@ static inline bool napi_complete(struct napi_struct *n)
*/
void napi_disable(struct napi_struct *n);
-/**
- * napi_enable - enable NAPI scheduling
- * @n: NAPI context
- *
- * Resume NAPI from being scheduled on this context.
- * Must be paired with napi_disable.
- */
-static inline void napi_enable(struct napi_struct *n)
-{
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- smp_mb__before_atomic();
- clear_bit(NAPI_STATE_SCHED, &n->state);
- clear_bit(NAPI_STATE_NPSVC, &n->state);
-}
+void napi_enable(struct napi_struct *n);
/**
* napi_synchronize - wait until NAPI is not running
@@ -1827,6 +1819,8 @@ enum netdev_priv_flags {
*
* @wol_enabled: Wake-on-LAN is enabled
*
+ * @threaded: napi threaded mode is enabled
+ *
* @net_notifier_list: List of per-net netdev notifier block
* that follow this device when it is moved
* to another network namespace.
@@ -2145,6 +2139,7 @@ struct net_device {
struct lock_class_key *qdisc_running_key;
bool proto_down;
unsigned wol_enabled:1;
+ unsigned threaded:1;
struct list_head net_notifier_list;
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1e262b23c68b..15b7fbe6b15c 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -195,7 +195,8 @@ struct fib6_info {
fib6_destroying:1,
offload:1,
trap:1,
- unused:2;
+ offload_failed:1,
+ unused:1;
struct rcu_head rcu;
struct nexthop *nh;
@@ -539,7 +540,7 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
}
void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
- bool offload, bool trap);
+ bool offload, bool trap, bool offload_failed);
#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
struct bpf_iter__ipv6_route {
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 2ec062aaa978..a914f33f3ed5 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -213,7 +213,8 @@ struct fib_rt_info {
u8 type;
u8 offload:1,
trap:1,
- unused:6;
+ offload_failed:1,
+ unused:5;
};
struct fib_entry_notifier_info {
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index b841caa4657e..91e4ca064d61 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -319,6 +319,11 @@ enum rt_scope_t {
#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */
#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */
#define RTM_F_TRAP 0x8000 /* route is trapping packets */
+#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed, this value
+ * is chosen to avoid conflicts with
+ * other flags defined in
+ * include/uapi/linux/ipv6_route.h
+ */
/* Reserved table identifiers */
diff --git a/net/core/dev.c b/net/core/dev.c
index 21d74d30f5d7..7647278e46f0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -91,6 +91,7 @@
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/skbuff.h>
+#include <linux/kthread.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
@@ -1494,6 +1495,27 @@ void netdev_notify_peers(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_notify_peers);
+static int napi_threaded_poll(void *data);
+
+static int napi_kthread_create(struct napi_struct *n)
+{
+ int err = 0;
+
+ /* Create and wake up the kthread once to put it in
+ * TASK_INTERRUPTIBLE mode to avoid the blocked task
+ * warning and work with loadavg.
+ */
+ n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
+ n->dev->name, n->napi_id);
+ if (IS_ERR(n->thread)) {
+ err = PTR_ERR(n->thread);
+ pr_err("kthread_run failed with err %d\n", err);
+ n->thread = NULL;
+ }
+
+ return err;
+}
+
static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -4265,6 +4287,22 @@ int gro_normal_batch __read_mostly = 8;
static inline void ____napi_schedule(struct softnet_data *sd,
struct napi_struct *napi)
{
+ struct task_struct *thread;
+
+ if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
+ /* Paired with smp_mb__before_atomic() in
+ * napi_enable()/dev_set_threaded().
+ * Use READ_ONCE() to guarantee a complete
+ * read on napi->thread. Only call
+ * wake_up_process() when it's not NULL.
+ */
+ thread = READ_ONCE(napi->thread);
+ if (thread) {
+ wake_up_process(thread);
+ return;
+ }
+ }
+
list_add_tail(&napi->poll_list, &sd->poll_list);
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
@@ -6701,6 +6739,49 @@ static void init_gro_hash(struct napi_struct *napi)
napi->gro_bitmask = 0;
}
+int dev_set_threaded(struct net_device *dev, bool threaded)
+{
+ struct napi_struct *napi;
+ int err = 0;
+
+ if (dev->threaded == threaded)
+ return 0;
+
+ if (threaded) {
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (!napi->thread) {
+ err = napi_kthread_create(napi);
+ if (err) {
+ threaded = false;
+ break;
+ }
+ }
+ }
+ }
+
+ dev->threaded = threaded;
+
+ /* Make sure kthread is created before THREADED bit
+ * is set.
+ */
+ smp_mb__before_atomic();
+
+ /* Setting/unsetting threaded mode on a napi might not immediately
+ * take effect, if the current napi instance is actively being
+ * polled. In this case, the switch between threaded mode and
+ * softirq mode will happen in the next round of napi_schedule().
+ * This should not cause hiccups/stalls to the live traffic.
+ */
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (threaded)
+ set_bit(NAPI_STATE_THREADED, &napi->state);
+ else
+ clear_bit(NAPI_STATE_THREADED, &napi->state);
+ }
+
+ return err;
+}
+
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -6728,6 +6809,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
set_bit(NAPI_STATE_NPSVC, &napi->state);
list_add_rcu(&napi->dev_list, &dev->napi_list);
napi_hash_add(napi);
+ /* Create kthread for this napi if dev->threaded is set.
+ * Clear dev->threaded if kthread creation failed so that
+ * threaded mode will not be enabled in napi_enable().
+ */
+ if (dev->threaded && napi_kthread_create(napi))
+ dev->threaded = 0;
}
EXPORT_SYMBOL(netif_napi_add);
@@ -6745,9 +6832,28 @@ void napi_disable(struct napi_struct *n)
clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
clear_bit(NAPI_STATE_DISABLE, &n->state);
+ clear_bit(NAPI_STATE_THREADED, &n->state);
}
EXPORT_SYMBOL(napi_disable);
+/**
+ * napi_enable - enable NAPI scheduling
+ * @n: NAPI context
+ *
+ * Resume NAPI from being scheduled on this context.
+ * Must be paired with napi_disable.
+ */
+void napi_enable(struct napi_struct *n)
+{
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+ smp_mb__before_atomic();
+ clear_bit(NAPI_STATE_SCHED, &n->state);
+ clear_bit(NAPI_STATE_NPSVC, &n->state);
+ if (n->dev->threaded && n->thread)
+ set_bit(NAPI_STATE_THREADED, &n->state);
+}
+EXPORT_SYMBOL(napi_enable);
+
static void flush_gro_hash(struct napi_struct *napi)
{
int i;
@@ -6773,18 +6879,18 @@ void __netif_napi_del(struct napi_struct *napi)
flush_gro_hash(napi);
napi->gro_bitmask = 0;
+
+ if (napi->thread) {
+ kthread_stop(napi->thread);
+ napi->thread = NULL;
+ }
}
EXPORT_SYMBOL(__netif_napi_del);
-static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+static int __napi_poll(struct napi_struct *n, bool *repoll)
{
- void *have;
int work, weight;
- list_del_init(&n->poll_list);
-
- have = netpoll_poll_lock(n);
-
weight = n->weight;
/* This NAPI_STATE_SCHED test is for avoiding a race
@@ -6804,7 +6910,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
n->poll, work, weight);
if (likely(work < weight))
- goto out_unlock;
+ return work;
/* Drivers must not modify the NAPI state if they
* consume the entire weight. In such cases this code
@@ -6813,7 +6919,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
*/
if (unlikely(napi_disable_pending(n))) {
napi_complete(n);
- goto out_unlock;
+ return work;
}
/* The NAPI context has more processing work, but busy-polling
@@ -6826,7 +6932,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
*/
napi_schedule(n);
}
- goto out_unlock;
+ return work;
}
if (n->gro_bitmask) {
@@ -6844,17 +6950,79 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
if (unlikely(!list_empty(&n->poll_list))) {
pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
n->dev ? n->dev->name : "backlog");
- goto out_unlock;
+ return work;
}
- list_add_tail(&n->poll_list, repoll);
+ *repoll = true;
+
+ return work;
+}
+
+static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+{
+ bool do_repoll = false;
+ void *have;
+ int work;
+
+ list_del_init(&n->poll_list);
+
+ have = netpoll_poll_lock(n);
+
+ work = __napi_poll(n, &do_repoll);
+
+ if (do_repoll)
+ list_add_tail(&n->poll_list, repoll);
-out_unlock:
netpoll_poll_unlock(have);
return work;
}
+static int napi_thread_wait(struct napi_struct *napi)
+{
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ while (!kthread_should_stop() && !napi_disable_pending(napi)) {
+ if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+ WARN_ON(!list_empty(&napi->poll_list));
+ __set_current_state(TASK_RUNNING);
+ return 0;
+ }
+
+ schedule();
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+ __set_current_state(TASK_RUNNING);
+ return -1;
+}
+
+static int napi_threaded_poll(void *data)
+{
+ struct napi_struct *napi = data;
+ void *have;
+
+ while (!napi_thread_wait(napi)) {
+ for (;;) {
+ bool repoll = false;
+
+ local_bh_disable();
+
+ have = netpoll_poll_lock(napi);
+ __napi_poll(napi, &repoll);
+ netpoll_poll_unlock(have);
+
+ __kfree_skb_flush();
+ local_bh_enable();
+
+ if (!repoll)
+ break;
+
+ cond_resched();
+ }
+ }
+ return 0;
+}
+
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 91afb0b6de69..307628fdf380 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -538,6 +538,45 @@ static ssize_t phys_switch_id_show(struct device *dev,
}
static DEVICE_ATTR_RO(phys_switch_id);
+static ssize_t threaded_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (dev_isalive(netdev))
+ ret = sprintf(buf, fmt_dec, netdev->threaded);
+
+ rtnl_unlock();
+ return ret;
+}
+
+static int modify_napi_threaded(struct net_device *dev, unsigned long val)
+{
+ int ret;
+
+ if (list_empty(&dev->napi_list))
+ return -EOPNOTSUPP;
+
+ if (val != 0 && val != 1)
+ return -EOPNOTSUPP;
+
+ ret = dev_set_threaded(dev, val);
+
+ return ret;
+}
+
+static ssize_t threaded_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return netdev_store(dev, attr, buf, len, modify_napi_threaded);
+}
+static DEVICE_ATTR_RW(threaded);
+
static struct attribute *net_class_attrs[] __ro_after_init = {
&dev_attr_netdev_group.attr,
&dev_attr_type.attr,
@@ -570,6 +609,7 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
&dev_attr_proto_down.attr,
&dev_attr_carrier_up_count.attr,
&dev_attr_carrier_down_count.attr,
+ &dev_attr_threaded.attr,
NULL,
};
ATTRIBUTE_GROUPS(net_class);
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 5bda5aeda579..601f5fbfc63f 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -285,7 +285,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
esp.esph = ip_esp_hdr(skb);
- if (!hw_offload || (hw_offload && !skb_is_gso(skb))) {
+ if (!hw_offload || !skb_is_gso(skb)) {
esp.nfrags = esp_output_head(x, skb, &esp);
if (esp.nfrags < 0)
return esp.nfrags;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index aff454ef0fa3..b58db1ca4bfb 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -18,7 +18,8 @@ struct fib_alias {
s16 fa_default;
u8 offload:1,
trap:1,
- unused:6;
+ offload_failed:1,
+ unused:5;
struct rcu_head rcu;
};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 4c38facf91c0..a632b66bc13a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -521,6 +521,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
fri.type = fa->fa_type;
fri.offload = fa->offload;
fri.trap = fa->trap;
+ fri.offload_failed = fa->offload_failed;
err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
@@ -1811,6 +1812,8 @@ offload:
rtm->rtm_flags |= RTM_F_OFFLOAD;
if (fri->trap)
rtm->rtm_flags |= RTM_F_TRAP;
+ if (fri->offload_failed)
+ rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 60559b708158..25cf387cca5b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1047,12 +1047,20 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
if (!fa_match)
goto out;
- if (fa_match->offload == fri->offload && fa_match->trap == fri->trap)
+ if (fa_match->offload == fri->offload && fa_match->trap == fri->trap &&
+ fa_match->offload_failed == fri->offload_failed)
goto out;
fa_match->offload = fri->offload;
fa_match->trap = fri->trap;
+ /* 2 means send notifications only if offload_failed was changed. */
+ if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
+ fa_match->offload_failed == fri->offload_failed)
+ goto out;
+
+ fa_match->offload_failed = fri->offload_failed;
+
if (!net->ipv4.sysctl_fib_notify_on_flag_change)
goto out;
@@ -1290,6 +1298,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->fa_default = -1;
new_fa->offload = 0;
new_fa->trap = 0;
+ new_fa->offload_failed = 0;
hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
@@ -1350,6 +1359,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->fa_default = -1;
new_fa->offload = 0;
new_fa->trap = 0;
+ new_fa->offload_failed = 0;
/* Insert new entry to the list. */
err = fib_insert_alias(t, tp, l, new_fa, fa, key);
@@ -2289,6 +2299,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
fri.type = fa->fa_type;
fri.offload = fa->offload;
fri.trap = fa->trap;
+ fri.offload_failed = fa->offload_failed;
err = fib_dump_info(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index be31e2446470..02d81d79deeb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3304,6 +3304,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fri.type = rt->rt_type;
fri.offload = 0;
fri.trap = 0;
+ fri.offload_failed = 0;
if (res.fa_head) {
struct fib_alias *fa;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e5798b3b59d2..f55095d3ed16 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1361,7 +1361,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
+ .extra2 = &two,
},
{ }
};
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 2b804fcebcc6..153ad103ba74 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -788,7 +788,7 @@ int esp6_input_done2(struct sk_buff *skb, int err)
int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
int hdr_len = skb_network_header_len(skb);
- if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
+ if (!xo || !(xo->flags & CRYPTO_DONE))
kfree(ESP_SKB_CB(skb)->tmp);
if (unlikely(err))
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0d1784b0d65d..1536f4948e86 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5619,6 +5619,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
rtm->rtm_flags |= RTM_F_OFFLOAD;
if (rt->trap)
rtm->rtm_flags |= RTM_F_TRAP;
+ if (rt->offload_failed)
+ rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
}
if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
@@ -6070,17 +6072,25 @@ errout:
}
void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
- bool offload, bool trap)
+ bool offload, bool trap, bool offload_failed)
{
struct sk_buff *skb;
int err;
- if (f6i->offload == offload && f6i->trap == trap)
+ if (f6i->offload == offload && f6i->trap == trap &&
+ f6i->offload_failed == offload_failed)
return;
f6i->offload = offload;
f6i->trap = trap;
+ /* 2 means send notifications only if offload_failed was changed. */
+ if (net->ipv6.sysctl.fib_notify_on_flag_change == 2 &&
+ f6i->offload_failed == offload_failed)
+ return;
+
+ f6i->offload_failed = offload_failed;
+
if (!rcu_access_pointer(f6i->fib6_node))
/* The route was removed from the tree, do not send
* notfication.
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 392ef01e3366..263ab43ed06b 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -167,7 +167,7 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
+ .extra2 = &two,
},
{ }
};
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 697cdcfbb5e1..495b1f5c979b 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -296,7 +296,8 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
}
mtu = dst_mtu(dst);
- if (skb->len > mtu) {
+ if ((!skb_is_gso(skb) && skb->len > mtu) ||
+ (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))) {
skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->protocol == htons(ETH_P_IPV6)) {
@@ -564,6 +565,11 @@ static void xfrmi_dev_setup(struct net_device *dev)
eth_broadcast_addr(dev->broadcast);
}
+#define XFRMI_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_GSO_SOFTWARE | \
+ NETIF_F_HW_CSUM)
+
static int xfrmi_dev_init(struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
@@ -581,6 +587,8 @@ static int xfrmi_dev_init(struct net_device *dev)
}
dev->features |= NETIF_F_LLTX;
+ dev->features |= XFRMI_FEATURES;
+ dev->hw_features |= XFRMI_FEATURES;
if (phydev) {
dev->needed_headroom = phydev->needed_headroom;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 0727ac853b55..5a0ef4361e43 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2504,7 +2504,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]),
sizeof(*encap), GFP_KERNEL);
if (!encap)
- return 0;
+ return -ENOMEM;
}
err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap);
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh
index 16a9dd43aefc..8d91191a098c 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh
@@ -7,9 +7,11 @@ ALL_TESTS="
ipv4_route_addition_test
ipv4_route_deletion_test
ipv4_route_replacement_test
+ ipv4_route_offload_failed_test
ipv6_route_addition_test
ipv6_route_deletion_test
ipv6_route_replacement_test
+ ipv6_route_offload_failed_test
"
NETDEVSIM_PATH=/sys/bus/netdevsim/
@@ -17,9 +19,26 @@ DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
NUM_NETIFS=0
source $lib_dir/lib.sh
+check_rt_offload_failed()
+{
+ local outfile=$1; shift
+ local line
+
+ # Make sure that the first notification was emitted without
+ # RTM_F_OFFLOAD_FAILED flag and the second with RTM_F_OFFLOAD_FAILED
+ # flag
+ head -n 1 $outfile | grep -q "rt_offload_failed"
+ if [[ $? -eq 0 ]]; then
+ return 1
+ fi
+
+ head -n 2 $outfile | tail -n 1 | grep -q "rt_offload_failed"
+}
+
check_rt_trap()
{
local outfile=$1; shift
@@ -39,15 +58,23 @@ route_notify_check()
{
local outfile=$1; shift
local expected_num_lines=$1; shift
+ local offload_failed=${1:-0}; shift
# check the monitor results
lines=`wc -l $outfile | cut "-d " -f1`
test $lines -eq $expected_num_lines
check_err $? "$expected_num_lines notifications were expected but $lines were received"
- if [[ $expected_num_lines -eq 2 ]]; then
+ if [[ $expected_num_lines -eq 1 ]]; then
+ return
+ fi
+
+ if [[ $offload_failed -eq 0 ]]; then
check_rt_trap $outfile
check_err $? "Wrong RTM_F_TRAP flags in notifications"
+ else
+ check_rt_offload_failed $outfile
+ check_err $? "Wrong RTM_F_OFFLOAD_FAILED flags in notifications"
fi
}
@@ -57,6 +84,7 @@ route_addition_check()
local notify=$1; shift
local route=$1; shift
local expected_num_notifications=$1; shift
+ local offload_failed=${1:-0}; shift
ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify
@@ -68,7 +96,7 @@ route_addition_check()
sleep 1
kill %% && wait %% &> /dev/null
- route_notify_check $outfile $expected_num_notifications
+ route_notify_check $outfile $expected_num_notifications $offload_failed
rm -f $outfile
$IP route del $route dev dummy1
@@ -93,6 +121,13 @@ ipv4_route_addition_test()
expected_num_notifications=2
route_addition_check $ip $notify $route $expected_num_notifications
+ # notify=2 means emit notifications only for failed route installation,
+ # make sure a single notification will be emitted for the programmed
+ # route.
+ notify=2
+ expected_num_notifications=1
+ route_addition_check $ip $notify $route $expected_num_notifications
+
log_test "IPv4 route addition"
}
@@ -185,11 +220,55 @@ ipv4_route_replacement_test()
expected_num_notifications=2
route_replacement_check $ip $notify $route $expected_num_notifications
+ # notify=2 means emit notifications only for failed route installation,
+ # make sure a single notification will be emitted for the new route.
+ notify=2
+ expected_num_notifications=1
+ route_replacement_check $ip $notify $route $expected_num_notifications
+
$IP link del name dummy2
log_test "IPv4 route replacement"
}
+ipv4_route_offload_failed_test()
+{
+
+ RET=0
+
+ local ip="ipv4"
+ local route=192.0.2.0/24
+ local offload_failed=1
+
+ echo "y"> $DEBUGFS_DIR/fib/fail_route_offload
+ check_err $? "Failed to setup route offload to fail"
+
+ # Make sure a single notification will be emitted for the programmed
+ # route.
+ local notify=0
+ local expected_num_notifications=1
+ route_addition_check $ip $notify $route $expected_num_notifications \
+ $offload_failed
+
+ # Make sure two notifications will be emitted for the new route.
+ notify=1
+ expected_num_notifications=2
+ route_addition_check $ip $notify $route $expected_num_notifications \
+ $offload_failed
+
+ # notify=2 means emit notifications only for failed route installation,
+ # make sure two notifications will be emitted for the new route.
+ notify=2
+ expected_num_notifications=2
+ route_addition_check $ip $notify $route $expected_num_notifications \
+ $offload_failed
+
+ echo "n"> $DEBUGFS_DIR/fib/fail_route_offload
+ check_err $? "Failed to setup route offload not to fail"
+
+ log_test "IPv4 route offload failed"
+}
+
ipv6_route_addition_test()
{
RET=0
@@ -208,6 +287,13 @@ ipv6_route_addition_test()
expected_num_notifications=2
route_addition_check $ip $notify $route $expected_num_notifications
+ # notify=2 means emit notifications only for failed route installation,
+ # make sure a single notification will be emitted for the programmed
+ # route.
+ notify=2
+ expected_num_notifications=1
+ route_addition_check $ip $notify $route $expected_num_notifications
+
log_test "IPv6 route addition"
}
@@ -250,11 +336,55 @@ ipv6_route_replacement_test()
expected_num_notifications=2
route_replacement_check $ip $notify $route $expected_num_notifications
+ # notify=2 means emit notifications only for failed route installation,
+ # make sure a single notification will be emitted for the new route.
+ notify=2
+ expected_num_notifications=1
+ route_replacement_check $ip $notify $route $expected_num_notifications
+
$IP link del name dummy2
log_test "IPv6 route replacement"
}
+ipv6_route_offload_failed_test()
+{
+
+ RET=0
+
+ local ip="ipv6"
+ local route=2001:db8:1::/64
+ local offload_failed=1
+
+ echo "y"> $DEBUGFS_DIR/fib/fail_route_offload
+ check_err $? "Failed to setup route offload to fail"
+
+ # Make sure a single notification will be emitted for the programmed
+ # route.
+ local notify=0
+ local expected_num_notifications=1
+ route_addition_check $ip $notify $route $expected_num_notifications \
+ $offload_failed
+
+ # Make sure two notifications will be emitted for the new route.
+ notify=1
+ expected_num_notifications=2
+ route_addition_check $ip $notify $route $expected_num_notifications \
+ $offload_failed
+
+ # notify=2 means emit notifications only for failed route installation,
+ # make sure two notifications will be emitted for the new route.
+ notify=2
+ expected_num_notifications=2
+ route_addition_check $ip $notify $route $expected_num_notifications \
+ $offload_failed
+
+ echo "n"> $DEBUGFS_DIR/fib/fail_route_offload
+ check_err $? "Failed to setup route offload not to fail"
+
+ log_test "IPv6 route offload failed"
+}
+
setup_prepare()
{
modprobe netdevsim &> /dev/null