aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/renesas,ravb.txt1
-rw-r--r--Documentation/networking/snmp_counter.rst245
-rw-r--r--drivers/net/bonding/bond_3ad.c4
-rw-r--r--drivers/net/dsa/microchip/Kconfig2
-rw-r--r--drivers/net/dsa/microchip/ksz9477.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c251
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h12
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c5
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c201
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c32
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c21
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c113
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h13
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c16
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c3
-rw-r--r--drivers/net/ethernet/freescale/fec.h1
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c12
-rw-r--r--drivers/net/ethernet/freescale/gianfar.c1
-rw-r--r--drivers/net/ethernet/freescale/ucc_geth.c2
-rw-r--r--drivers/net/ethernet/hisilicon/Kconfig1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c140
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c29
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h48
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c446
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h690
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c99
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c20
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c6
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c23
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_cq.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h71
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/resources.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c249
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c123
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h28
-rw-r--r--drivers/net/ethernet/netronome/nfp/abm/ctrl.c11
-rw-r--r--drivers/net/ethernet/netronome/nfp/abm/main.c25
-rw-r--r--drivers/net/ethernet/netronome/nfp/abm/main.h2
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c21
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c6
-rw-r--r--drivers/net/ethernet/realtek/r8169.c3
-rw-r--r--drivers/net/ethernet/socionext/netsec.c151
-rw-r--r--drivers/net/hamradio/6pack.c10
-rw-r--r--drivers/net/hamradio/mkiss.c8
-rw-r--r--drivers/net/phy/fixed_phy.c24
-rw-r--r--drivers/net/phy/phy.c36
-rw-r--r--drivers/net/ppp/ppp_async.c8
-rw-r--r--drivers/net/tun.c6
-rw-r--r--drivers/net/vxlan.c34
-rw-r--r--include/linux/indirect_call_wrapper.h51
-rw-r--r--include/linux/netdevice.h7
-rw-r--r--include/linux/phy.h24
-rw-r--r--include/linux/phy_fixed.h5
-rw-r--r--include/net/dsa.h2
-rw-r--r--include/net/inet_common.h9
-rw-r--r--include/net/neighbour.h24
-rw-r--r--include/net/netns/xfrm.h2
-rw-r--r--include/net/pkt_cls.h4
-rw-r--r--include/net/xfrm.h5
-rw-r--r--include/uapi/linux/neighbour.h1
-rw-r--r--lib/test_rhashtable.c32
-rw-r--r--net/bridge/br_device.c1
-rw-r--r--net/bridge/br_fdb.c26
-rw-r--r--net/bridge/br_multicast.c3
-rw-r--r--net/bridge/br_netlink.c4
-rw-r--r--net/bridge/br_private.h3
-rw-r--r--net/core/dev.c15
-rw-r--r--net/core/neighbour.c118
-rw-r--r--net/core/rtnetlink.c168
-rw-r--r--net/dccp/proto.c3
-rw-r--r--net/dsa/Kconfig4
-rw-r--r--net/dsa/dsa.c8
-rw-r--r--net/dsa/dsa_priv.h2
-rw-r--r--net/dsa/tag_ksz.c118
-rw-r--r--net/ipv4/af_inet.c13
-rw-r--r--net/ipv4/fou.c7
-rw-r--r--net/ipv4/inet_hashtables.c61
-rw-r--r--net/ipv4/ipconfig.c13
-rw-r--r--net/ipv4/ipmr.c9
-rw-r--r--net/ipv4/tcp_offload.c6
-rw-r--r--net/ipv4/udp.c76
-rw-r--r--net/ipv4/udp_offload.c15
-rw-r--r--net/ipv6/inet6_hashtables.c54
-rw-r--r--net/ipv6/ip6_offload.c35
-rw-r--r--net/ipv6/ip6mr.c7
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/ipv6/tcpv6_offload.c7
-rw-r--r--net/ipv6/udp.c79
-rw-r--r--net/ipv6/udp_offload.c7
-rw-r--r--net/ipv6/xfrm6_policy.c1
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/l2tp/l2tp_ppp.c4
-rw-r--r--net/sched/cls_api.c46
-rw-r--r--net/sched/cls_bpf.c4
-rw-r--r--net/sched/cls_flower.c15
-rw-r--r--net/sched/cls_matchall.c5
-rw-r--r--net/sched/cls_u32.c8
-rw-r--r--net/sched/sch_api.c4
-rw-r--r--net/xfrm/xfrm_policy.c1248
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh175
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile7
-rw-r--r--tools/testing/selftests/net/reuseport_addr_any.c309
-rwxr-xr-xtools/testing/selftests/net/reuseport_addr_any.sh4
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh53
-rwxr-xr-xtools/testing/selftests/net/xfrm_policy.sh302
119 files changed, 5529 insertions, 1020 deletions
diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt
index 3530256a879c..7ad36213093e 100644
--- a/Documentation/devicetree/bindings/net/renesas,ravb.txt
+++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt
@@ -18,6 +18,7 @@ Required properties:
R-Car Gen2 and RZ/G1 devices.
- "renesas,etheravb-r8a774a1" for the R8A774A1 SoC.
+ - "renesas,etheravb-r8a774c0" for the R8A774C0 SoC.
- "renesas,etheravb-r8a7795" for the R8A7795 SoC.
- "renesas,etheravb-r8a7796" for the R8A7796 SoC.
- "renesas,etheravb-r8a77965" for the R8A77965 SoC.
diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst
index 918a1374af30..f8eb77ddbd44 100644
--- a/Documentation/networking/snmp_counter.rst
+++ b/Documentation/networking/snmp_counter.rst
@@ -17,7 +17,9 @@ Defined in `RFC1213 ipInReceives`_
The number of packets received by the IP layer. It gets increasing at the
beginning of ip_rcv function, always be updated together with
-IpExtInOctets. It indicates the number of aggregated segments after
+IpExtInOctets. It will be increased even if the packet is dropped
+later (e.g. due to the IP header is invalid or the checksum is wrong
+and so on). It indicates the number of aggregated segments after
GRO/LRO.
* IpInDelivers
@@ -57,6 +59,58 @@ status. They count the real frame number regardless the LRO/GRO. So
for the same packet, you might find that IpInReceives count 1, but
IpExtInNoECTPkts counts 2 or more.
+* IpInHdrErrors
+Defined in `RFC1213 ipInHdrErrors`_. It indicates the packet is
+dropped due to the IP header error. It might happen in both IP input
+and IP forward paths.
+
+.. _RFC1213 ipInHdrErrors: https://tools.ietf.org/html/rfc1213#page-27
+
+* IpInAddrErrors
+Defined in `RFC1213 ipInAddrErrors`_. It will be increased in two
+scenarios: (1) The IP address is invalid. (2) The destination IP
+address is not a local address and IP forwarding is not enabled
+
+.. _RFC1213 ipInAddrErrors: https://tools.ietf.org/html/rfc1213#page-27
+
+* IpExtInNoRoutes
+This counter means the packet is dropped when the IP stack receives a
+packet and can't find a route for it from the route table. It might
+happen when IP forwarding is enabled and the destination IP address is
+not a local address and there is no route for the destination IP
+address.
+
+* IpInUnknownProtos
+Defined in `RFC1213 ipInUnknownProtos`_. It will be increased if the
+layer 4 protocol is unsupported by kernel. If an application is using
+raw socket, kernel will always deliver the packet to the raw socket
+and this counter won't be increased.
+
+.. _RFC1213 ipInUnknownProtos: https://tools.ietf.org/html/rfc1213#page-27
+
+* IpExtInTruncatedPkts
+For IPv4 packet, it means the actual data size is smaller than the
+"Total Length" field in the IPv4 header.
+
+* IpInDiscards
+Defined in `RFC1213 ipInDiscards`_. It indicates the packet is dropped
+in the IP receiving path and due to kernel internal reasons (e.g. no
+enough memory).
+
+.. _RFC1213 ipInDiscards: https://tools.ietf.org/html/rfc1213#page-28
+
+* IpOutDiscards
+Defined in `RFC1213 ipOutDiscards`_. It indicates the packet is
+dropped in the IP sending path and due to kernel internal reasons.
+
+.. _RFC1213 ipOutDiscards: https://tools.ietf.org/html/rfc1213#page-28
+
+* IpOutNoRoutes
+Defined in `RFC1213 ipOutNoRoutes`_. It indicates the packet is
+dropped in the IP sending path and no route is found for it.
+
+.. _RFC1213 ipOutNoRoutes: https://tools.ietf.org/html/rfc1213#page-29
+
ICMP counters
============
* IcmpInMsgs and IcmpOutMsgs
@@ -425,6 +479,100 @@ The sum of CWND detected by packet delay. Dividing this value by
TcpExtTCPHystartDelayDetect is the average CWND which detected by the
packet delay.
+TCP retransmission and congestion control
+======================================
+The TCP protocol has two retransmission mechanisms: SACK and fast
+recovery. They are exclusive with each other. When SACK is enabled,
+the kernel TCP stack would use SACK, or kernel would use fast
+recovery. The SACK is a TCP option, which is defined in `RFC2018`_,
+the fast recovery is defined in `RFC6582`_, which is also called
+'Reno'.
+
+The TCP congestion control is a big and complex topic. To understand
+the related snmp counter, we need to know the states of the congestion
+control state machine. There are 5 states: Open, Disorder, CWR,
+Recovery and Loss. For details about these states, please refer page 5
+and page 6 of this document:
+https://pdfs.semanticscholar.org/0e9c/968d09ab2e53e24c4dca5b2d67c7f7140f8e.pdf
+
+.. _RFC2018: https://tools.ietf.org/html/rfc2018
+.. _RFC6582: https://tools.ietf.org/html/rfc6582
+
+* TcpExtTCPRenoRecovery and TcpExtTCPSackRecovery
+When the congestion control comes into Recovery state, if sack is
+used, TcpExtTCPSackRecovery increases 1, if sack is not used,
+TcpExtTCPRenoRecovery increases 1. These two counters mean the TCP
+stack begins to retransmit the lost packets.
+
+* TcpExtTCPSACKReneging
+A packet was acknowledged by SACK, but the receiver has dropped this
+packet, so the sender needs to retransmit this packet. In this
+situation, the sender adds 1 to TcpExtTCPSACKReneging. A receiver
+could drop a packet which has been acknowledged by SACK, although it is
+unusual, it is allowed by the TCP protocol. The sender doesn't really
+know what happened on the receiver side. The sender just waits until
+the RTO expires for this packet, then the sender assumes this packet
+has been dropped by the receiver.
+
+* TcpExtTCPRenoReorder
+The reorder packet is detected by fast recovery. It would only be used
+if SACK is disabled. The fast recovery algorithm detects recorder by
+the duplicate ACK number. E.g., if retransmission is triggered, and
+the original retransmitted packet is not lost, it is just out of
+order, the receiver would acknowledge multiple times, one for the
+retransmitted packet, another for the arriving of the original out of
+order packet. Thus the sender would find more ACks than its
+expectation, and the sender knows out of order occurs.
+
+* TcpExtTCPTSReorder
+The reorder packet is detected when a hole is filled. E.g., assume the
+sender sends packet 1,2,3,4,5, and the receiving order is
+1,2,4,5,3. When the sender receives the ACK of packet 3 (which will
+fill the hole), two conditions will let TcpExtTCPTSReorder increase
+1: (1) if the packet 3 is not re-retransmitted yet. (2) if the packet
+3 is retransmitted but the timestamp of the packet 3's ACK is earlier
+than the retransmission timestamp.
+
+* TcpExtTCPSACKReorder
+The reorder packet detected by SACK. The SACK has two methods to
+detect reorder: (1) DSACK is received by the sender. It means the
+sender sends the same packet more than one times. And the only reason
+is the sender believes an out of order packet is lost so it sends the
+packet again. (2) Assume packet 1,2,3,4,5 are sent by the sender, and
+the sender has received SACKs for packet 2 and 5, now the sender
+receives SACK for packet 4 and the sender doesn't retransmit the
+packet yet, the sender would know packet 4 is out of order. The TCP
+stack of kernel will increase TcpExtTCPSACKReorder for both of the
+above scenarios.
+
+
+DSACK
+=====
+The DSACK is defined in `RFC2883`_. The receiver uses DSACK to report
+duplicate packets to the sender. There are two kinds of
+duplications: (1) a packet which has been acknowledged is
+duplicate. (2) an out of order packet is duplicate. The TCP stack
+counts these two kinds of duplications on both receiver side and
+sender side.
+
+.. _RFC2883 : https://tools.ietf.org/html/rfc2883
+
+* TcpExtTCPDSACKOldSent
+The TCP stack receives a duplicate packet which has been acked, so it
+sends a DSACK to the sender.
+
+* TcpExtTCPDSACKOfoSent
+The TCP stack receives an out of order duplicate packet, so it sends a
+DSACK to the sender.
+
+* TcpExtTCPDSACKRecv
+The TCP stack receives a DSACK, which indicate an acknowledged
+duplicate packet is received.
+
+* TcpExtTCPDSACKOfoRecv
+The TCP stack receives a DSACK, which indicate an out of order
+duplciate packet is received.
+
examples
=======
@@ -945,3 +1093,98 @@ Both TcpExtListenOverflows and TcpExtListenDrops were 4. If the time
between the 4th nc and the nstat was longer, the value of
TcpExtListenOverflows and TcpExtListenDrops would be larger, because
the SYN of the 4th nc was dropped, the client was retrying.
+
+IpInAddrErrors, IpExtInNoRoutes and IpOutNoRoutes
+----------------------------------------------
+server A IP address: 192.168.122.250
+server B IP address: 192.168.122.251
+Prepare on server A, add a route to server B::
+
+ $ sudo ip route add 8.8.8.8/32 via 192.168.122.251
+
+Prepare on server B, disable send_redirects for all interfaces::
+
+ $ sudo sysctl -w net.ipv4.conf.all.send_redirects=0
+ $ sudo sysctl -w net.ipv4.conf.ens3.send_redirects=0
+ $ sudo sysctl -w net.ipv4.conf.lo.send_redirects=0
+ $ sudo sysctl -w net.ipv4.conf.default.send_redirects=0
+
+We want to let sever A send a packet to 8.8.8.8, and route the packet
+to server B. When server B receives such packet, it might send a ICMP
+Redirect message to server A, set send_redirects to 0 will disable
+this behavior.
+
+First, generate InAddrErrors. On server B, we disable IP forwarding::
+
+ $ sudo sysctl -w net.ipv4.conf.all.forwarding=0
+
+On server A, we send packets to 8.8.8.8::
+
+ $ nc -v 8.8.8.8 53
+
+On server B, we check the output of nstat::
+
+ $ nstat
+ #kernel
+ IpInReceives 3 0.0
+ IpInAddrErrors 3 0.0
+ IpExtInOctets 180 0.0
+ IpExtInNoECTPkts 3 0.0
+
+As we have let server A route 8.8.8.8 to server B, and we disabled IP
+forwarding on server B, Server A sent packets to server B, then server B
+dropped packets and increased IpInAddrErrors. As the nc command would
+re-send the SYN packet if it didn't receive a SYN+ACK, we could find
+multiple IpInAddrErrors.
+
+Second, generate IpExtInNoRoutes. On server B, we enable IP
+forwarding::
+
+ $ sudo sysctl -w net.ipv4.conf.all.forwarding=1
+
+Check the route table of server B and remove the default route::
+
+ $ ip route show
+ default via 192.168.122.1 dev ens3 proto static
+ 192.168.122.0/24 dev ens3 proto kernel scope link src 192.168.122.251
+ $ sudo ip route delete default via 192.168.122.1 dev ens3 proto static
+
+On server A, we contact 8.8.8.8 again::
+
+ $ nc -v 8.8.8.8 53
+ nc: connect to 8.8.8.8 port 53 (tcp) failed: Network is unreachable
+
+On server B, run nstat::
+
+ $ nstat
+ #kernel
+ IpInReceives 1 0.0
+ IpOutRequests 1 0.0
+ IcmpOutMsgs 1 0.0
+ IcmpOutDestUnreachs 1 0.0
+ IcmpMsgOutType3 1 0.0
+ IpExtInNoRoutes 1 0.0
+ IpExtInOctets 60 0.0
+ IpExtOutOctets 88 0.0
+ IpExtInNoECTPkts 1 0.0
+
+We enabled IP forwarding on server B, when server B received a packet
+which destination IP address is 8.8.8.8, server B will try to forward
+this packet. We have deleted the default route, there was no route for
+8.8.8.8, so server B increase IpExtInNoRoutes and sent the "ICMP
+Destination Unreachable" message to server A.
+
+Third, generate IpOutNoRoutes. Run ping command on server B::
+
+ $ ping -c 1 8.8.8.8
+ connect: Network is unreachable
+
+Run nstat on server B::
+
+ $ nstat
+ #kernel
+ IpOutNoRoutes 1 0.0
+
+We have deleted the default route on server B. Server B couldn't find
+a route for the 8.8.8.8 IP address, so server B increased
+IpOutNoRoutes.
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 93dfcef8afc4..7c46d9f4fefd 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -1220,7 +1220,7 @@ static void ad_churn_machine(struct port *port)
port->sm_churn_partner_state = AD_CHURN_MONITOR;
port->sm_churn_actor_timer_counter =
__ad_timer_to_ticks(AD_ACTOR_CHURN_TIMER, 0);
- port->sm_churn_partner_timer_counter =
+ port->sm_churn_partner_timer_counter =
__ad_timer_to_ticks(AD_PARTNER_CHURN_TIMER, 0);
return;
}
@@ -2128,7 +2128,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
if ((new_aggregator->lag_ports == port) &&
new_aggregator->is_active) {
netdev_info(bond->dev, "Removing an active aggregator\n");
- select_new_active_agg = 1;
+ select_new_active_agg = 1;
}
new_aggregator->is_individual = aggregator->is_individual;
diff --git a/drivers/net/dsa/microchip/Kconfig b/drivers/net/dsa/microchip/Kconfig
index a8caf9249d50..bea29fde9f3d 100644
--- a/drivers/net/dsa/microchip/Kconfig
+++ b/drivers/net/dsa/microchip/Kconfig
@@ -4,7 +4,7 @@ config NET_DSA_MICROCHIP_KSZ_COMMON
menuconfig NET_DSA_MICROCHIP_KSZ9477
tristate "Microchip KSZ9477 series switch support"
depends on NET_DSA
- select NET_DSA_TAG_KSZ
+ select NET_DSA_TAG_KSZ9477
select NET_DSA_MICROCHIP_KSZ_COMMON
help
This driver adds support for Microchip KSZ9477 switch chips.
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 0684657fbf9a..57a146a0dd4a 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -262,7 +262,7 @@ static int ksz9477_reset_switch(struct ksz_device *dev)
static enum dsa_tag_protocol ksz9477_get_tag_protocol(struct dsa_switch *ds,
int port)
{
- return DSA_TAG_PROTO_KSZ;
+ return DSA_TAG_PROTO_KSZ9477;
}
static int ksz9477_phy_read16(struct dsa_switch *ds, int addr, int reg)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 8a2e9cdd38ee..218a6dff3efc 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -118,6 +118,7 @@ enum board_idx {
NETXTREME_E_VF,
NETXTREME_C_VF,
NETXTREME_S_VF,
+ NETXTREME_E_P5_VF,
};
/* indexed by enum above */
@@ -160,6 +161,7 @@ static const struct {
[NETXTREME_E_VF] = { "Broadcom NetXtreme-E Ethernet Virtual Function" },
[NETXTREME_C_VF] = { "Broadcom NetXtreme-C Ethernet Virtual Function" },
[NETXTREME_S_VF] = { "Broadcom NetXtreme-S Ethernet Virtual Function" },
+ [NETXTREME_E_P5_VF] = { "Broadcom BCM5750X NetXtreme-E Ethernet Virtual Function" },
};
static const struct pci_device_id bnxt_pci_tbl[] = {
@@ -210,6 +212,7 @@ static const struct pci_device_id bnxt_pci_tbl[] = {
{ PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF },
{ PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF },
{ PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF },
+ { PCI_VDEVICE(BROADCOM, 0x1807), .driver_data = NETXTREME_E_P5_VF },
{ PCI_VDEVICE(BROADCOM, 0xd800), .driver_data = NETXTREME_S_VF },
#endif
{ 0 }
@@ -237,7 +240,7 @@ static struct workqueue_struct *bnxt_pf_wq;
static bool bnxt_vf_pciid(enum board_idx idx)
{
return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF ||
- idx == NETXTREME_S_VF);
+ idx == NETXTREME_S_VF || idx == NETXTREME_E_P5_VF);
}
#define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID)
@@ -3317,9 +3320,8 @@ static int bnxt_alloc_hwrm_short_cmd_req(struct bnxt *bp)
return 0;
}
-static void bnxt_free_stats(struct bnxt *bp)
+static void bnxt_free_port_stats(struct bnxt *bp)
{
- u32 size, i;
struct pci_dev *pdev = bp->pdev;
bp->flags &= ~BNXT_FLAG_PORT_STATS;
@@ -3345,6 +3347,12 @@ static void bnxt_free_stats(struct bnxt *bp)
bp->hw_rx_port_stats_ext_map);
bp->hw_rx_port_stats_ext = NULL;
}
+}
+
+static void bnxt_free_ring_stats(struct bnxt *bp)
+{
+ struct pci_dev *pdev = bp->pdev;
+ int size, i;
if (!bp->bnapi)
return;
@@ -3384,6 +3392,9 @@ static int bnxt_alloc_stats(struct bnxt *bp)
}
if (BNXT_PF(bp) && bp->chip_num != CHIP_NUM_58700) {
+ if (bp->hw_rx_port_stats)
+ goto alloc_ext_stats;
+
bp->hw_port_stats_size = sizeof(struct rx_port_stats) +
sizeof(struct tx_port_stats) + 1024;
@@ -3400,11 +3411,15 @@ static int bnxt_alloc_stats(struct bnxt *bp)
sizeof(struct rx_port_stats) + 512;
bp->flags |= BNXT_FLAG_PORT_STATS;
+alloc_ext_stats:
/* Display extended statistics only if FW supports it */
if (bp->hwrm_spec_code < 0x10804 ||
bp->hwrm_spec_code == 0x10900)
return 0;
+ if (bp->hw_rx_port_stats_ext)
+ goto alloc_tx_ext_stats;
+
bp->hw_rx_port_stats_ext =
dma_zalloc_coherent(&pdev->dev,
sizeof(struct rx_port_stats_ext),
@@ -3413,6 +3428,10 @@ static int bnxt_alloc_stats(struct bnxt *bp)
if (!bp->hw_rx_port_stats_ext)
return 0;
+alloc_tx_ext_stats:
+ if (bp->hw_tx_port_stats_ext)
+ return 0;
+
if (bp->hwrm_spec_code >= 0x10902) {
bp->hw_tx_port_stats_ext =
dma_zalloc_coherent(&pdev->dev,
@@ -3520,7 +3539,7 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init)
bnxt_free_cp_rings(bp);
bnxt_free_ntp_fltrs(bp, irq_re_init);
if (irq_re_init) {
- bnxt_free_stats(bp);
+ bnxt_free_ring_stats(bp);
bnxt_free_ring_grps(bp);
bnxt_free_vnics(bp);
kfree(bp->tx_ring_map);
@@ -5161,7 +5180,6 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp)
hw_resc->resv_vnics = le16_to_cpu(resp->alloc_vnics);
cp = le16_to_cpu(resp->alloc_cmpl_rings);
stats = le16_to_cpu(resp->alloc_stat_ctx);
- cp = min_t(u16, cp, stats);
hw_resc->resv_irqs = cp;
if (bp->flags & BNXT_FLAG_CHIP_P5) {
int rx = hw_resc->resv_rx_rings;
@@ -5180,6 +5198,7 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp)
hw_resc->resv_hw_ring_grps = rx;
}
hw_resc->resv_cp_rings = cp;
+ hw_resc->resv_stat_ctxs = stats;
}
mutex_unlock(&bp->hwrm_cmd_lock);
return 0;
@@ -5209,7 +5228,7 @@ static bool bnxt_rfs_supported(struct bnxt *bp);
static void
__bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req,
int tx_rings, int rx_rings, int ring_grps,
- int cp_rings, int vnics)
+ int cp_rings, int stats, int vnics)
{
u32 enables = 0;
@@ -5251,7 +5270,7 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req,
req->num_rsscos_ctxs =
cpu_to_le16(ring_grps + 1);
}
- req->num_stat_ctxs = req->num_cmpl_rings;
+ req->num_stat_ctxs = cpu_to_le16(stats);
req->num_vnics = cpu_to_le16(vnics);
}
req->enables = cpu_to_le32(enables);
@@ -5261,7 +5280,7 @@ static void
__bnxt_hwrm_reserve_vf_rings(struct bnxt *bp,
struct hwrm_func_vf_cfg_input *req, int tx_rings,
int rx_rings, int ring_grps, int cp_rings,
- int vnics)
+ int stats, int vnics)
{
u32 enables = 0;
@@ -5294,7 +5313,7 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp,
req->num_hw_ring_grps = cpu_to_le16(ring_grps);
req->num_rsscos_ctxs = cpu_to_le16(BNXT_VF_MAX_RSS_CTX);
}
- req->num_stat_ctxs = req->num_cmpl_rings;
+ req->num_stat_ctxs = cpu_to_le16(stats);
req->num_vnics = cpu_to_le16(vnics);
req->enables = cpu_to_le32(enables);
@@ -5302,13 +5321,13 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp,
static int
bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings, int vnics)
+ int ring_grps, int cp_rings, int stats, int vnics)
{
struct hwrm_func_cfg_input req = {0};
int rc;
__bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
- cp_rings, vnics);
+ cp_rings, stats, vnics);
if (!req.enables)
return 0;
@@ -5325,7 +5344,7 @@ bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
static int
bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings, int vnics)
+ int ring_grps, int cp_rings, int stats, int vnics)
{
struct hwrm_func_vf_cfg_input req = {0};
int rc;
@@ -5336,7 +5355,7 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
}
__bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
- cp_rings, vnics);
+ cp_rings, stats, vnics);
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
return -ENOMEM;
@@ -5346,15 +5365,17 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
}
static int bnxt_hwrm_reserve_rings(struct bnxt *bp, int tx, int rx, int grp,
- int cp, int vnic)
+ int cp, int stat, int vnic)
{
if (BNXT_PF(bp))
- return bnxt_hwrm_reserve_pf_rings(bp, tx, rx, grp, cp, vnic);
+ return bnxt_hwrm_reserve_pf_rings(bp, tx, rx, grp, cp, stat,
+ vnic);
else
- return bnxt_hwrm_reserve_vf_rings(bp, tx, rx, grp, cp, vnic);
+ return bnxt_hwrm_reserve_vf_rings(bp, tx, rx, grp, cp, stat,
+ vnic);
}
-static int bnxt_nq_rings_in_use(struct bnxt *bp)
+int bnxt_nq_rings_in_use(struct bnxt *bp)
{
int cp = bp->cp_nr_rings;
int ulp_msix, ulp_base;
@@ -5380,12 +5401,17 @@ static int bnxt_cp_rings_in_use(struct bnxt *bp)
return cp;
}
+static int bnxt_get_func_stat_ctxs(struct bnxt *bp)
+{
+ return bp->cp_nr_rings + bnxt_get_ulp_stat_ctxs(bp);
+}
+
static bool bnxt_need_reserve_rings(struct bnxt *bp)
{
struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
int cp = bnxt_cp_rings_in_use(bp);
int nq = bnxt_nq_rings_in_use(bp);
- int rx = bp->rx_nr_rings;
+ int rx = bp->rx_nr_rings, stat;
int vnic = 1, grp = rx;
if (bp->hwrm_spec_code < 0x10601)
@@ -5398,9 +5424,11 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp)
vnic = rx + 1;
if (bp->flags & BNXT_FLAG_AGG_RINGS)
rx <<= 1;
+ stat = bnxt_get_func_stat_ctxs(bp);
if (BNXT_NEW_RM(bp) &&
(hw_resc->resv_rx_rings != rx || hw_resc->resv_cp_rings != cp ||
hw_resc->resv_irqs < nq || hw_resc->resv_vnics != vnic ||
+ hw_resc->resv_stat_ctxs != stat ||
(hw_resc->resv_hw_ring_grps != grp &&
!(bp->flags & BNXT_FLAG_CHIP_P5))))
return true;
@@ -5414,8 +5442,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
int tx = bp->tx_nr_rings;
int rx = bp->rx_nr_rings;
int grp, rx_rings, rc;
+ int vnic = 1, stat;
bool sh = false;
- int vnic = 1;
if (!bnxt_need_reserve_rings(bp))
return 0;
@@ -5427,8 +5455,9 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
if (bp->flags & BNXT_FLAG_AGG_RINGS)
rx <<= 1;
grp = bp->rx_nr_rings;
+ stat = bnxt_get_func_stat_ctxs(bp);
- rc = bnxt_hwrm_reserve_rings(bp, tx, rx, grp, cp, vnic);
+ rc = bnxt_hwrm_reserve_rings(bp, tx, rx, grp, cp, stat, vnic);
if (rc)
return rc;
@@ -5438,6 +5467,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
cp = hw_resc->resv_irqs;
grp = hw_resc->resv_hw_ring_grps;
vnic = hw_resc->resv_vnics;
+ stat = hw_resc->resv_stat_ctxs;
}
rx_rings = rx;
@@ -5456,6 +5486,10 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
}
}
rx_rings = min_t(int, rx_rings, grp);
+ cp = min_t(int, cp, bp->cp_nr_rings);
+ if (stat > bnxt_get_ulp_stat_ctxs(bp))
+ stat -= bnxt_get_ulp_stat_ctxs(bp);
+ cp = min_t(int, cp, stat);
rc = bnxt_trim_rings(bp, &rx_rings, &tx, cp, sh);
if (bp->flags & BNXT_FLAG_AGG_RINGS)
rx = rx_rings << 1;
@@ -5464,14 +5498,15 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
bp->rx_nr_rings = rx_rings;
bp->cp_nr_rings = cp;
- if (!tx || !rx || !cp || !grp || !vnic)
+ if (!tx || !rx || !cp || !grp || !vnic || !stat)
return -ENOMEM;
return rc;
}
static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings, int vnics)
+ int ring_grps, int cp_rings, int stats,
+ int vnics)
{
struct hwrm_func_vf_cfg_input req = {0};
u32 flags;
@@ -5481,7 +5516,7 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
return 0;
__bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
- cp_rings, vnics);
+ cp_rings, stats, vnics);
flags = FUNC_VF_CFG_REQ_FLAGS_TX_ASSETS_TEST |
FUNC_VF_CFG_REQ_FLAGS_RX_ASSETS_TEST |
FUNC_VF_CFG_REQ_FLAGS_CMPL_ASSETS_TEST |
@@ -5499,14 +5534,15 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
}
static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings, int vnics)
+ int ring_grps, int cp_rings, int stats,
+ int vnics)
{
struct hwrm_func_cfg_input req = {0};
u32 flags;
int rc;
__bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
- cp_rings, vnics);
+ cp_rings, stats, vnics);
flags = FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST;
if (BNXT_NEW_RM(bp)) {
flags |= FUNC_CFG_REQ_FLAGS_RX_ASSETS_TEST |
@@ -5527,17 +5563,19 @@ static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
}
static int bnxt_hwrm_check_rings(struct bnxt *bp, int tx_rings, int rx_rings,
- int ring_grps, int cp_rings, int vnics)
+ int ring_grps, int cp_rings, int stats,
+ int vnics)
{
if (bp->hwrm_spec_code < 0x10801)
return 0;
if (BNXT_PF(bp))
return bnxt_hwrm_check_pf_rings(bp, tx_rings, rx_rings,
- ring_grps, cp_rings, vnics);
+ ring_grps, cp_rings, stats,
+ vnics);
return bnxt_hwrm_check_vf_rings(bp, tx_rings, rx_rings, ring_grps,
- cp_rings, vnics);
+ cp_rings, stats, vnics);
}
static void bnxt_hwrm_coal_params_qcaps(struct bnxt *bp)
@@ -6221,7 +6259,7 @@ int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all)
if (bp->flags & BNXT_FLAG_CHIP_P5) {
u16 max_msix = le16_to_cpu(resp->max_msix);
- hw_resc->max_irqs = min_t(u16, hw_resc->max_irqs, max_msix);
+ hw_resc->max_nqs = max_msix;
hw_resc->max_hw_ring_grps = hw_resc->max_rx_rings;
}
@@ -6489,6 +6527,7 @@ static int bnxt_hwrm_port_qstats(struct bnxt *bp)
static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp)
{
struct hwrm_port_qstats_ext_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_queue_pri2cos_qcfg_input req2 = {0};
struct hwrm_port_qstats_ext_input req = {0};
struct bnxt_pf_info *pf = &bp->pf;
int rc;
@@ -6511,6 +6550,34 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp)
bp->fw_rx_stats_ext_size = 0;
bp->fw_tx_stats_ext_size = 0;
}
+ if (bp->fw_tx_stats_ext_size <=
+ offsetof(struct tx_port_stats_ext, pfc_pri0_tx_duration_us) / 8) {
+ mutex_unlock(&bp->hwrm_cmd_lock);
+ bp->pri2cos_valid = 0;
+ return rc;
+ }
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req2, HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
+ req2.flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN);
+
+ rc = _hwrm_send_message(bp, &req2, sizeof(req2), HWRM_CMD_TIMEOUT);
+ if (!rc) {
+ struct hwrm_queue_pri2cos_qcfg_output *resp2;
+ u8 *pri2cos;
+ int i, j;
+
+ resp2 = bp->hwrm_cmd_resp_addr;
+ pri2cos = &resp2->pri0_cos_queue_id;
+ for (i = 0; i < 8; i++) {
+ u8 queue_id = pri2cos[i];
+
+ for (j = 0; j < bp->max_q; j++) {
+ if (bp->q_ids[j] == queue_id)
+ bp->pri2cos[i] = j;
+ }
+ }
+ bp->pri2cos_valid = 1;
+ }
mutex_unlock(&bp->hwrm_cmd_lock);
return rc;
}
@@ -7035,17 +7102,12 @@ unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp)
return bp->hw_resc.max_stat_ctxs;
}
-void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max)
-{
- bp->hw_resc.max_stat_ctxs = max;
-}
-
unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp)
{
return bp->hw_resc.max_cp_rings;
}
-unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp)
+static unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp)
{
unsigned int cp = bp->hw_resc.max_cp_rings;
@@ -7059,6 +7121,9 @@ static unsigned int bnxt_get_max_func_irqs(struct bnxt *bp)
{
struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+ if (bp->flags & BNXT_FLAG_CHIP_P5)
+ return min_t(unsigned int, hw_resc->max_irqs, hw_resc->max_nqs);
+
return min_t(unsigned int, hw_resc->max_irqs, hw_resc->max_cp_rings);
}
@@ -7067,6 +7132,26 @@ static void bnxt_set_max_func_irqs(struct bnxt *bp, unsigned int max_irqs)
bp->hw_resc.max_irqs = max_irqs;
}
+unsigned int bnxt_get_avail_cp_rings_for_en(struct bnxt *bp)
+{
+ unsigned int cp;
+
+ cp = bnxt_get_max_func_cp_rings_for_en(bp);
+ if (bp->flags & BNXT_FLAG_CHIP_P5)
+ return cp - bp->rx_nr_rings - bp->tx_nr_rings;
+ else
+ return cp - bp->cp_nr_rings;
+}
+
+unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp)
+{
+ unsigned int stat;
+
+ stat = bnxt_get_max_func_stat_ctxs(bp) - bnxt_get_ulp_stat_ctxs(bp);
+ stat -= bp->cp_nr_rings;
+ return stat;
+}
+
int bnxt_get_avail_msix(struct bnxt *bp, int num)
{
int max_cp = bnxt_get_max_func_cp_rings(bp);
@@ -7204,23 +7289,26 @@ static void bnxt_clear_int_mode(struct bnxt *bp)
int bnxt_reserve_rings(struct bnxt *bp)
{
int tcs = netdev_get_num_tc(bp->dev);
+ bool reinit_irq = false;
int rc;
if (!bnxt_need_reserve_rings(bp))
return 0;
- rc = __bnxt_reserve_rings(bp);
- if (rc) {
- netdev_err(bp->dev, "ring reservation failure rc: %d\n", rc);
- return rc;
- }
if (BNXT_NEW_RM(bp) && (bnxt_get_num_msix(bp) != bp->total_irqs)) {
bnxt_ulp_irq_stop(bp);
bnxt_clear_int_mode(bp);
- rc = bnxt_init_int_mode(bp);
+ reinit_irq = true;
+ }
+ rc = __bnxt_reserve_rings(bp);
+ if (reinit_irq) {
+ if (!rc)
+ rc = bnxt_init_int_mode(bp);
bnxt_ulp_irq_restart(bp, rc);
- if (rc)
- return rc;
+ }
+ if (rc) {
+ netdev_err(bp->dev, "ring reservation/IRQ init failure rc: %d\n", rc);
+ return rc;
}
if (tcs && (bp->tx_nr_rings_per_tc * tcs != bp->tx_nr_rings)) {
netdev_err(bp->dev, "tx ring reservation failure\n");
@@ -7228,7 +7316,6 @@ int bnxt_reserve_rings(struct bnxt *bp)
bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
return -ENOMEM;
}
- bp->num_stat_ctxs = bp->cp_nr_rings;
return 0;
}
@@ -7822,6 +7909,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
rc = bnxt_hwrm_func_resc_qcaps(bp, true);
hw_resc->resv_cp_rings = 0;
+ hw_resc->resv_stat_ctxs = 0;
hw_resc->resv_irqs = 0;
hw_resc->resv_tx_rings = 0;
hw_resc->resv_rx_rings = 0;
@@ -8261,6 +8349,9 @@ static bool bnxt_drv_busy(struct bnxt *bp)
test_bit(BNXT_STATE_READ_STATS, &bp->state));
}
+static void bnxt_get_ring_stats(struct bnxt *bp,
+ struct rtnl_link_stats64 *stats);
+
static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init,
bool link_re_init)
{
@@ -8286,6 +8377,9 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init,
del_timer_sync(&bp->timer);
bnxt_free_skbs(bp);
+ /* Save ring stats before shutdown */
+ if (bp->bnapi)
+ bnxt_get_ring_stats(bp, &bp->net_stats_prev);
if (irq_re_init) {
bnxt_free_irq(bp);
bnxt_del_napi(bp);
@@ -8347,23 +8441,12 @@ static int bnxt_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EOPNOTSUPP;
}
-static void
-bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+static void bnxt_get_ring_stats(struct bnxt *bp,
+ struct rtnl_link_stats64 *stats)
{
- u32 i;
- struct bnxt *bp = netdev_priv(dev);
+ int i;
- set_bit(BNXT_STATE_READ_STATS, &bp->state);
- /* Make sure bnxt_close_nic() sees that we are reading stats before
- * we check the BNXT_STATE_OPEN flag.
- */
- smp_mb__after_atomic();
- if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
- clear_bit(BNXT_STATE_READ_STATS, &bp->state);
- return;
- }
- /* TODO check if we need to synchronize with bnxt_close path */
for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i];
struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
@@ -8392,6 +8475,40 @@ bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
stats->tx_dropped += le64_to_cpu(hw_stats->tx_drop_pkts);
}
+}
+
+static void bnxt_add_prev_stats(struct bnxt *bp,
+ struct rtnl_link_stats64 *stats)
+{
+ struct rtnl_link_stats64 *prev_stats = &bp->net_stats_prev;
+
+ stats->rx_packets += prev_stats->rx_packets;
+ stats->tx_packets += prev_stats->tx_packets;
+ stats->rx_bytes += prev_stats->rx_bytes;
+ stats->tx_bytes += prev_stats->tx_bytes;
+ stats->rx_missed_errors += prev_stats->rx_missed_errors;
+ stats->multicast += prev_stats->multicast;
+ stats->tx_dropped += prev_stats->tx_dropped;
+}
+
+static void
+bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ struct bnxt *bp = netdev_priv(dev);
+
+ set_bit(BNXT_STATE_READ_STATS, &bp->state);
+ /* Make sure bnxt_close_nic() sees that we are reading stats before
+ * we check the BNXT_STATE_OPEN flag.
+ */
+ smp_mb__after_atomic();
+ if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
+ clear_bit(BNXT_STATE_READ_STATS, &bp->state);
+ *stats = bp->net_stats_prev;
+ return;
+ }
+
+ bnxt_get_ring_stats(bp, stats);
+ bnxt_add_prev_stats(bp, stats);
if (bp->flags & BNXT_FLAG_PORT_STATS) {
struct rx_port_stats *rx = bp->hw_rx_port_stats;
@@ -8627,12 +8744,12 @@ static bool bnxt_rfs_capable(struct bnxt *bp)
if (vnics == bp->hw_resc.resv_vnics)
return true;
- bnxt_hwrm_reserve_rings(bp, 0, 0, 0, 0, vnics);
+ bnxt_hwrm_reserve_rings(bp, 0, 0, 0, 0, 0, vnics);
if (vnics <= bp->hw_resc.resv_vnics)
return true;
netdev_warn(bp->dev, "Unable to reserve resources to support NTUPLE filters.\n");
- bnxt_hwrm_reserve_rings(bp, 0, 0, 0, 0, 1);
+ bnxt_hwrm_reserve_rings(bp, 0, 0, 0, 0, 0, 1);
return false;
#else
return false;
@@ -9043,7 +9160,7 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
int tx_xdp)
{
int max_rx, max_tx, tx_sets = 1;
- int tx_rings_needed;
+ int tx_rings_needed, stats;
int rx_rings = rx;
int cp, vnics, rc;
@@ -9068,10 +9185,13 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
if (bp->flags & BNXT_FLAG_AGG_RINGS)
rx_rings <<= 1;
cp = sh ? max_t(int, tx_rings_needed, rx) : tx_rings_needed + rx;
- if (BNXT_NEW_RM(bp))
+ stats = cp;
+ if (BNXT_NEW_RM(bp)) {
cp += bnxt_get_ulp_msix_num(bp);
+ stats += bnxt_get_ulp_stat_ctxs(bp);
+ }
return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp,
- vnics);
+ stats, vnics);
}
static void bnxt_unmap_bars(struct bnxt *bp, struct pci_dev *pdev)
@@ -9295,7 +9415,6 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
bp->tx_nr_rings += bp->tx_nr_rings_xdp;
bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
bp->tx_nr_rings + bp->rx_nr_rings;
- bp->num_stat_ctxs = bp->cp_nr_rings;
if (netif_running(bp->dev))
return bnxt_open_nic(bp, true, false);
@@ -9761,6 +9880,7 @@ static void bnxt_remove_one(struct pci_dev *pdev)
kfree(bp->ctx);
bp->ctx = NULL;
bnxt_cleanup_pci(bp);
+ bnxt_free_port_stats(bp);
free_netdev(dev);
}
@@ -9835,7 +9955,7 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx,
*max_cp = bnxt_get_max_func_cp_rings_for_en(bp);
max_irq = min_t(int, bnxt_get_max_func_irqs(bp) -
bnxt_get_ulp_msix_num(bp),
- bnxt_get_max_func_stat_ctxs(bp));
+ hw_resc->max_stat_ctxs - bnxt_get_ulp_stat_ctxs(bp));
if (!(bp->flags & BNXT_FLAG_CHIP_P5))
*max_cp = min_t(int, *max_cp, max_irq);
max_ring_grps = hw_resc->max_hw_ring_grps;
@@ -9966,7 +10086,6 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
netdev_warn(bp->dev, "2nd rings reservation failed.\n");
bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
}
- bp->num_stat_ctxs = bp->cp_nr_rings;
if (BNXT_CHIP_TYPE_NITRO_A0(bp)) {
bp->rx_nr_rings++;
bp->cp_nr_rings++;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 3030931ccaf8..4fdfd7a87805 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -927,6 +927,8 @@ struct bnxt_hw_resc {
u16 resv_vnics;
u16 min_stat_ctxs;
u16 max_stat_ctxs;
+ u16 resv_stat_ctxs;
+ u16 max_nqs;
u16 max_irqs;
u16 resv_irqs;
};
@@ -1416,8 +1418,6 @@ struct bnxt {
int cp_nr_pages;
int cp_nr_rings;
- int num_stat_ctxs;
-
/* grp_info indexed by completion ring index */
struct bnxt_ring_grp_info *grp_info;
struct bnxt_vnic_info *vnic_info;
@@ -1472,6 +1472,7 @@ struct bnxt {
void *hwrm_cmd_resp_addr;
dma_addr_t hwrm_cmd_resp_dma_addr;
+ struct rtnl_link_stats64 net_stats_prev;
struct rx_port_stats *hw_rx_port_stats;
struct tx_port_stats *hw_tx_port_stats;
struct rx_port_stats_ext *hw_rx_port_stats_ext;
@@ -1483,6 +1484,8 @@ struct bnxt {
int hw_port_stats_size;
u16 fw_rx_stats_ext_size;
u16 fw_tx_stats_ext_size;
+ u8 pri2cos[8];
+ u8 pri2cos_valid;
u16 hwrm_max_req_len;
u16 hwrm_max_ext_req_len;
@@ -1686,11 +1689,12 @@ int bnxt_hwrm_func_rgtr_async_events(struct bnxt *bp, unsigned long *bmap,
int bmap_size);
int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id);
int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings);
+int bnxt_nq_rings_in_use(struct bnxt *bp);
int bnxt_hwrm_set_coal(struct bnxt *);
unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp);
-void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max);
+unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp);
unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp);
-unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp);
+unsigned int bnxt_get_avail_cp_rings_for_en(struct bnxt *bp);
int bnxt_get_avail_msix(struct bnxt *bp, int num);
int bnxt_reserve_rings(struct bnxt *bp);
void bnxt_tx_disable(struct bnxt *bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index a85d2be986af..15c7041e937b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -471,7 +471,10 @@ static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc)
if (total_ets_bw > 100)
return -EINVAL;
- *tc = max_tc + 1;
+ if (max_tc >= bp->max_tc)
+ *tc = bp->max_tc;
+ else
+ *tc = max_tc + 1;
return 0;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 6cc69a58478a..997775777dbe 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -207,6 +207,34 @@ reset_coalesce:
BNXT_TX_STATS_EXT_COS_ENTRY(6), \
BNXT_TX_STATS_EXT_COS_ENTRY(7) \
+#define BNXT_RX_STATS_PRI_ENTRY(counter, n) \
+ { BNXT_RX_STATS_EXT_OFFSET(counter##_cos0), \
+ __stringify(counter##_pri##n) }
+
+#define BNXT_TX_STATS_PRI_ENTRY(counter, n) \
+ { BNXT_TX_STATS_EXT_OFFSET(counter##_cos0), \
+ __stringify(counter##_pri##n) }
+
+#define BNXT_RX_STATS_PRI_ENTRIES(counter) \
+ BNXT_RX_STATS_PRI_ENTRY(counter, 0), \
+ BNXT_RX_STATS_PRI_ENTRY(counter, 1), \
+ BNXT_RX_STATS_PRI_ENTRY(counter, 2), \
+ BNXT_RX_STATS_PRI_ENTRY(counter, 3), \
+ BNXT_RX_STATS_PRI_ENTRY(counter, 4), \
+ BNXT_RX_STATS_PRI_ENTRY(counter, 5), \
+ BNXT_RX_STATS_PRI_ENTRY(counter, 6), \
+ BNXT_RX_STATS_PRI_ENTRY(counter, 7)
+
+#define BNXT_TX_STATS_PRI_ENTRIES(counter) \
+ BNXT_TX_STATS_PRI_ENTRY(counter, 0), \
+ BNXT_TX_STATS_PRI_ENTRY(counter, 1), \
+ BNXT_TX_STATS_PRI_ENTRY(counter, 2), \
+ BNXT_TX_STATS_PRI_ENTRY(counter, 3), \
+ BNXT_TX_STATS_PRI_ENTRY(counter, 4), \
+ BNXT_TX_STATS_PRI_ENTRY(counter, 5), \
+ BNXT_TX_STATS_PRI_ENTRY(counter, 6), \
+ BNXT_TX_STATS_PRI_ENTRY(counter, 7)
+
enum {
RX_TOTAL_DISCARDS,
TX_TOTAL_DISCARDS,
@@ -327,8 +355,41 @@ static const struct {
BNXT_TX_STATS_EXT_PFC_ENTRIES,
};
+static const struct {
+ long base_off;
+ char string[ETH_GSTRING_LEN];
+} bnxt_rx_bytes_pri_arr[] = {
+ BNXT_RX_STATS_PRI_ENTRIES(rx_bytes),
+};
+
+static const struct {
+ long base_off;
+ char string[ETH_GSTRING_LEN];
+} bnxt_rx_pkts_pri_arr[] = {
+ BNXT_RX_STATS_PRI_ENTRIES(rx_packets),
+};
+
+static const struct {
+ long base_off;
+ char string[ETH_GSTRING_LEN];
+} bnxt_tx_bytes_pri_arr[] = {
+ BNXT_TX_STATS_PRI_ENTRIES(tx_bytes),
+};
+
+static const struct {
+ long base_off;
+ char string[ETH_GSTRING_LEN];
+} bnxt_tx_pkts_pri_arr[] = {
+ BNXT_TX_STATS_PRI_ENTRIES(tx_packets),
+};
+
#define BNXT_NUM_SW_FUNC_STATS ARRAY_SIZE(bnxt_sw_func_stats)
#define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr)
+#define BNXT_NUM_STATS_PRI \
+ (ARRAY_SIZE(bnxt_rx_bytes_pri_arr) + \
+ ARRAY_SIZE(bnxt_rx_pkts_pri_arr) + \
+ ARRAY_SIZE(bnxt_tx_bytes_pri_arr) + \
+ ARRAY_SIZE(bnxt_tx_pkts_pri_arr))
static int bnxt_get_num_stats(struct bnxt *bp)
{
@@ -339,9 +400,12 @@ static int bnxt_get_num_stats(struct bnxt *bp)
if (bp->flags & BNXT_FLAG_PORT_STATS)
num_stats += BNXT_NUM_PORT_STATS;
- if (bp->flags & BNXT_FLAG_PORT_STATS_EXT)
+ if (bp->flags & BNXT_FLAG_PORT_STATS_EXT) {
num_stats += bp->fw_rx_stats_ext_size +
bp->fw_tx_stats_ext_size;
+ if (bp->pri2cos_valid)
+ num_stats += BNXT_NUM_STATS_PRI;
+ }
return num_stats;
}
@@ -369,8 +433,10 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
struct bnxt *bp = netdev_priv(dev);
u32 stat_fields = sizeof(struct ctx_hw_stats) / 8;
- if (!bp->bnapi)
- return;
+ if (!bp->bnapi) {
+ j += BNXT_NUM_STATS * bp->cp_nr_rings + BNXT_NUM_SW_FUNC_STATS;
+ goto skip_ring_stats;
+ }
for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++)
bnxt_sw_func_stats[i].counter = 0;
@@ -395,6 +461,7 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++, j++)
buf[j] = bnxt_sw_func_stats[i].counter;
+skip_ring_stats:
if (bp->flags & BNXT_FLAG_PORT_STATS) {
__le64 *port_stats = (__le64 *)bp->hw_rx_port_stats;
@@ -415,6 +482,32 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
buf[j] = le64_to_cpu(*(tx_port_stats_ext +
bnxt_tx_port_stats_ext_arr[i].offset));
}
+ if (bp->pri2cos_valid) {
+ for (i = 0; i < 8; i++, j++) {
+ long n = bnxt_rx_bytes_pri_arr[i].base_off +
+ bp->pri2cos[i];
+
+ buf[j] = le64_to_cpu(*(rx_port_stats_ext + n));
+ }
+ for (i = 0; i < 8; i++, j++) {
+ long n = bnxt_rx_pkts_pri_arr[i].base_off +
+ bp->pri2cos[i];
+
+ buf[j] = le64_to_cpu(*(rx_port_stats_ext + n));
+ }
+ for (i = 0; i < 8; i++, j++) {
+ long n = bnxt_tx_bytes_pri_arr[i].base_off +
+ bp->pri2cos[i];
+
+ buf[j] = le64_to_cpu(*(tx_port_stats_ext + n));
+ }
+ for (i = 0; i < 8; i++, j++) {
+ long n = bnxt_tx_pkts_pri_arr[i].base_off +
+ bp->pri2cos[i];
+
+ buf[j] = le64_to_cpu(*(tx_port_stats_ext + n));
+ }
+ }
}
}
@@ -493,6 +586,28 @@ static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
bnxt_tx_port_stats_ext_arr[i].string);
buf += ETH_GSTRING_LEN;
}
+ if (bp->pri2cos_valid) {
+ for (i = 0; i < 8; i++) {
+ strcpy(buf,
+ bnxt_rx_bytes_pri_arr[i].string);
+ buf += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < 8; i++) {
+ strcpy(buf,
+ bnxt_rx_pkts_pri_arr[i].string);
+ buf += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < 8; i++) {
+ strcpy(buf,
+ bnxt_tx_bytes_pri_arr[i].string);
+ buf += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < 8; i++) {
+ strcpy(buf,
+ bnxt_tx_pkts_pri_arr[i].string);
+ buf += ETH_GSTRING_LEN;
+ }
+ }
}
break;
case ETH_SS_TEST:
@@ -663,8 +778,6 @@ static int bnxt_set_channels(struct net_device *dev,
bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
bp->tx_nr_rings + bp->rx_nr_rings;
- bp->num_stat_ctxs = bp->cp_nr_rings;
-
/* After changing number of rx channels, update NTUPLE feature. */
netdev_update_features(dev);
if (netif_running(dev)) {
@@ -1526,14 +1639,22 @@ static int bnxt_flash_nvram(struct net_device *dev,
rc = hwrm_send_message(bp, &req, sizeof(req), FLASH_NVRAM_TIMEOUT);
dma_free_coherent(&bp->pdev->dev, data_len, kmem, dma_handle);
+ if (rc == HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED) {
+ netdev_info(dev,
+ "PF does not have admin privileges to flash the device\n");
+ rc = -EACCES;
+ } else if (rc) {
+ rc = -EIO;
+ }
return rc;
}
static int bnxt_firmware_reset(struct net_device *dev,
u16 dir_type)
{
- struct bnxt *bp = netdev_priv(dev);
struct hwrm_fw_reset_input req = {0};
+ struct bnxt *bp = netdev_priv(dev);
+ int rc;
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1);
@@ -1573,7 +1694,15 @@ static int bnxt_firmware_reset(struct net_device *dev,
return -EINVAL;
}
- return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (rc == HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED) {
+ netdev_info(dev,
+ "PF does not have admin privileges to reset the device\n");
+ rc = -EACCES;
+ } else if (rc) {
+ rc = -EIO;
+ }
+ return rc;
}
static int bnxt_flash_firmware(struct net_device *dev,
@@ -1780,9 +1909,9 @@ static int bnxt_flash_package_from_file(struct net_device *dev,
struct hwrm_nvm_install_update_output *resp = bp->hwrm_cmd_resp_addr;
struct hwrm_nvm_install_update_input install = {0};
const struct firmware *fw;
+ int rc, hwrm_err = 0;
u32 item_len;
u16 index;
- int rc;
bnxt_hwrm_fw_set_time(bp);
@@ -1825,15 +1954,16 @@ static int bnxt_flash_package_from_file(struct net_device *dev,
memcpy(kmem, fw->data, fw->size);
modify.host_src_addr = cpu_to_le64(dma_handle);
- rc = hwrm_send_message(bp, &modify, sizeof(modify),
- FLASH_PACKAGE_TIMEOUT);
+ hwrm_err = hwrm_send_message(bp, &modify,
+ sizeof(modify),
+ FLASH_PACKAGE_TIMEOUT);
dma_free_coherent(&bp->pdev->dev, fw->size, kmem,
dma_handle);
}
}
release_firmware(fw);
- if (rc)
- return rc;
+ if (rc || hwrm_err)
+ goto err_exit;
if ((install_type & 0xffff) == 0)
install_type >>= 16;
@@ -1841,12 +1971,10 @@ static int bnxt_flash_package_from_file(struct net_device *dev,
install.install_type = cpu_to_le32(install_type);
mutex_lock(&bp->hwrm_cmd_lock);
- rc = _hwrm_send_message(bp, &install, sizeof(install),
- INSTALL_PACKAGE_TIMEOUT);
- if (rc) {
- rc = -EOPNOTSUPP;
+ hwrm_err = _hwrm_send_message(bp, &install, sizeof(install),
+ INSTALL_PACKAGE_TIMEOUT);
+ if (hwrm_err)
goto flash_pkg_exit;
- }
if (resp->error_code) {
u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err;
@@ -1854,12 +1982,11 @@ static int bnxt_flash_package_from_file(struct net_device *dev,
if (error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
install.flags |= cpu_to_le16(
NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG);
- rc = _hwrm_send_message(bp, &install, sizeof(install),
- INSTALL_PACKAGE_TIMEOUT);
- if (rc) {
- rc = -EOPNOTSUPP;
+ hwrm_err = _hwrm_send_message(bp, &install,
+ sizeof(install),
+ INSTALL_PACKAGE_TIMEOUT);
+ if (hwrm_err)
goto flash_pkg_exit;
- }
}
}
@@ -1870,6 +1997,14 @@ static int bnxt_flash_package_from_file(struct net_device *dev,
}
flash_pkg_exit:
mutex_unlock(&bp->hwrm_cmd_lock);
+err_exit:
+ if (hwrm_err == HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED) {
+ netdev_info(dev,
+ "PF does not have admin privileges to flash the device\n");
+ rc = -EACCES;
+ } else if (hwrm_err) {
+ rc = -EOPNOTSUPP;
+ }
return rc;
}
@@ -2450,17 +2585,37 @@ static int bnxt_hwrm_mac_loopback(struct bnxt *bp, bool enable)
return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
}
+static int bnxt_query_force_speeds(struct bnxt *bp, u16 *force_speeds)
+{
+ struct hwrm_port_phy_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_port_phy_qcaps_input req = {0};
+ int rc;
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_QCAPS, -1, -1);
+ mutex_lock(&bp->hwrm_cmd_lock);
+ rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (!rc)
+ *force_speeds = le16_to_cpu(resp->supported_speeds_force_mode);
+
+ mutex_unlock(&bp->hwrm_cmd_lock);
+ return rc;
+}
+
static int bnxt_disable_an_for_lpbk(struct bnxt *bp,
struct hwrm_port_phy_cfg_input *req)
{
struct bnxt_link_info *link_info = &bp->link_info;
- u16 fw_advertising = link_info->advertising;
+ u16 fw_advertising;
u16 fw_speed;
int rc;
if (!link_info->autoneg)
return 0;
+ rc = bnxt_query_force_speeds(bp, &fw_advertising);
+ if (rc)
+ return rc;
+
fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB;
if (netif_carrier_ok(bp->dev))
fw_speed = bp->link_info.link_speed;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 3962f6fd543c..d80f5c981d90 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -448,16 +448,22 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
u16 vf_stat_ctx, vf_vnics, vf_ring_grps;
struct bnxt_pf_info *pf = &bp->pf;
int i, rc = 0, min = 1;
+ u16 vf_msix = 0;
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_RESOURCE_CFG, -1, -1);
- vf_cp_rings = bnxt_get_max_func_cp_rings_for_en(bp) - bp->cp_nr_rings;
- vf_stat_ctx = hw_resc->max_stat_ctxs - bp->num_stat_ctxs;
+ if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ vf_msix = hw_resc->max_nqs - bnxt_nq_rings_in_use(bp);
+ vf_ring_grps = 0;
+ } else {
+ vf_ring_grps = hw_resc->max_hw_ring_grps - bp->rx_nr_rings;
+ }
+ vf_cp_rings = bnxt_get_avail_cp_rings_for_en(bp);
+ vf_stat_ctx = bnxt_get_avail_stat_ctxs_for_en(bp);
if (bp->flags & BNXT_FLAG_AGG_RINGS)
vf_rx_rings = hw_resc->max_rx_rings - bp->rx_nr_rings * 2;
else
vf_rx_rings = hw_resc->max_rx_rings - bp->rx_nr_rings;
- vf_ring_grps = hw_resc->max_hw_ring_grps - bp->rx_nr_rings;
vf_tx_rings = hw_resc->max_tx_rings - bp->tx_nr_rings;
vf_vnics = hw_resc->max_vnics - bp->nr_vnics;
vf_vnics = min_t(u16, vf_vnics, vf_rx_rings);
@@ -476,7 +482,8 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
req.min_l2_ctxs = cpu_to_le16(min);
req.min_vnics = cpu_to_le16(min);
req.min_stat_ctx = cpu_to_le16(min);
- req.min_hw_ring_grps = cpu_to_le16(min);
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ req.min_hw_ring_grps = cpu_to_le16(min);
} else {
vf_cp_rings /= num_vfs;
vf_tx_rings /= num_vfs;
@@ -500,6 +507,8 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
req.max_vnics = cpu_to_le16(vf_vnics);
req.max_stat_ctx = cpu_to_le16(vf_stat_ctx);
req.max_hw_ring_grps = cpu_to_le16(vf_ring_grps);
+ if (bp->flags & BNXT_FLAG_CHIP_P5)
+ req.max_msix = cpu_to_le16(vf_msix / num_vfs);
mutex_lock(&bp->hwrm_cmd_lock);
for (i = 0; i < num_vfs; i++) {
@@ -525,6 +534,8 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
hw_resc->max_rsscos_ctxs -= pf->active_vfs;
hw_resc->max_stat_ctxs -= le16_to_cpu(req.min_stat_ctx) * n;
hw_resc->max_vnics -= le16_to_cpu(req.min_vnics) * n;
+ if (bp->flags & BNXT_FLAG_CHIP_P5)
+ hw_resc->max_irqs -= vf_msix * n;
rc = pf->active_vfs;
}
@@ -539,19 +550,16 @@ static int bnxt_hwrm_func_cfg(struct bnxt *bp, int num_vfs)
u32 rc = 0, mtu, i;
u16 vf_tx_rings, vf_rx_rings, vf_cp_rings, vf_stat_ctx, vf_vnics;
struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
- u16 vf_ring_grps, max_stat_ctxs;
struct hwrm_func_cfg_input req = {0};
struct bnxt_pf_info *pf = &bp->pf;
int total_vf_tx_rings = 0;
+ u16 vf_ring_grps;
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
- max_stat_ctxs = hw_resc->max_stat_ctxs;
-
/* Remaining rings are distributed equally amongs VF's for now */
- vf_cp_rings = (bnxt_get_max_func_cp_rings_for_en(bp) -
- bp->cp_nr_rings) / num_vfs;
- vf_stat_ctx = (max_stat_ctxs - bp->num_stat_ctxs) / num_vfs;
+ vf_cp_rings = bnxt_get_avail_cp_rings_for_en(bp) / num_vfs;
+ vf_stat_ctx = bnxt_get_avail_stat_ctxs_for_en(bp) / num_vfs;
if (bp->flags & BNXT_FLAG_AGG_RINGS)
vf_rx_rings = (hw_resc->max_rx_rings - bp->rx_nr_rings * 2) /
num_vfs;
@@ -644,8 +652,8 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs)
*/
vfs_supported = *num_vfs;
- avail_cp = bnxt_get_max_func_cp_rings_for_en(bp) - bp->cp_nr_rings;
- avail_stat = hw_resc->max_stat_ctxs - bp->num_stat_ctxs;
+ avail_cp = bnxt_get_avail_cp_rings_for_en(bp);
+ avail_stat = bnxt_get_avail_stat_ctxs_for_en(bp);
avail_cp = min_t(int, avail_cp, avail_stat);
while (vfs_supported) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 0a3097baafde..ea45a9b8179e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -48,10 +48,8 @@ static int bnxt_register_dev(struct bnxt_en_dev *edev, int ulp_id,
max_stat_ctxs = bnxt_get_max_func_stat_ctxs(bp);
if (max_stat_ctxs <= BNXT_MIN_ROCE_STAT_CTXS ||
- bp->num_stat_ctxs == max_stat_ctxs)
+ bp->cp_nr_rings == max_stat_ctxs)
return -ENOMEM;
- bnxt_set_max_func_stat_ctxs(bp, max_stat_ctxs -
- BNXT_MIN_ROCE_STAT_CTXS);
}
atomic_set(&ulp->ref_count, 0);
@@ -82,14 +80,9 @@ static int bnxt_unregister_dev(struct bnxt_en_dev *edev, int ulp_id)
netdev_err(bp->dev, "ulp id %d not registered\n", ulp_id);
return -EINVAL;
}
- if (ulp_id == BNXT_ROCE_ULP) {
- unsigned int max_stat_ctxs;
+ if (ulp_id == BNXT_ROCE_ULP && ulp->msix_requested)
+ edev->en_ops->bnxt_free_msix(edev, ulp_id);
- max_stat_ctxs = bnxt_get_max_func_stat_ctxs(bp);
- bnxt_set_max_func_stat_ctxs(bp, max_stat_ctxs + 1);
- if (ulp->msix_requested)
- edev->en_ops->bnxt_free_msix(edev, ulp_id);
- }
if (ulp->max_async_event_id)
bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0);
@@ -218,6 +211,14 @@ int bnxt_get_ulp_msix_base(struct bnxt *bp)
return 0;
}
+int bnxt_get_ulp_stat_ctxs(struct bnxt *bp)
+{
+ if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP))
+ return BNXT_MIN_ROCE_STAT_CTXS;
+
+ return 0;
+}
+
static int bnxt_send_msg(struct bnxt_en_dev *edev, int ulp_id,
struct bnxt_fw_msg *fw_msg)
{
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
index d9bea37cd211..cd78453d0bf0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
@@ -90,6 +90,7 @@ static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev, int ulp_id)
int bnxt_get_ulp_msix_num(struct bnxt *bp);
int bnxt_get_ulp_msix_base(struct bnxt *bp);
+int bnxt_get_ulp_stat_ctxs(struct bnxt *bp);
void bnxt_ulp_stop(struct bnxt *bp);
void bnxt_ulp_start(struct bnxt *bp);
void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index bf6de02be396..0184ef6f05a7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -199,7 +199,6 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
bp->tx_nr_rings_xdp = tx_xdp;
bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp;
bp->cp_nr_rings = max_t(int, bp->tx_nr_rings, bp->rx_nr_rings);
- bp->num_stat_ctxs = bp->cp_nr_rings;
bnxt_set_tpa_flags(bp);
bnxt_set_ring_params(bp);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index cab492ec8f59..b0ff9fa183f4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -378,19 +378,7 @@ static int cim_qcfg_show(struct seq_file *seq, void *v)
QUEREMFLITS_G(p[2]) * 16);
return 0;
}
-
-static int cim_qcfg_open(struct inode *inode, struct file *file)
-{
- return single_open(file, cim_qcfg_show, inode->i_private);
-}
-
-static const struct file_operations cim_qcfg_fops = {
- .owner = THIS_MODULE,
- .open = cim_qcfg_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(cim_qcfg);
static int cimq_show(struct seq_file *seq, void *v, int idx)
{
@@ -860,8 +848,7 @@ static int tx_rate_show(struct seq_file *seq, void *v)
}
return 0;
}
-
-DEFINE_SIMPLE_DEBUGFS_FILE(tx_rate);
+DEFINE_SHOW_ATTRIBUTE(tx_rate);
static int cctrl_tbl_show(struct seq_file *seq, void *v)
{
@@ -893,8 +880,7 @@ static int cctrl_tbl_show(struct seq_file *seq, void *v)
kfree(incr);
return 0;
}
-
-DEFINE_SIMPLE_DEBUGFS_FILE(cctrl_tbl);
+DEFINE_SHOW_ATTRIBUTE(cctrl_tbl);
/* Format a value in a unit that differs from the value's native unit by the
* given factor.
@@ -955,8 +941,7 @@ static int clk_show(struct seq_file *seq, void *v)
return 0;
}
-
-DEFINE_SIMPLE_DEBUGFS_FILE(clk);
+DEFINE_SHOW_ATTRIBUTE(clk);
/* Firmware Device Log dump. */
static const char * const devlog_level_strings[] = {
@@ -1990,22 +1975,10 @@ static int sensors_show(struct seq_file *seq, void *v)
return 0;
}
-
-DEFINE_SIMPLE_DEBUGFS_FILE(sensors);
+DEFINE_SHOW_ATTRIBUTE(sensors);
#if IS_ENABLED(CONFIG_IPV6)
-static int clip_tbl_open(struct inode *inode, struct file *file)
-{
- return single_open(file, clip_tbl_show, inode->i_private);
-}
-
-static const struct file_operations clip_tbl_debugfs_fops = {
- .owner = THIS_MODULE,
- .open = clip_tbl_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release
-};
+DEFINE_SHOW_ATTRIBUTE(clip_tbl);
#endif
/*RSS Table.
@@ -2208,8 +2181,7 @@ static int rss_config_show(struct seq_file *seq, void *v)
return 0;
}
-
-DEFINE_SIMPLE_DEBUGFS_FILE(rss_config);
+DEFINE_SHOW_ATTRIBUTE(rss_config);
/* RSS Secret Key.
*/
@@ -2628,19 +2600,7 @@ static int resources_show(struct seq_file *seq, void *v)
return 0;
}
-
-static int resources_open(struct inode *inode, struct file *file)
-{
- return single_open(file, resources_show, inode->i_private);
-}
-
-static const struct file_operations resources_debugfs_fops = {
- .owner = THIS_MODULE,
- .open = resources_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
+DEFINE_SHOW_ATTRIBUTE(resources);
/**
* ethqset2pinfo - return port_info of an Ethernet Queue Set
@@ -3233,8 +3193,7 @@ static int tid_info_show(struct seq_file *seq, void *v)
t4_read_reg(adap, LE_DB_ACT_CNT_IPV6_A));
return 0;
}
-
-DEFINE_SIMPLE_DEBUGFS_FILE(tid_info);
+DEFINE_SHOW_ATTRIBUTE(tid_info);
static void add_debugfs_mem(struct adapter *adap, const char *name,
unsigned int idx, unsigned int size_mb)
@@ -3364,21 +3323,9 @@ static int meminfo_show(struct seq_file *seq, void *v)
return 0;
}
+DEFINE_SHOW_ATTRIBUTE(meminfo);
-static int meminfo_open(struct inode *inode, struct file *file)
-{
- return single_open(file, meminfo_show, inode->i_private);
-}
-
-static const struct file_operations meminfo_fops = {
- .owner = THIS_MODULE,
- .open = meminfo_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int chcr_show(struct seq_file *seq, void *v)
+static int chcr_stats_show(struct seq_file *seq, void *v)
{
struct adapter *adap = seq->private;
@@ -3399,20 +3346,7 @@ static int chcr_show(struct seq_file *seq, void *v)
atomic_read(&adap->chcr_stats.ipsec_cnt));
return 0;
}
-
-
-static int chcr_stats_open(struct inode *inode, struct file *file)
-{
- return single_open(file, chcr_show, inode->i_private);
-}
-
-static const struct file_operations chcr_stats_debugfs_fops = {
- .owner = THIS_MODULE,
- .open = chcr_stats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(chcr_stats);
#define PRINT_ADAP_STATS(string, value) \
seq_printf(seq, "%-25s %-20llu\n", (string), \
@@ -3573,8 +3507,7 @@ static int tp_stats_show(struct seq_file *seq, void *v)
return 0;
}
-
-DEFINE_SIMPLE_DEBUGFS_FILE(tp_stats);
+DEFINE_SHOW_ATTRIBUTE(tp_stats);
/* Add an array of Debug FS files.
*/
@@ -3603,7 +3536,7 @@ int t4_setup_debugfs(struct adapter *adap)
{ "cim_pif_la", &cim_pif_la_fops, 0400, 0 },
{ "cim_ma_la", &cim_ma_la_fops, 0400, 0 },
{ "cim_qcfg", &cim_qcfg_fops, 0400, 0 },
- { "clk", &clk_debugfs_fops, 0400, 0 },
+ { "clk", &clk_fops, 0400, 0 },
{ "devlog", &devlog_fops, 0400, 0 },
{ "mboxlog", &mboxlog_fops, 0400, 0 },
{ "mbox0", &mbox_debugfs_fops, 0600, 0 },
@@ -3621,11 +3554,11 @@ int t4_setup_debugfs(struct adapter *adap)
{ "l2t", &t4_l2t_fops, 0400, 0},
{ "mps_tcam", &mps_tcam_debugfs_fops, 0400, 0 },
{ "rss", &rss_debugfs_fops, 0400, 0 },
- { "rss_config", &rss_config_debugfs_fops, 0400, 0 },
+ { "rss_config", &rss_config_fops, 0400, 0 },
{ "rss_key", &rss_key_debugfs_fops, 0400, 0 },
{ "rss_pf_config", &rss_pf_config_debugfs_fops, 0400, 0 },
{ "rss_vf_config", &rss_vf_config_debugfs_fops, 0400, 0 },
- { "resources", &resources_debugfs_fops, 0400, 0 },
+ { "resources", &resources_fops, 0400, 0 },
#ifdef CONFIG_CHELSIO_T4_DCB
{ "dcb_info", &dcb_info_debugfs_fops, 0400, 0 },
#endif
@@ -3644,18 +3577,18 @@ int t4_setup_debugfs(struct adapter *adap)
{ "obq_ncsi", &cim_obq_fops, 0400, 5 },
{ "tp_la", &tp_la_fops, 0400, 0 },
{ "ulprx_la", &ulprx_la_fops, 0400, 0 },
- { "sensors", &sensors_debugfs_fops, 0400, 0 },
+ { "sensors", &sensors_fops, 0400, 0 },
{ "pm_stats", &pm_stats_debugfs_fops, 0400, 0 },
- { "tx_rate", &tx_rate_debugfs_fops, 0400, 0 },
- { "cctrl", &cctrl_tbl_debugfs_fops, 0400, 0 },
+ { "tx_rate", &tx_rate_fops, 0400, 0 },
+ { "cctrl", &cctrl_tbl_fops, 0400, 0 },
#if IS_ENABLED(CONFIG_IPV6)
- { "clip_tbl", &clip_tbl_debugfs_fops, 0400, 0 },
+ { "clip_tbl", &clip_tbl_fops, 0400, 0 },
#endif
- { "tids", &tid_info_debugfs_fops, 0400, 0},
+ { "tids", &tid_info_fops, 0400, 0},
{ "blocked_fl", &blocked_fl_fops, 0600, 0 },
{ "meminfo", &meminfo_fops, 0400, 0 },
- { "crypto", &chcr_stats_debugfs_fops, 0400, 0 },
- { "tp_stats", &tp_stats_debugfs_fops, 0400, 0 },
+ { "crypto", &chcr_stats_fops, 0400, 0 },
+ { "tp_stats", &tp_stats_fops, 0400, 0 },
};
/* Debug FS nodes common to all T5 and later adapters.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h
index 23f43a0f8950..ba95e13d52da 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h
@@ -37,19 +37,6 @@
#include <linux/export.h>
-#define DEFINE_SIMPLE_DEBUGFS_FILE(name) \
-static int name##_open(struct inode *inode, struct file *file) \
-{ \
- return single_open(file, name##_show, inode->i_private); \
-} \
-static const struct file_operations name##_debugfs_fops = { \
- .owner = THIS_MODULE, \
- .open = name##_open, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .release = single_release \
-}
-
struct t4_debugfs_entry {
const char *name;
const struct file_operations *ops;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 8a2ad6b387ea..2fab87e86561 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2323,19 +2323,7 @@ static int resources_show(struct seq_file *seq, void *v)
return 0;
}
-
-static int resources_open(struct inode *inode, struct file *file)
-{
- return single_open(file, resources_show, inode->i_private);
-}
-
-static const struct file_operations resources_proc_fops = {
- .owner = THIS_MODULE,
- .open = resources_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(resources);
/*
* Show Virtual Interfaces.
@@ -2419,7 +2407,7 @@ static struct cxgb4vf_debugfs_entry debugfs_files[] = {
{ "mboxlog", 0444, &mboxlog_fops },
{ "sge_qinfo", 0444, &sge_qinfo_debugfs_fops },
{ "sge_qstats", 0444, &sge_qstats_proc_fops },
- { "resources", 0444, &resources_proc_fops },
+ { "resources", 0444, &resources_fops },
{ "interfaces", 0444, &interfaces_proc_fops },
};
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 9510c9d78858..f53090cde041 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -51,9 +51,9 @@
#include <linux/percpu.h>
#include <linux/dma-mapping.h>
#include <linux/sort.h>
+#include <linux/phy_fixed.h>
#include <soc/fsl/bman.h>
#include <soc/fsl/qman.h>
-
#include "fman.h"
#include "fman_port.h"
#include "mac.h"
@@ -2616,6 +2616,7 @@ static const struct net_device_ops dpaa_ops = {
.ndo_stop = dpaa_eth_stop,
.ndo_tx_timeout = dpaa_tx_timeout,
.ndo_get_stats64 = dpaa_get_stats64,
+ .ndo_change_carrier = fixed_phy_change_carrier,
.ndo_set_mac_address = dpaa_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = dpaa_set_rx_mode,
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index bf80855dd0dd..f79e57f735b3 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -531,7 +531,6 @@ struct fec_enet_private {
/* Phylib and MDIO interface */
struct mii_bus *mii_bus;
- int mii_timeout;
uint phy_speed;
phy_interface_t phy_interface;
struct device_node *phy_node;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 6db69ba30dcd..ae0f88bce9aa 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1714,12 +1714,6 @@ static void fec_enet_adjust_link(struct net_device *ndev)
struct phy_device *phy_dev = ndev->phydev;
int status_change = 0;
- /* Prevent a state halted on mii error */
- if (fep->mii_timeout && phy_dev->state == PHY_HALTED) {
- phy_dev->state = PHY_RESUMING;
- return;
- }
-
/*
* If the netdev is down, or is going down, we're not interested
* in link state events, so just mark our idea of the link as down
@@ -1779,7 +1773,6 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
if (ret < 0)
return ret;
- fep->mii_timeout = 0;
reinit_completion(&fep->mdio_done);
/* start a read op */
@@ -1791,7 +1784,6 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
time_left = wait_for_completion_timeout(&fep->mdio_done,
usecs_to_jiffies(FEC_MII_TIMEOUT));
if (time_left == 0) {
- fep->mii_timeout = 1;
netdev_err(fep->netdev, "MDIO read timeout\n");
ret = -ETIMEDOUT;
goto out;
@@ -1820,7 +1812,6 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
else
ret = 0;
- fep->mii_timeout = 0;
reinit_completion(&fep->mdio_done);
/* start a write op */
@@ -1833,7 +1824,6 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
time_left = wait_for_completion_timeout(&fep->mdio_done,
usecs_to_jiffies(FEC_MII_TIMEOUT));
if (time_left == 0) {
- fep->mii_timeout = 1;
netdev_err(fep->netdev, "MDIO write timeout\n");
ret = -ETIMEDOUT;
}
@@ -2001,8 +1991,6 @@ static int fec_enet_mii_init(struct platform_device *pdev)
return -ENOENT;
}
- fep->mii_timeout = 0;
-
/*
* Set MII speed to 2.5 MHz (= clk_get_rate() / 2 * phy_speed)
*
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index a831086601eb..45fcc96be90e 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -500,6 +500,7 @@ static const struct net_device_ops gfar_netdev_ops = {
.ndo_tx_timeout = gfar_timeout,
.ndo_do_ioctl = gfar_ioctl,
.ndo_get_stats = gfar_get_stats,
+ .ndo_change_carrier = fixed_phy_change_carrier,
.ndo_set_mac_address = gfar_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 2e978cb8b28c..c3d539e209ed 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -30,6 +30,7 @@
#include <linux/dma-mapping.h>
#include <linux/mii.h>
#include <linux/phy.h>
+#include <linux/phy_fixed.h>
#include <linux/workqueue.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
@@ -3676,6 +3677,7 @@ static const struct net_device_ops ucc_geth_netdev_ops = {
.ndo_stop = ucc_geth_close,
.ndo_start_xmit = ucc_geth_start_xmit,
.ndo_validate_addr = eth_validate_addr,
+ .ndo_change_carrier = fixed_phy_change_carrier,
.ndo_set_mac_address = ucc_geth_set_mac_addr,
.ndo_set_rx_mode = ucc_geth_set_multi,
.ndo_tx_timeout = ucc_geth_timeout,
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index 25152715396b..fee4664c9189 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -118,6 +118,7 @@ config HNS3_ENET
tristate "Hisilicon HNS3 Ethernet Device Support"
default m
depends on 64BIT && PCI
+ depends on INET
---help---
This selects the Ethernet Driver for Hisilicon Network Subsystem 3 for hip08
family of SoCs. This module depends upon HNAE3 driver to access the HNAE3
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 4d9cf39da48c..691d12174902 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -39,6 +39,7 @@ enum HCLGE_MBX_OPCODE {
HCLGE_MBX_KEEP_ALIVE, /* (VF -> PF) send keep alive cmd */
HCLGE_MBX_SET_ALIVE, /* (VF -> PF) set alive state */
HCLGE_MBX_SET_MTU, /* (VF -> PF) set mtu */
+ HCLGE_MBX_GET_QID_IN_PF, /* (VF -> PF) get queue id in pf */
};
/* below are per-VF mac-vlan subcodes */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 294e725c7c44..845d43d3a920 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -460,6 +460,7 @@ struct hnae3_ae_ops {
bool (*ae_dev_resetting)(struct hnae3_handle *handle);
unsigned long (*ae_dev_reset_cnt)(struct hnae3_handle *handle);
int (*set_gro_en)(struct hnae3_handle *handle, int enable);
+ u16 (*get_global_queue_id)(struct hnae3_handle *handle, u16 queue_id);
};
struct hnae3_dcb_ops {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 86d667a3730a..0de543faa5b1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -48,11 +48,11 @@ static int hns3_dbg_queue_info(struct hnae3_handle *h, char *cmd_buf)
* to prevent reference to invalid memory. And need to ensure
* that the following code is executed within 100ms.
*/
- if (test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
+ if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
test_bit(HNS3_NIC_STATE_RESETTING, &priv->state))
return -EPERM;
- ring = ring_data[i + h->kinfo.num_tqps].ring;
+ ring = ring_data[(u32)(i + h->kinfo.num_tqps)].ring;
base_add_h = readl_relaxed(ring->tqp->io_base +
HNS3_RING_RX_RING_BASEADDR_H_REG);
base_add_l = readl_relaxed(ring->tqp->io_base +
@@ -125,16 +125,146 @@ static int hns3_dbg_queue_info(struct hnae3_handle *h, char *cmd_buf)
return 0;
}
+static int hns3_dbg_queue_map(struct hnae3_handle *h)
+{
+ struct hns3_nic_priv *priv = h->priv;
+ struct hns3_nic_ring_data *ring_data;
+ int i;
+
+ if (!h->ae_algo->ops->get_global_queue_id)
+ return -EOPNOTSUPP;
+
+ dev_info(&h->pdev->dev, "map info for queue id and vector id\n");
+ dev_info(&h->pdev->dev,
+ "local queue id | global queue id | vector id\n");
+ for (i = 0; i < h->kinfo.num_tqps; i++) {
+ u16 global_qid;
+
+ global_qid = h->ae_algo->ops->get_global_queue_id(h, i);
+ ring_data = &priv->ring_data[i];
+ if (!ring_data || !ring_data->ring ||
+ !ring_data->ring->tqp_vector)
+ continue;
+
+ dev_info(&h->pdev->dev,
+ " %4d %4d %4d\n",
+ i, global_qid,
+ ring_data->ring->tqp_vector->vector_irq);
+ }
+
+ return 0;
+}
+
+static int hns3_dbg_bd_info(struct hnae3_handle *h, char *cmd_buf)
+{
+ struct hns3_nic_priv *priv = h->priv;
+ struct hns3_nic_ring_data *ring_data;
+ struct hns3_desc *rx_desc, *tx_desc;
+ struct device *dev = &h->pdev->dev;
+ struct hns3_enet_ring *ring;
+ u32 tx_index, rx_index;
+ u32 q_num, value;
+ int cnt;
+
+ cnt = sscanf(&cmd_buf[8], "%u %u", &q_num, &tx_index);
+ if (cnt == 2) {
+ rx_index = tx_index;
+ } else if (cnt != 1) {
+ dev_err(dev, "bd info: bad command string, cnt=%d\n", cnt);
+ return -EINVAL;
+ }
+
+ if (q_num >= h->kinfo.num_tqps) {
+ dev_err(dev, "Queue number(%u) is out of range(%u)\n", q_num,
+ h->kinfo.num_tqps - 1);
+ return -EINVAL;
+ }
+
+ ring_data = priv->ring_data;
+ ring = ring_data[q_num].ring;
+ value = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG);
+ tx_index = (cnt == 1) ? value : tx_index;
+
+ if (tx_index >= ring->desc_num) {
+ dev_err(dev, "bd index (%u) is out of range(%u)\n", tx_index,
+ ring->desc_num - 1);
+ return -EINVAL;
+ }
+
+ tx_desc = &ring->desc[tx_index];
+ dev_info(dev, "TX Queue Num: %u, BD Index: %u\n", q_num, tx_index);
+ dev_info(dev, "(TX) addr: 0x%llx\n", tx_desc->addr);
+ dev_info(dev, "(TX)vlan_tag: %u\n", tx_desc->tx.vlan_tag);
+ dev_info(dev, "(TX)send_size: %u\n", tx_desc->tx.send_size);
+ dev_info(dev, "(TX)vlan_tso: %u\n", tx_desc->tx.type_cs_vlan_tso);
+ dev_info(dev, "(TX)l2_len: %u\n", tx_desc->tx.l2_len);
+ dev_info(dev, "(TX)l3_len: %u\n", tx_desc->tx.l3_len);
+ dev_info(dev, "(TX)l4_len: %u\n", tx_desc->tx.l4_len);
+ dev_info(dev, "(TX)vlan_tag: %u\n", tx_desc->tx.outer_vlan_tag);
+ dev_info(dev, "(TX)tv: %u\n", tx_desc->tx.tv);
+ dev_info(dev, "(TX)vlan_msec: %u\n", tx_desc->tx.ol_type_vlan_msec);
+ dev_info(dev, "(TX)ol2_len: %u\n", tx_desc->tx.ol2_len);
+ dev_info(dev, "(TX)ol3_len: %u\n", tx_desc->tx.ol3_len);
+ dev_info(dev, "(TX)ol4_len: %u\n", tx_desc->tx.ol4_len);
+ dev_info(dev, "(TX)paylen: %u\n", tx_desc->tx.paylen);
+ dev_info(dev, "(TX)vld_ra_ri: %u\n", tx_desc->tx.bdtp_fe_sc_vld_ra_ri);
+ dev_info(dev, "(TX)mss: %u\n", tx_desc->tx.mss);
+
+ ring = ring_data[q_num + h->kinfo.num_tqps].ring;
+ value = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_TAIL_REG);
+ rx_index = (cnt == 1) ? value : tx_index;
+ rx_desc = &ring->desc[rx_index];
+
+ dev_info(dev, "RX Queue Num: %u, BD Index: %u\n", q_num, rx_index);
+ dev_info(dev, "(RX)addr: 0x%llx\n", rx_desc->addr);
+ dev_info(dev, "(RX)pkt_len: %u\n", rx_desc->rx.pkt_len);
+ dev_info(dev, "(RX)size: %u\n", rx_desc->rx.size);
+ dev_info(dev, "(RX)rss_hash: %u\n", rx_desc->rx.rss_hash);
+ dev_info(dev, "(RX)fd_id: %u\n", rx_desc->rx.fd_id);
+ dev_info(dev, "(RX)vlan_tag: %u\n", rx_desc->rx.vlan_tag);
+ dev_info(dev, "(RX)o_dm_vlan_id_fb: %u\n", rx_desc->rx.o_dm_vlan_id_fb);
+ dev_info(dev, "(RX)ot_vlan_tag: %u\n", rx_desc->rx.ot_vlan_tag);
+ dev_info(dev, "(RX)bd_base_info: %u\n", rx_desc->rx.bd_base_info);
+
+ return 0;
+}
+
static void hns3_dbg_help(struct hnae3_handle *h)
{
+#define HNS3_DBG_BUF_LEN 256
+
+ char printf_buf[HNS3_DBG_BUF_LEN];
+
dev_info(&h->pdev->dev, "available commands\n");
dev_info(&h->pdev->dev, "queue info [number]\n");
+ dev_info(&h->pdev->dev, "queue map\n");
+ dev_info(&h->pdev->dev, "bd info [q_num] <bd index>\n");
dev_info(&h->pdev->dev, "dump fd tcam\n");
dev_info(&h->pdev->dev, "dump tc\n");
+ dev_info(&h->pdev->dev, "dump tm map [q_num]\n");
dev_info(&h->pdev->dev, "dump tm\n");
dev_info(&h->pdev->dev, "dump qos pause cfg\n");
dev_info(&h->pdev->dev, "dump qos pri map\n");
dev_info(&h->pdev->dev, "dump qos buf cfg\n");
+ dev_info(&h->pdev->dev, "dump mng tbl\n");
+
+ memset(printf_buf, 0, HNS3_DBG_BUF_LEN);
+ strncat(printf_buf, "dump reg [[bios common] [ssu <prt_id>]",
+ HNS3_DBG_BUF_LEN - 1);
+ strncat(printf_buf + strlen(printf_buf),
+ " [igu egu <prt_id>] [rpu <tc_queue_num>]",
+ HNS3_DBG_BUF_LEN - strlen(printf_buf) - 1);
+ strncat(printf_buf + strlen(printf_buf),
+ " [rtc] [ppp] [rcb] [tqp <q_num>]]\n",
+ HNS3_DBG_BUF_LEN - strlen(printf_buf) - 1);
+ dev_info(&h->pdev->dev, "%s", printf_buf);
+
+ memset(printf_buf, 0, HNS3_DBG_BUF_LEN);
+ strncat(printf_buf, "dump reg dcb [port_id] [pri_id] [pg_id]",
+ HNS3_DBG_BUF_LEN - 1);
+ strncat(printf_buf + strlen(printf_buf), " [rq_id] [nq_id] [qset_id]\n",
+ HNS3_DBG_BUF_LEN - strlen(printf_buf) - 1);
+ dev_info(&h->pdev->dev, "%s", printf_buf);
}
static ssize_t hns3_dbg_cmd_read(struct file *filp, char __user *buffer,
@@ -179,7 +309,7 @@ static ssize_t hns3_dbg_cmd_write(struct file *filp, const char __user *buffer,
return 0;
/* Judge if the instance is being reset. */
- if (test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
+ if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
test_bit(HNS3_NIC_STATE_RESETTING, &priv->state))
return 0;
@@ -205,6 +335,10 @@ static ssize_t hns3_dbg_cmd_write(struct file *filp, const char __user *buffer,
hns3_dbg_help(handle);
else if (strncmp(cmd_buf, "queue info", 10) == 0)
ret = hns3_dbg_queue_info(handle, cmd_buf);
+ else if (strncmp(cmd_buf, "queue map", 9) == 0)
+ ret = hns3_dbg_queue_map(handle);
+ else if (strncmp(cmd_buf, "bd info", 7) == 0)
+ ret = hns3_dbg_bd_info(handle, cmd_buf);
else if (handle->ae_algo->ops->dbg_run_cmd)
ret = handle->ae_algo->ops->dbg_run_cmd(handle, cmd_buf);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 69142a381064..d060029ce142 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -240,7 +240,6 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector,
tqp_vector->tx_group.coal.int_gl = HNS3_INT_GL_50K;
tqp_vector->rx_group.coal.int_gl = HNS3_INT_GL_50K;
- tqp_vector->int_adapt_down = HNS3_INT_ADAPT_DOWN_START;
tqp_vector->rx_group.coal.flow_level = HNS3_FLOW_LOW;
tqp_vector->tx_group.coal.flow_level = HNS3_FLOW_LOW;
}
@@ -2846,10 +2845,10 @@ static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector)
struct hns3_enet_ring_group *tx_group = &tqp_vector->tx_group;
bool rx_update, tx_update;
- if (tqp_vector->int_adapt_down > 0) {
- tqp_vector->int_adapt_down--;
+ /* update param every 1000ms */
+ if (time_before(jiffies,
+ tqp_vector->last_jiffies + msecs_to_jiffies(1000)))
return;
- }
if (rx_group->coal.gl_adapt_enable) {
rx_update = hns3_get_new_int_gl(rx_group);
@@ -2866,7 +2865,6 @@ static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector)
}
tqp_vector->last_jiffies = jiffies;
- tqp_vector->int_adapt_down = HNS3_INT_ADAPT_DOWN_START;
}
static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
@@ -2909,8 +2907,8 @@ static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
if (!clean_complete)
return budget;
- if (likely(!test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) &&
- napi_complete(napi)) {
+ if (napi_complete(napi) &&
+ likely(!test_bit(HNS3_NIC_STATE_DOWN, &priv->state))) {
hns3_update_new_int_gl(tqp_vector);
hns3_mask_vector_irq(tqp_vector, 1);
}
@@ -2993,9 +2991,10 @@ err_free_chain:
cur_chain = head->next;
while (cur_chain) {
chain = cur_chain->next;
- devm_kfree(&pdev->dev, chain);
+ devm_kfree(&pdev->dev, cur_chain);
cur_chain = chain;
}
+ head->next = NULL;
return -ENOMEM;
}
@@ -3086,7 +3085,7 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
ret = hns3_get_vector_ring_chain(tqp_vector,
&vector_ring_chain);
if (ret)
- return ret;
+ goto map_ring_fail;
ret = h->ae_algo->ops->map_ring_to_vector(h,
tqp_vector->vector_irq, &vector_ring_chain);
@@ -3180,12 +3179,12 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
- if (priv->tqp_vector[i].irq_init_flag == HNS3_VECTOR_INITED) {
- (void)irq_set_affinity_hint(
- priv->tqp_vector[i].vector_irq,
- NULL);
- free_irq(priv->tqp_vector[i].vector_irq,
- &priv->tqp_vector[i]);
+ if (tqp_vector->irq_init_flag == HNS3_VECTOR_INITED) {
+ irq_set_affinity_notifier(tqp_vector->vector_irq,
+ NULL);
+ irq_set_affinity_hint(tqp_vector->vector_irq, NULL);
+ free_irq(tqp_vector->vector_irq, tqp_vector);
+ tqp_vector->irq_init_flag = HNS3_VECTOR_NOT_INITED;
}
priv->ring_data[i].ring->irq_init_flag = HNS3_VECTOR_NOT_INITED;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 4e4b48ba78e3..e55995e93bb0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -476,8 +476,6 @@ enum hns3_link_mode_bits {
#define HNS3_INT_RL_MAX 0x00EC
#define HNS3_INT_RL_ENABLE_MASK 0x40
-#define HNS3_INT_ADAPT_DOWN_START 100
-
struct hns3_enet_coalesce {
u16 int_gl;
u8 gl_adapt_enable;
@@ -512,8 +510,6 @@ struct hns3_enet_tqp_vector {
char name[HNAE3_INT_NAME_LEN];
- /* when 0 should adjust interrupt coalesce parameter */
- u8 int_adapt_down;
unsigned long last_jiffies;
} ____cacheline_internodealigned_in_smp;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index b1ee6feb0caf..f23042b24c09 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -86,6 +86,20 @@ enum hclge_opcode_type {
HCLGE_OPC_QUERY_REG_NUM = 0x0040,
HCLGE_OPC_QUERY_32_BIT_REG = 0x0041,
HCLGE_OPC_QUERY_64_BIT_REG = 0x0042,
+ HCLGE_OPC_DFX_BD_NUM = 0x0043,
+ HCLGE_OPC_DFX_BIOS_COMMON_REG = 0x0044,
+ HCLGE_OPC_DFX_SSU_REG_0 = 0x0045,
+ HCLGE_OPC_DFX_SSU_REG_1 = 0x0046,
+ HCLGE_OPC_DFX_IGU_EGU_REG = 0x0047,
+ HCLGE_OPC_DFX_RPU_REG_0 = 0x0048,
+ HCLGE_OPC_DFX_RPU_REG_1 = 0x0049,
+ HCLGE_OPC_DFX_NCSI_REG = 0x004A,
+ HCLGE_OPC_DFX_RTC_REG = 0x004B,
+ HCLGE_OPC_DFX_PPP_REG = 0x004C,
+ HCLGE_OPC_DFX_RCB_REG = 0x004D,
+ HCLGE_OPC_DFX_TQP_REG = 0x004E,
+ HCLGE_OPC_DFX_SSU_REG_2 = 0x004F,
+ HCLGE_OPC_DFX_QUERY_CHIP_CAP = 0x0050,
/* MAC command */
HCLGE_OPC_CONFIG_MAC_MODE = 0x0301,
@@ -126,6 +140,15 @@ enum hclge_opcode_type {
HCLGE_OPC_TM_QS_SCH_MODE_CFG = 0x0814,
HCLGE_OPC_TM_BP_TO_QSET_MAPPING = 0x0815,
HCLGE_OPC_ETS_TC_WEIGHT = 0x0843,
+ HCLGE_OPC_QSET_DFX_STS = 0x0844,
+ HCLGE_OPC_PRI_DFX_STS = 0x0845,
+ HCLGE_OPC_PG_DFX_STS = 0x0846,
+ HCLGE_OPC_PORT_DFX_STS = 0x0847,
+ HCLGE_OPC_SCH_NQ_CNT = 0x0848,
+ HCLGE_OPC_SCH_RQ_CNT = 0x0849,
+ HCLGE_OPC_TM_INTERNAL_STS = 0x0850,
+ HCLGE_OPC_TM_INTERNAL_CNT = 0x0851,
+ HCLGE_OPC_TM_INTERNAL_STS_1 = 0x0852,
/* Packet buffer allocate commands */
HCLGE_OPC_TX_BUFF_ALLOC = 0x0901,
@@ -142,6 +165,7 @@ enum hclge_opcode_type {
HCLGE_OPC_CFG_TX_QUEUE = 0x0B01,
HCLGE_OPC_QUERY_TX_POINTER = 0x0B02,
HCLGE_OPC_QUERY_TX_STATUS = 0x0B03,
+ HCLGE_OPC_TQP_TX_QUEUE_TC = 0x0B04,
HCLGE_OPC_CFG_RX_QUEUE = 0x0B11,
HCLGE_OPC_QUERY_RX_POINTER = 0x0B12,
HCLGE_OPC_QUERY_RX_STATUS = 0x0B13,
@@ -237,6 +261,7 @@ enum hclge_opcode_type {
HCLGE_TM_QCN_MEM_INT_CFG = 0x1A14,
HCLGE_PPP_CMD0_INT_CMD = 0x2100,
HCLGE_PPP_CMD1_INT_CMD = 0x2101,
+ HCLGE_MAC_ETHERTYPE_IDX_RD = 0x2105,
HCLGE_NCSI_INT_EN = 0x2401,
};
@@ -395,7 +420,9 @@ struct hclge_pf_res_cmd {
#define HCLGE_PF_VEC_NUM_M GENMASK(7, 0)
__le16 pf_intr_vector_number;
__le16 pf_own_fun_number;
- __le32 rsv[3];
+ __le16 tx_buf_size;
+ __le16 dv_buf_size;
+ __le32 rsv[2];
};
#define HCLGE_CFG_OFFSET_S 0
@@ -744,6 +771,24 @@ struct hclge_cfg_tx_queue_pointer_cmd {
u8 rsv[14];
};
+#pragma pack(1)
+struct hclge_mac_ethertype_idx_rd_cmd {
+ u8 flags;
+ u8 resp_code;
+ __le16 vlan_tag;
+ u8 mac_add[6];
+ __le16 index;
+ __le16 ethter_type;
+ __le16 egress_port;
+ __le16 egress_queue;
+ __le16 rev0;
+ u8 i_port_bitmap;
+ u8 i_port_direction;
+ u8 rev1[2];
+};
+
+#pragma pack()
+
#define HCLGE_TSO_MSS_MIN_S 0
#define HCLGE_TSO_MSS_MIN_M GENMASK(13, 0)
@@ -796,6 +841,7 @@ struct hclge_serdes_lb_cmd {
#define HCLGE_TOTAL_PKT_BUF 0x108000 /* 1.03125M bytes */
#define HCLGE_DEFAULT_DV 0xA000 /* 40k byte */
#define HCLGE_DEFAULT_NON_DCB_DV 0x7800 /* 30K byte */
+#define HCLGE_NON_DCB_ADDITIONAL_BUF 0x200 /* 512 byte */
#define HCLGE_TYPE_CRQ 0
#define HCLGE_TYPE_CSQ 1
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 14577bbf3e11..26d80504c730 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -9,6 +9,300 @@
#include "hclge_tm.h"
#include "hnae3.h"
+static int hclge_dbg_get_dfx_bd_num(struct hclge_dev *hdev, int offset)
+{
+ struct hclge_desc desc[4];
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_DFX_BD_NUM, true);
+ desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_DFX_BD_NUM, true);
+ desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ hclge_cmd_setup_basic_desc(&desc[2], HCLGE_OPC_DFX_BD_NUM, true);
+ desc[2].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ hclge_cmd_setup_basic_desc(&desc[3], HCLGE_OPC_DFX_BD_NUM, true);
+
+ ret = hclge_cmd_send(&hdev->hw, desc, 4);
+ if (ret != HCLGE_CMD_EXEC_SUCCESS) {
+ dev_err(&hdev->pdev->dev,
+ "get dfx bdnum fail, status is %d.\n", ret);
+ return ret;
+ }
+
+ return (int)desc[offset / 6].data[offset % 6];
+}
+
+static int hclge_dbg_cmd_send(struct hclge_dev *hdev,
+ struct hclge_desc *desc_src,
+ int index, int bd_num,
+ enum hclge_opcode_type cmd)
+{
+ struct hclge_desc *desc = desc_src;
+ int ret, i;
+
+ hclge_cmd_setup_basic_desc(desc, cmd, true);
+ desc->data[0] = cpu_to_le32(index);
+
+ for (i = 1; i < bd_num; i++) {
+ desc->flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ desc++;
+ hclge_cmd_setup_basic_desc(desc, cmd, true);
+ }
+
+ ret = hclge_cmd_send(&hdev->hw, desc_src, bd_num);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "read reg cmd send fail, status is %d.\n", ret);
+ return ret;
+ }
+
+ return ret;
+}
+
+static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev,
+ struct hclge_dbg_dfx_message *dfx_message,
+ char *cmd_buf, int msg_num, int offset,
+ enum hclge_opcode_type cmd)
+{
+ struct hclge_desc *desc_src;
+ struct hclge_desc *desc;
+ int bd_num, buf_len;
+ int ret, i;
+ int index;
+ int max;
+
+ ret = kstrtouint(cmd_buf, 10, &index);
+ index = (ret != 0) ? 0 : index;
+
+ bd_num = hclge_dbg_get_dfx_bd_num(hdev, offset);
+ if (bd_num <= 0)
+ return;
+
+ buf_len = sizeof(struct hclge_desc) * bd_num;
+ desc_src = kzalloc(buf_len, GFP_KERNEL);
+ if (!desc_src) {
+ dev_err(&hdev->pdev->dev, "call kzalloc failed\n");
+ return;
+ }
+
+ desc = desc_src;
+ ret = hclge_dbg_cmd_send(hdev, desc, index, bd_num, cmd);
+ if (ret != HCLGE_CMD_EXEC_SUCCESS) {
+ kfree(desc_src);
+ return;
+ }
+
+ max = (bd_num * 6) <= msg_num ? (bd_num * 6) : msg_num;
+
+ desc = desc_src;
+ for (i = 0; i < max; i++) {
+ (((i / 6) > 0) && ((i % 6) == 0)) ? desc++ : desc;
+ if (dfx_message->flag)
+ dev_info(&hdev->pdev->dev, "%s: 0x%x\n",
+ dfx_message->message, desc->data[i % 6]);
+
+ dfx_message++;
+ }
+
+ kfree(desc_src);
+}
+
+static void hclge_dbg_dump_dcb(struct hclge_dev *hdev, char *cmd_buf)
+{
+ struct device *dev = &hdev->pdev->dev;
+ struct hclge_dbg_bitmap_cmd *bitmap;
+ int rq_id, pri_id, qset_id;
+ int port_id, nq_id, pg_id;
+ struct hclge_desc desc[2];
+
+ int cnt, ret;
+
+ cnt = sscanf(cmd_buf, "%i %i %i %i %i %i",
+ &port_id, &pri_id, &pg_id, &rq_id, &nq_id, &qset_id);
+ if (cnt != 6) {
+ dev_err(&hdev->pdev->dev,
+ "dump dcb: bad command parameter, cnt=%d\n", cnt);
+ return;
+ }
+
+ ret = hclge_dbg_cmd_send(hdev, desc, qset_id, 1,
+ HCLGE_OPC_QSET_DFX_STS);
+ if (ret)
+ return;
+
+ bitmap = (struct hclge_dbg_bitmap_cmd *)&desc[0].data[1];
+ dev_info(dev, "roce_qset_mask: 0x%x\n", bitmap->bit0);
+ dev_info(dev, "nic_qs_mask: 0x%x\n", bitmap->bit1);
+ dev_info(dev, "qs_shaping_pass: 0x%x\n", bitmap->bit2);
+ dev_info(dev, "qs_bp_sts: 0x%x\n", bitmap->bit3);
+
+ ret = hclge_dbg_cmd_send(hdev, desc, pri_id, 1, HCLGE_OPC_PRI_DFX_STS);
+ if (ret)
+ return;
+
+ bitmap = (struct hclge_dbg_bitmap_cmd *)&desc[0].data[1];
+ dev_info(dev, "pri_mask: 0x%x\n", bitmap->bit0);
+ dev_info(dev, "pri_cshaping_pass: 0x%x\n", bitmap->bit1);
+ dev_info(dev, "pri_pshaping_pass: 0x%x\n", bitmap->bit2);
+
+ ret = hclge_dbg_cmd_send(hdev, desc, pg_id, 1, HCLGE_OPC_PG_DFX_STS);
+ if (ret)
+ return;
+
+ bitmap = (struct hclge_dbg_bitmap_cmd *)&desc[0].data[1];
+ dev_info(dev, "pg_mask: 0x%x\n", bitmap->bit0);
+ dev_info(dev, "pg_cshaping_pass: 0x%x\n", bitmap->bit1);
+ dev_info(dev, "pg_pshaping_pass: 0x%x\n", bitmap->bit2);
+
+ ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1,
+ HCLGE_OPC_PORT_DFX_STS);
+ if (ret)
+ return;
+
+ bitmap = (struct hclge_dbg_bitmap_cmd *)&desc[0].data[1];
+ dev_info(dev, "port_mask: 0x%x\n", bitmap->bit0);
+ dev_info(dev, "port_shaping_pass: 0x%x\n", bitmap->bit1);
+
+ ret = hclge_dbg_cmd_send(hdev, desc, nq_id, 1, HCLGE_OPC_SCH_NQ_CNT);
+ if (ret)
+ return;
+
+ dev_info(dev, "sch_nq_cnt: 0x%x\n", desc[0].data[1]);
+
+ ret = hclge_dbg_cmd_send(hdev, desc, nq_id, 1, HCLGE_OPC_SCH_RQ_CNT);
+ if (ret)
+ return;
+
+ dev_info(dev, "sch_rq_cnt: 0x%x\n", desc[0].data[1]);
+
+ ret = hclge_dbg_cmd_send(hdev, desc, 0, 2, HCLGE_OPC_TM_INTERNAL_STS);
+ if (ret)
+ return;
+
+ dev_info(dev, "pri_bp: 0x%x\n", desc[0].data[1]);
+ dev_info(dev, "fifo_dfx_info: 0x%x\n", desc[0].data[2]);
+ dev_info(dev, "sch_roce_fifo_afull_gap: 0x%x\n", desc[0].data[3]);
+ dev_info(dev, "tx_private_waterline: 0x%x\n", desc[0].data[4]);
+ dev_info(dev, "tm_bypass_en: 0x%x\n", desc[0].data[5]);
+ dev_info(dev, "SSU_TM_BYPASS_EN: 0x%x\n", desc[1].data[0]);
+ dev_info(dev, "SSU_RESERVE_CFG: 0x%x\n", desc[1].data[1]);
+
+ ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1,
+ HCLGE_OPC_TM_INTERNAL_CNT);
+ if (ret)
+ return;
+
+ dev_info(dev, "SCH_NIC_NUM: 0x%x\n", desc[0].data[1]);
+ dev_info(dev, "SCH_ROCE_NUM: 0x%x\n", desc[0].data[2]);
+
+ ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1,
+ HCLGE_OPC_TM_INTERNAL_STS_1);
+ if (ret)
+ return;
+
+ dev_info(dev, "TC_MAP_SEL: 0x%x\n", desc[0].data[1]);
+ dev_info(dev, "IGU_PFC_PRI_EN: 0x%x\n", desc[0].data[2]);
+ dev_info(dev, "MAC_PFC_PRI_EN: 0x%x\n", desc[0].data[3]);
+ dev_info(dev, "IGU_PRI_MAP_TC_CFG: 0x%x\n", desc[0].data[4]);
+ dev_info(dev, "IGU_TX_PRI_MAP_TC_CFG: 0x%x\n", desc[0].data[5]);
+}
+
+static void hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev, char *cmd_buf)
+{
+ int msg_num;
+
+ if (strncmp(&cmd_buf[9], "bios common", 11) == 0) {
+ msg_num = sizeof(hclge_dbg_bios_common_reg) /
+ sizeof(struct hclge_dbg_dfx_message);
+ hclge_dbg_dump_reg_common(hdev, hclge_dbg_bios_common_reg,
+ &cmd_buf[21], msg_num,
+ HCLGE_DBG_DFX_BIOS_OFFSET,
+ HCLGE_OPC_DFX_BIOS_COMMON_REG);
+ } else if (strncmp(&cmd_buf[9], "ssu", 3) == 0) {
+ msg_num = sizeof(hclge_dbg_ssu_reg_0) /
+ sizeof(struct hclge_dbg_dfx_message);
+ hclge_dbg_dump_reg_common(hdev, hclge_dbg_ssu_reg_0,
+ &cmd_buf[13], msg_num,
+ HCLGE_DBG_DFX_SSU_0_OFFSET,
+ HCLGE_OPC_DFX_SSU_REG_0);
+
+ msg_num = sizeof(hclge_dbg_ssu_reg_1) /
+ sizeof(struct hclge_dbg_dfx_message);
+ hclge_dbg_dump_reg_common(hdev, hclge_dbg_ssu_reg_1,
+ &cmd_buf[13], msg_num,
+ HCLGE_DBG_DFX_SSU_1_OFFSET,
+ HCLGE_OPC_DFX_SSU_REG_1);
+
+ msg_num = sizeof(hclge_dbg_ssu_reg_2) /
+ sizeof(struct hclge_dbg_dfx_message);
+ hclge_dbg_dump_reg_common(hdev, hclge_dbg_ssu_reg_2,
+ &cmd_buf[13], msg_num,
+ HCLGE_DBG_DFX_SSU_2_OFFSET,
+ HCLGE_OPC_DFX_SSU_REG_2);
+ } else if (strncmp(&cmd_buf[9], "igu egu", 7) == 0) {
+ msg_num = sizeof(hclge_dbg_igu_egu_reg) /
+ sizeof(struct hclge_dbg_dfx_message);
+ hclge_dbg_dump_reg_common(hdev, hclge_dbg_igu_egu_reg,
+ &cmd_buf[17], msg_num,
+ HCLGE_DBG_DFX_IGU_OFFSET,
+ HCLGE_OPC_DFX_IGU_EGU_REG);
+ } else if (strncmp(&cmd_buf[9], "rpu", 3) == 0) {
+ msg_num = sizeof(hclge_dbg_rpu_reg_0) /
+ sizeof(struct hclge_dbg_dfx_message);
+ hclge_dbg_dump_reg_common(hdev, hclge_dbg_rpu_reg_0,
+ &cmd_buf[13], msg_num,
+ HCLGE_DBG_DFX_RPU_0_OFFSET,
+ HCLGE_OPC_DFX_RPU_REG_0);
+
+ msg_num = sizeof(hclge_dbg_rpu_reg_1) /
+ sizeof(struct hclge_dbg_dfx_message);
+ hclge_dbg_dump_reg_common(hdev, hclge_dbg_rpu_reg_1,
+ &cmd_buf[13], msg_num,
+ HCLGE_DBG_DFX_RPU_1_OFFSET,
+ HCLGE_OPC_DFX_RPU_REG_1);
+ } else if (strncmp(&cmd_buf[9], "ncsi", 4) == 0) {
+ msg_num = sizeof(hclge_dbg_ncsi_reg) /
+ sizeof(struct hclge_dbg_dfx_message);
+ hclge_dbg_dump_reg_common(hdev, hclge_dbg_ncsi_reg,
+ &cmd_buf[14], msg_num,
+ HCLGE_DBG_DFX_NCSI_OFFSET,
+ HCLGE_OPC_DFX_NCSI_REG);
+ } else if (strncmp(&cmd_buf[9], "rtc", 3) == 0) {
+ msg_num = sizeof(hclge_dbg_rtc_reg) /
+ sizeof(struct hclge_dbg_dfx_message);
+ hclge_dbg_dump_reg_common(hdev, hclge_dbg_rtc_reg,
+ &cmd_buf[13], msg_num,
+ HCLGE_DBG_DFX_RTC_OFFSET,
+ HCLGE_OPC_DFX_RTC_REG);
+ } else if (strncmp(&cmd_buf[9], "ppp", 3) == 0) {
+ msg_num = sizeof(hclge_dbg_ppp_reg) /
+ sizeof(struct hclge_dbg_dfx_message);
+ hclge_dbg_dump_reg_common(hdev, hclge_dbg_ppp_reg,
+ &cmd_buf[13], msg_num,
+ HCLGE_DBG_DFX_PPP_OFFSET,
+ HCLGE_OPC_DFX_PPP_REG);
+ } else if (strncmp(&cmd_buf[9], "rcb", 3) == 0) {
+ msg_num = sizeof(hclge_dbg_rcb_reg) /
+ sizeof(struct hclge_dbg_dfx_message);
+ hclge_dbg_dump_reg_common(hdev, hclge_dbg_rcb_reg,
+ &cmd_buf[13], msg_num,
+ HCLGE_DBG_DFX_RCB_OFFSET,
+ HCLGE_OPC_DFX_RCB_REG);
+ } else if (strncmp(&cmd_buf[9], "tqp", 3) == 0) {
+ msg_num = sizeof(hclge_dbg_tqp_reg) /
+ sizeof(struct hclge_dbg_dfx_message);
+ hclge_dbg_dump_reg_common(hdev, hclge_dbg_tqp_reg,
+ &cmd_buf[13], msg_num,
+ HCLGE_DBG_DFX_TQP_OFFSET,
+ HCLGE_OPC_DFX_TQP_REG);
+ } else if (strncmp(&cmd_buf[9], "dcb", 3) == 0) {
+ hclge_dbg_dump_dcb(hdev, &cmd_buf[13]);
+ } else {
+ dev_info(&hdev->pdev->dev, "unknown command\n");
+ return;
+ }
+}
+
static void hclge_title_idx_print(struct hclge_dev *hdev, bool flag, int index,
char *title_buf, char *true_buf,
char *false_buf)
@@ -243,6 +537,88 @@ err_tm_cmd_send:
cmd, ret);
}
+static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev, char *cmd_buf)
+{
+ struct hclge_bp_to_qs_map_cmd *bp_to_qs_map_cmd;
+ struct hclge_nq_to_qs_link_cmd *nq_to_qs_map;
+ struct hclge_qs_to_pri_link_cmd *map;
+ struct hclge_tqp_tx_queue_tc_cmd *tc;
+ enum hclge_opcode_type cmd;
+ struct hclge_desc desc;
+ int queue_id, group_id;
+ u32 qset_maping[32];
+ int tc_id, qset_id;
+ int pri_id, ret;
+ u32 i;
+
+ ret = kstrtouint(&cmd_buf[12], 10, &queue_id);
+ queue_id = (ret != 0) ? 0 : queue_id;
+
+ cmd = HCLGE_OPC_TM_NQ_TO_QS_LINK;
+ nq_to_qs_map = (struct hclge_nq_to_qs_link_cmd *)desc.data;
+ hclge_cmd_setup_basic_desc(&desc, cmd, true);
+ nq_to_qs_map->nq_id = cpu_to_le16(queue_id);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret)
+ goto err_tm_map_cmd_send;
+ qset_id = nq_to_qs_map->qset_id & 0x3FF;
+
+ cmd = HCLGE_OPC_TM_QS_TO_PRI_LINK;
+ map = (struct hclge_qs_to_pri_link_cmd *)desc.data;
+ hclge_cmd_setup_basic_desc(&desc, cmd, true);
+ map->qs_id = cpu_to_le16(qset_id);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret)
+ goto err_tm_map_cmd_send;
+ pri_id = map->priority;
+
+ cmd = HCLGE_OPC_TQP_TX_QUEUE_TC;
+ tc = (struct hclge_tqp_tx_queue_tc_cmd *)desc.data;
+ hclge_cmd_setup_basic_desc(&desc, cmd, true);
+ tc->queue_id = cpu_to_le16(queue_id);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret)
+ goto err_tm_map_cmd_send;
+ tc_id = tc->tc_id & 0x7;
+
+ dev_info(&hdev->pdev->dev, "queue_id | qset_id | pri_id | tc_id\n");
+ dev_info(&hdev->pdev->dev, "%04d | %04d | %02d | %02d\n",
+ queue_id, qset_id, pri_id, tc_id);
+
+ cmd = HCLGE_OPC_TM_BP_TO_QSET_MAPPING;
+ bp_to_qs_map_cmd = (struct hclge_bp_to_qs_map_cmd *)desc.data;
+ for (group_id = 0; group_id < 32; group_id++) {
+ hclge_cmd_setup_basic_desc(&desc, cmd, true);
+ bp_to_qs_map_cmd->tc_id = tc_id;
+ bp_to_qs_map_cmd->qs_group_id = group_id;
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret)
+ goto err_tm_map_cmd_send;
+
+ qset_maping[group_id] = bp_to_qs_map_cmd->qs_bit_map;
+ }
+
+ dev_info(&hdev->pdev->dev, "index | tm bp qset maping:\n");
+
+ i = 0;
+ for (group_id = 0; group_id < 4; group_id++) {
+ dev_info(&hdev->pdev->dev,
+ "%04d | %08x:%08x:%08x:%08x:%08x:%08x:%08x:%08x\n",
+ group_id * 256, qset_maping[(u32)(i + 7)],
+ qset_maping[(u32)(i + 6)], qset_maping[(u32)(i + 5)],
+ qset_maping[(u32)(i + 4)], qset_maping[(u32)(i + 3)],
+ qset_maping[(u32)(i + 2)], qset_maping[(u32)(i + 1)],
+ qset_maping[i]);
+ i += 8;
+ }
+
+ return;
+
+err_tm_map_cmd_send:
+ dev_err(&hdev->pdev->dev, "dump tqp map fail(0x%x), status is %d\n",
+ cmd, ret);
+}
+
static void hclge_dbg_dump_qos_pause_cfg(struct hclge_dev *hdev)
{
struct hclge_cfg_pause_param_cmd *pause_param;
@@ -407,6 +783,70 @@ err_qos_cmd_send:
"dump qos buf cfg fail(0x%x), status is %d\n", cmd, ret);
}
+static void hclge_dbg_dump_mng_table(struct hclge_dev *hdev)
+{
+ struct hclge_mac_ethertype_idx_rd_cmd *req0;
+ char printf_buf[HCLGE_DBG_BUF_LEN];
+ struct hclge_desc desc;
+ int ret, i;
+
+ dev_info(&hdev->pdev->dev, "mng tab:\n");
+ memset(printf_buf, 0, HCLGE_DBG_BUF_LEN);
+ strncat(printf_buf,
+ "entry|mac_addr |mask|ether|mask|vlan|mask",
+ HCLGE_DBG_BUF_LEN - 1);
+ strncat(printf_buf + strlen(printf_buf),
+ "|i_map|i_dir|e_type|pf_id|vf_id|q_id|drop\n",
+ HCLGE_DBG_BUF_LEN - strlen(printf_buf) - 1);
+
+ dev_info(&hdev->pdev->dev, "%s", printf_buf);
+
+ for (i = 0; i < HCLGE_DBG_MNG_TBL_MAX; i++) {
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_ETHERTYPE_IDX_RD,
+ true);
+ req0 = (struct hclge_mac_ethertype_idx_rd_cmd *)&desc.data;
+ req0->index = cpu_to_le16(i);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "call hclge_cmd_send fail, ret = %d\n", ret);
+ return;
+ }
+
+ if (!req0->resp_code)
+ continue;
+
+ memset(printf_buf, 0, HCLGE_DBG_BUF_LEN);
+ snprintf(printf_buf, HCLGE_DBG_BUF_LEN,
+ "%02u |%02x:%02x:%02x:%02x:%02x:%02x|",
+ req0->index, req0->mac_add[0], req0->mac_add[1],
+ req0->mac_add[2], req0->mac_add[3], req0->mac_add[4],
+ req0->mac_add[5]);
+
+ snprintf(printf_buf + strlen(printf_buf),
+ HCLGE_DBG_BUF_LEN - strlen(printf_buf),
+ "%x |%04x |%x |%04x|%x |%02x |%02x |",
+ !!(req0->flags & HCLGE_DBG_MNG_MAC_MASK_B),
+ req0->ethter_type,
+ !!(req0->flags & HCLGE_DBG_MNG_ETHER_MASK_B),
+ req0->vlan_tag & HCLGE_DBG_MNG_VLAN_TAG,
+ !!(req0->flags & HCLGE_DBG_MNG_VLAN_MASK_B),
+ req0->i_port_bitmap, req0->i_port_direction);
+
+ snprintf(printf_buf + strlen(printf_buf),
+ HCLGE_DBG_BUF_LEN - strlen(printf_buf),
+ "%d |%d |%02d |%04d|%x\n",
+ !!(req0->egress_port & HCLGE_DBG_MNG_E_TYPE_B),
+ req0->egress_port & HCLGE_DBG_MNG_PF_ID,
+ (req0->egress_port >> 3) & HCLGE_DBG_MNG_VF_ID,
+ req0->egress_queue,
+ !!(req0->egress_port & HCLGE_DBG_MNG_DROP_B));
+
+ dev_info(&hdev->pdev->dev, "%s", printf_buf);
+ }
+}
+
static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage,
bool sel_x, u32 loc)
{
@@ -470,6 +910,8 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf)
hclge_dbg_fd_tcam(hdev);
} else if (strncmp(cmd_buf, "dump tc", 7) == 0) {
hclge_dbg_dump_tc(hdev);
+ } else if (strncmp(cmd_buf, "dump tm map", 11) == 0) {
+ hclge_dbg_dump_tm_map(hdev, cmd_buf);
} else if (strncmp(cmd_buf, "dump tm", 7) == 0) {
hclge_dbg_dump_tm(hdev);
} else if (strncmp(cmd_buf, "dump qos pause cfg", 18) == 0) {
@@ -478,6 +920,10 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf)
hclge_dbg_dump_qos_pri_map(hdev);
} else if (strncmp(cmd_buf, "dump qos buf cfg", 16) == 0) {
hclge_dbg_dump_qos_buf_cfg(hdev);
+ } else if (strncmp(cmd_buf, "dump mng tbl", 12) == 0) {
+ hclge_dbg_dump_mng_table(hdev);
+ } else if (strncmp(cmd_buf, "dump reg", 8) == 0) {
+ hclge_dbg_dump_reg_cmd(hdev, cmd_buf);
} else {
dev_info(&hdev->pdev->dev, "unknown command\n");
return -EINVAL;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
index 50fd0b15fb8e..d055fda41775 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
@@ -4,6 +4,34 @@
#ifndef __HCLGE_DEBUGFS_H
#define __HCLGE_DEBUGFS_H
+#define HCLGE_DBG_BUF_LEN 256
+#define HCLGE_DBG_MNG_TBL_MAX 64
+
+#define HCLGE_DBG_MNG_VLAN_MASK_B BIT(0)
+#define HCLGE_DBG_MNG_MAC_MASK_B BIT(1)
+#define HCLGE_DBG_MNG_ETHER_MASK_B BIT(2)
+#define HCLGE_DBG_MNG_E_TYPE_B BIT(11)
+#define HCLGE_DBG_MNG_DROP_B BIT(13)
+#define HCLGE_DBG_MNG_VLAN_TAG 0x0FFF
+#define HCLGE_DBG_MNG_PF_ID 0x0007
+#define HCLGE_DBG_MNG_VF_ID 0x00FF
+
+/* Get DFX BD number offset */
+#define HCLGE_DBG_DFX_BIOS_OFFSET 1
+#define HCLGE_DBG_DFX_SSU_0_OFFSET 2
+#define HCLGE_DBG_DFX_SSU_1_OFFSET 3
+#define HCLGE_DBG_DFX_IGU_OFFSET 4
+#define HCLGE_DBG_DFX_RPU_0_OFFSET 5
+
+#define HCLGE_DBG_DFX_RPU_1_OFFSET 6
+#define HCLGE_DBG_DFX_NCSI_OFFSET 7
+#define HCLGE_DBG_DFX_RTC_OFFSET 8
+#define HCLGE_DBG_DFX_PPP_OFFSET 9
+#define HCLGE_DBG_DFX_RCB_OFFSET 10
+#define HCLGE_DBG_DFX_TQP_OFFSET 11
+
+#define HCLGE_DBG_DFX_SSU_2_OFFSET 12
+
#pragma pack(1)
struct hclge_qos_pri_map_cmd {
@@ -19,5 +47,667 @@ struct hclge_qos_pri_map_cmd {
rev : 4;
};
+struct hclge_dbg_bitmap_cmd {
+ union {
+ u8 bitmap;
+ struct {
+ u8 bit0 : 1,
+ bit1 : 1,
+ bit2 : 1,
+ bit3 : 1,
+ bit4 : 1,
+ bit5 : 1,
+ bit6 : 1,
+ bit7 : 1;
+ };
+ };
+};
+
+struct hclge_dbg_dfx_message {
+ int flag;
+ char message[60];
+};
+
#pragma pack()
+
+static struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = {
+ {false, "Reserved"},
+ {true, "BP_CPU_STATE"},
+ {true, "DFX_MSIX_INFO_NIC_0"},
+ {true, "DFX_MSIX_INFO_NIC_1"},
+ {true, "DFX_MSIX_INFO_NIC_2"},
+ {true, "DFX_MSIX_INFO_NIC_3"},
+
+ {true, "DFX_MSIX_INFO_ROC_0"},
+ {true, "DFX_MSIX_INFO_ROC_1"},
+ {true, "DFX_MSIX_INFO_ROC_2"},
+ {true, "DFX_MSIX_INFO_ROC_3"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_0[] = {
+ {false, "Reserved"},
+ {true, "SSU_ETS_PORT_STATUS"},
+ {true, "SSU_ETS_TCG_STATUS"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {true, "SSU_BP_STATUS_0"},
+
+ {true, "SSU_BP_STATUS_1"},
+ {true, "SSU_BP_STATUS_2"},
+ {true, "SSU_BP_STATUS_3"},
+ {true, "SSU_BP_STATUS_4"},
+ {true, "SSU_BP_STATUS_5"},
+ {true, "SSU_MAC_TX_PFC_IND"},
+
+ {true, "MAC_SSU_RX_PFC_IND"},
+ {true, "BTMP_AGEING_ST_B0"},
+ {true, "BTMP_AGEING_ST_B1"},
+ {true, "BTMP_AGEING_ST_B2"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+
+ {true, "FULL_DROP_NUM"},
+ {true, "PART_DROP_NUM"},
+ {true, "PPP_KEY_DROP_NUM"},
+ {true, "PPP_RLT_DROP_NUM"},
+ {true, "LO_PRI_UNICAST_RLT_DROP_NUM"},
+ {true, "HI_PRI_MULTICAST_RLT_DROP_NUM"},
+
+ {true, "LO_PRI_MULTICAST_RLT_DROP_NUM"},
+ {true, "NCSI_PACKET_CURR_BUFFER_CNT"},
+ {true, "BTMP_AGEING_RLS_CNT_BANK0"},
+ {true, "BTMP_AGEING_RLS_CNT_BANK1"},
+ {true, "BTMP_AGEING_RLS_CNT_BANK2"},
+ {true, "SSU_MB_RD_RLT_DROP_CNT"},
+
+ {true, "SSU_PPP_MAC_KEY_NUM_L"},
+ {true, "SSU_PPP_MAC_KEY_NUM_H"},
+ {true, "SSU_PPP_HOST_KEY_NUM_L"},
+ {true, "SSU_PPP_HOST_KEY_NUM_H"},
+ {true, "PPP_SSU_MAC_RLT_NUM_L"},
+ {true, "PPP_SSU_MAC_RLT_NUM_H"},
+
+ {true, "PPP_SSU_HOST_RLT_NUM_L"},
+ {true, "PPP_SSU_HOST_RLT_NUM_H"},
+ {true, "NCSI_RX_PACKET_IN_CNT_L"},
+ {true, "NCSI_RX_PACKET_IN_CNT_H"},
+ {true, "NCSI_TX_PACKET_OUT_CNT_L"},
+ {true, "NCSI_TX_PACKET_OUT_CNT_H"},
+
+ {true, "SSU_KEY_DROP_NUM"},
+ {true, "MB_UNCOPY_NUM"},
+ {true, "RX_OQ_DROP_PKT_CNT"},
+ {true, "TX_OQ_DROP_PKT_CNT"},
+ {true, "BANK_UNBALANCE_DROP_CNT"},
+ {true, "BANK_UNBALANCE_RX_DROP_CNT"},
+
+ {true, "NIC_L2_ERR_DROP_PKT_CNT"},
+ {true, "ROC_L2_ERR_DROP_PKT_CNT"},
+ {true, "NIC_L2_ERR_DROP_PKT_CNT_RX"},
+ {true, "ROC_L2_ERR_DROP_PKT_CNT_RX"},
+ {true, "RX_OQ_GLB_DROP_PKT_CNT"},
+ {false, "Reserved"},
+
+ {true, "LO_PRI_UNICAST_CUR_CNT"},
+ {true, "HI_PRI_MULTICAST_CUR_CNT"},
+ {true, "LO_PRI_MULTICAST_CUR_CNT"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_1[] = {
+ {true, "prt_id"},
+ {true, "PACKET_TC_CURR_BUFFER_CNT_0"},
+ {true, "PACKET_TC_CURR_BUFFER_CNT_1"},
+ {true, "PACKET_TC_CURR_BUFFER_CNT_2"},
+ {true, "PACKET_TC_CURR_BUFFER_CNT_3"},
+ {true, "PACKET_TC_CURR_BUFFER_CNT_4"},
+
+ {true, "PACKET_TC_CURR_BUFFER_CNT_5"},
+ {true, "PACKET_TC_CURR_BUFFER_CNT_6"},
+ {true, "PACKET_TC_CURR_BUFFER_CNT_7"},
+ {true, "PACKET_CURR_BUFFER_CNT"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+
+ {true, "RX_PACKET_IN_CNT_L"},
+ {true, "RX_PACKET_IN_CNT_H"},
+ {true, "RX_PACKET_OUT_CNT_L"},
+ {true, "RX_PACKET_OUT_CNT_H"},
+ {true, "TX_PACKET_IN_CNT_L"},
+ {true, "TX_PACKET_IN_CNT_H"},
+
+ {true, "TX_PACKET_OUT_CNT_L"},
+ {true, "TX_PACKET_OUT_CNT_H"},
+ {true, "ROC_RX_PACKET_IN_CNT_L"},
+ {true, "ROC_RX_PACKET_IN_CNT_H"},
+ {true, "ROC_TX_PACKET_OUT_CNT_L"},
+ {true, "ROC_TX_PACKET_OUT_CNT_H"},
+
+ {true, "RX_PACKET_TC_IN_CNT_0_L"},
+ {true, "RX_PACKET_TC_IN_CNT_0_H"},
+ {true, "RX_PACKET_TC_IN_CNT_1_L"},
+ {true, "RX_PACKET_TC_IN_CNT_1_H"},
+ {true, "RX_PACKET_TC_IN_CNT_2_L"},
+ {true, "RX_PACKET_TC_IN_CNT_2_H"},
+
+ {true, "RX_PACKET_TC_IN_CNT_3_L"},
+ {true, "RX_PACKET_TC_IN_CNT_3_H"},
+ {true, "RX_PACKET_TC_IN_CNT_4_L"},
+ {true, "RX_PACKET_TC_IN_CNT_4_H"},
+ {true, "RX_PACKET_TC_IN_CNT_5_L"},
+ {true, "RX_PACKET_TC_IN_CNT_5_H"},
+
+ {true, "RX_PACKET_TC_IN_CNT_6_L"},
+ {true, "RX_PACKET_TC_IN_CNT_6_H"},
+ {true, "RX_PACKET_TC_IN_CNT_7_L"},
+ {true, "RX_PACKET_TC_IN_CNT_7_H"},
+ {true, "RX_PACKET_TC_OUT_CNT_0_L"},
+ {true, "RX_PACKET_TC_OUT_CNT_0_H"},
+
+ {true, "RX_PACKET_TC_OUT_CNT_1_L"},
+ {true, "RX_PACKET_TC_OUT_CNT_1_H"},
+ {true, "RX_PACKET_TC_OUT_CNT_2_L"},
+ {true, "RX_PACKET_TC_OUT_CNT_2_H"},
+ {true, "RX_PACKET_TC_OUT_CNT_3_L"},
+ {true, "RX_PACKET_TC_OUT_CNT_3_H"},
+
+ {true, "RX_PACKET_TC_OUT_CNT_4_L"},
+ {true, "RX_PACKET_TC_OUT_CNT_4_H"},
+ {true, "RX_PACKET_TC_OUT_CNT_5_L"},
+ {true, "RX_PACKET_TC_OUT_CNT_5_H"},
+ {true, "RX_PACKET_TC_OUT_CNT_6_L"},
+ {true, "RX_PACKET_TC_OUT_CNT_6_H"},
+
+ {true, "RX_PACKET_TC_OUT_CNT_7_L"},
+ {true, "RX_PACKET_TC_OUT_CNT_7_H"},
+ {true, "TX_PACKET_TC_IN_CNT_0_L"},
+ {true, "TX_PACKET_TC_IN_CNT_0_H"},
+ {true, "TX_PACKET_TC_IN_CNT_1_L"},
+ {true, "TX_PACKET_TC_IN_CNT_1_H"},
+
+ {true, "TX_PACKET_TC_IN_CNT_2_L"},
+ {true, "TX_PACKET_TC_IN_CNT_2_H"},
+ {true, "TX_PACKET_TC_IN_CNT_3_L"},
+ {true, "TX_PACKET_TC_IN_CNT_3_H"},
+ {true, "TX_PACKET_TC_IN_CNT_4_L"},
+ {true, "TX_PACKET_TC_IN_CNT_4_H"},
+
+ {true, "TX_PACKET_TC_IN_CNT_5_L"},
+ {true, "TX_PACKET_TC_IN_CNT_5_H"},
+ {true, "TX_PACKET_TC_IN_CNT_6_L"},
+ {true, "TX_PACKET_TC_IN_CNT_6_H"},
+ {true, "TX_PACKET_TC_IN_CNT_7_L"},
+ {true, "TX_PACKET_TC_IN_CNT_7_H"},
+
+ {true, "TX_PACKET_TC_OUT_CNT_0_L"},
+ {true, "TX_PACKET_TC_OUT_CNT_0_H"},
+ {true, "TX_PACKET_TC_OUT_CNT_1_L"},
+ {true, "TX_PACKET_TC_OUT_CNT_1_H"},
+ {true, "TX_PACKET_TC_OUT_CNT_2_L"},
+ {true, "TX_PACKET_TC_OUT_CNT_2_H"},
+
+ {true, "TX_PACKET_TC_OUT_CNT_3_L"},
+ {true, "TX_PACKET_TC_OUT_CNT_3_H"},
+ {true, "TX_PACKET_TC_OUT_CNT_4_L"},
+ {true, "TX_PACKET_TC_OUT_CNT_4_H"},
+ {true, "TX_PACKET_TC_OUT_CNT_5_L"},
+ {true, "TX_PACKET_TC_OUT_CNT_5_H"},
+
+ {true, "TX_PACKET_TC_OUT_CNT_6_L"},
+ {true, "TX_PACKET_TC_OUT_CNT_6_H"},
+ {true, "TX_PACKET_TC_OUT_CNT_7_L"},
+ {true, "TX_PACKET_TC_OUT_CNT_7_H"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_2[] = {
+ {true, "OQ_INDEX"},
+ {true, "QUEUE_CNT"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_igu_egu_reg[] = {
+ {true, "prt_id"},
+ {true, "IGU_RX_ERR_PKT"},
+ {true, "IGU_RX_NO_SOF_PKT"},
+ {true, "EGU_TX_1588_SHORT_PKT"},
+ {true, "EGU_TX_1588_PKT"},
+ {true, "EGU_TX_ERR_PKT"},
+
+ {true, "IGU_RX_OUT_L2_PKT"},
+ {true, "IGU_RX_OUT_L3_PKT"},
+ {true, "IGU_RX_OUT_L4_PKT"},
+ {true, "IGU_RX_IN_L2_PKT"},
+ {true, "IGU_RX_IN_L3_PKT"},
+ {true, "IGU_RX_IN_L4_PKT"},
+
+ {true, "IGU_RX_EL3E_PKT"},
+ {true, "IGU_RX_EL4E_PKT"},
+ {true, "IGU_RX_L3E_PKT"},
+ {true, "IGU_RX_L4E_PKT"},
+ {true, "IGU_RX_ROCEE_PKT"},
+ {true, "IGU_RX_OUT_UDP0_PKT"},
+
+ {true, "IGU_RX_IN_UDP0_PKT"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+
+ {true, "IGU_RX_OVERSIZE_PKT_L"},
+ {true, "IGU_RX_OVERSIZE_PKT_H"},
+ {true, "IGU_RX_UNDERSIZE_PKT_L"},
+ {true, "IGU_RX_UNDERSIZE_PKT_H"},
+ {true, "IGU_RX_OUT_ALL_PKT_L"},
+ {true, "IGU_RX_OUT_ALL_PKT_H"},
+
+ {true, "IGU_TX_OUT_ALL_PKT_L"},
+ {true, "IGU_TX_OUT_ALL_PKT_H"},
+ {true, "IGU_RX_UNI_PKT_L"},
+ {true, "IGU_RX_UNI_PKT_H"},
+ {true, "IGU_RX_MULTI_PKT_L"},
+ {true, "IGU_RX_MULTI_PKT_H"},
+
+ {true, "IGU_RX_BROAD_PKT_L"},
+ {true, "IGU_RX_BROAD_PKT_H"},
+ {true, "EGU_TX_OUT_ALL_PKT_L"},
+ {true, "EGU_TX_OUT_ALL_PKT_H"},
+ {true, "EGU_TX_UNI_PKT_L"},
+ {true, "EGU_TX_UNI_PKT_H"},
+
+ {true, "EGU_TX_MULTI_PKT_L"},
+ {true, "EGU_TX_MULTI_PKT_H"},
+ {true, "EGU_TX_BROAD_PKT_L"},
+ {true, "EGU_TX_BROAD_PKT_H"},
+ {true, "IGU_TX_KEY_NUM_L"},
+ {true, "IGU_TX_KEY_NUM_H"},
+
+ {true, "IGU_RX_NON_TUN_PKT_L"},
+ {true, "IGU_RX_NON_TUN_PKT_H"},
+ {true, "IGU_RX_TUN_PKT_L"},
+ {true, "IGU_RX_TUN_PKT_H"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_0[] = {
+ {true, "tc_queue_num"},
+ {true, "FSM_DFX_ST0"},
+ {true, "FSM_DFX_ST1"},
+ {true, "RPU_RX_PKT_DROP_CNT"},
+ {true, "BUF_WAIT_TIMEOUT"},
+ {true, "BUF_WAIT_TIMEOUT_QID"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_1[] = {
+ {false, "Reserved"},
+ {true, "FIFO_DFX_ST0"},
+ {true, "FIFO_DFX_ST1"},
+ {true, "FIFO_DFX_ST2"},
+ {true, "FIFO_DFX_ST3"},
+ {true, "FIFO_DFX_ST4"},
+
+ {true, "FIFO_DFX_ST5"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_ncsi_reg[] = {
+ {false, "Reserved"},
+ {true, "NCSI_EGU_TX_FIFO_STS"},
+ {true, "NCSI_PAUSE_STATUS"},
+ {true, "NCSI_RX_CTRL_DMAC_ERR_CNT"},
+ {true, "NCSI_RX_CTRL_SMAC_ERR_CNT"},
+ {true, "NCSI_RX_CTRL_CKS_ERR_CNT"},
+
+ {true, "NCSI_RX_CTRL_PKT_CNT"},
+ {true, "NCSI_RX_PT_DMAC_ERR_CNT"},
+ {true, "NCSI_RX_PT_SMAC_ERR_CNT"},
+ {true, "NCSI_RX_PT_PKT_CNT"},
+ {true, "NCSI_RX_FCS_ERR_CNT"},
+ {true, "NCSI_TX_CTRL_DMAC_ERR_CNT"},
+
+ {true, "NCSI_TX_CTRL_SMAC_ERR_CNT"},
+ {true, "NCSI_TX_CTRL_PKT_CNT"},
+ {true, "NCSI_TX_PT_DMAC_ERR_CNT"},
+ {true, "NCSI_TX_PT_SMAC_ERR_CNT"},
+ {true, "NCSI_TX_PT_PKT_CNT"},
+ {true, "NCSI_TX_PT_PKT_TRUNC_CNT"},
+
+ {true, "NCSI_TX_PT_PKT_ERR_CNT"},
+ {true, "NCSI_TX_CTRL_PKT_ERR_CNT"},
+ {true, "NCSI_RX_CTRL_PKT_TRUNC_CNT"},
+ {true, "NCSI_RX_CTRL_PKT_CFLIT_CNT"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+
+ {true, "NCSI_MAC_RX_OCTETS_OK"},
+ {true, "NCSI_MAC_RX_OCTETS_BAD"},
+ {true, "NCSI_MAC_RX_UC_PKTS"},
+ {true, "NCSI_MAC_RX_MC_PKTS"},
+ {true, "NCSI_MAC_RX_BC_PKTS"},
+ {true, "NCSI_MAC_RX_PKTS_64OCTETS"},
+
+ {true, "NCSI_MAC_RX_PKTS_65TO127OCTETS"},
+ {true, "NCSI_MAC_RX_PKTS_128TO255OCTETS"},
+ {true, "NCSI_MAC_RX_PKTS_255TO511OCTETS"},
+ {true, "NCSI_MAC_RX_PKTS_512TO1023OCTETS"},
+ {true, "NCSI_MAC_RX_PKTS_1024TO1518OCTETS"},
+ {true, "NCSI_MAC_RX_PKTS_1519TOMAXOCTETS"},
+
+ {true, "NCSI_MAC_RX_FCS_ERRORS"},
+ {true, "NCSI_MAC_RX_LONG_ERRORS"},
+ {true, "NCSI_MAC_RX_JABBER_ERRORS"},
+ {true, "NCSI_MAC_RX_RUNT_ERR_CNT"},
+ {true, "NCSI_MAC_RX_SHORT_ERR_CNT"},
+ {true, "NCSI_MAC_RX_FILT_PKT_CNT"},
+
+ {true, "NCSI_MAC_RX_OCTETS_TOTAL_FILT"},
+ {true, "NCSI_MAC_TX_OCTETS_OK"},
+ {true, "NCSI_MAC_TX_OCTETS_BAD"},
+ {true, "NCSI_MAC_TX_UC_PKTS"},
+ {true, "NCSI_MAC_TX_MC_PKTS"},
+ {true, "NCSI_MAC_TX_BC_PKTS"},
+
+ {true, "NCSI_MAC_TX_PKTS_64OCTETS"},
+ {true, "NCSI_MAC_TX_PKTS_65TO127OCTETS"},
+ {true, "NCSI_MAC_TX_PKTS_128TO255OCTETS"},
+ {true, "NCSI_MAC_TX_PKTS_256TO511OCTETS"},
+ {true, "NCSI_MAC_TX_PKTS_512TO1023OCTETS"},
+ {true, "NCSI_MAC_TX_PKTS_1024TO1518OCTETS"},
+
+ {true, "NCSI_MAC_TX_PKTS_1519TOMAXOCTETS"},
+ {true, "NCSI_MAC_TX_UNDERRUN"},
+ {true, "NCSI_MAC_TX_CRC_ERROR"},
+ {true, "NCSI_MAC_TX_PAUSE_FRAMES"},
+ {true, "NCSI_MAC_RX_PAD_PKTS"},
+ {true, "NCSI_MAC_RX_PAUSE_FRAMES"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_rtc_reg[] = {
+ {false, "Reserved"},
+ {true, "LGE_IGU_AFIFO_DFX_0"},
+ {true, "LGE_IGU_AFIFO_DFX_1"},
+ {true, "LGE_IGU_AFIFO_DFX_2"},
+ {true, "LGE_IGU_AFIFO_DFX_3"},
+ {true, "LGE_IGU_AFIFO_DFX_4"},
+
+ {true, "LGE_IGU_AFIFO_DFX_5"},
+ {true, "LGE_IGU_AFIFO_DFX_6"},
+ {true, "LGE_IGU_AFIFO_DFX_7"},
+ {true, "LGE_EGU_AFIFO_DFX_0"},
+ {true, "LGE_EGU_AFIFO_DFX_1"},
+ {true, "LGE_EGU_AFIFO_DFX_2"},
+
+ {true, "LGE_EGU_AFIFO_DFX_3"},
+ {true, "LGE_EGU_AFIFO_DFX_4"},
+ {true, "LGE_EGU_AFIFO_DFX_5"},
+ {true, "LGE_EGU_AFIFO_DFX_6"},
+ {true, "LGE_EGU_AFIFO_DFX_7"},
+ {true, "CGE_IGU_AFIFO_DFX_0"},
+
+ {true, "CGE_IGU_AFIFO_DFX_1"},
+ {true, "CGE_EGU_AFIFO_DFX_0"},
+ {true, "CGE_EGU_AFIFO_DFX_1"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_ppp_reg[] = {
+ {false, "Reserved"},
+ {true, "DROP_FROM_PRT_PKT_CNT"},
+ {true, "DROP_FROM_HOST_PKT_CNT"},
+ {true, "DROP_TX_VLAN_PROC_CNT"},
+ {true, "DROP_MNG_CNT"},
+ {true, "DROP_FD_CNT"},
+
+ {true, "DROP_NO_DST_CNT"},
+ {true, "DROP_MC_MBID_FULL_CNT"},
+ {true, "DROP_SC_FILTERED"},
+ {true, "PPP_MC_DROP_PKT_CNT"},
+ {true, "DROP_PT_CNT"},
+ {true, "DROP_MAC_ANTI_SPOOF_CNT"},
+
+ {true, "DROP_IG_VFV_CNT"},
+ {true, "DROP_IG_PRTV_CNT"},
+ {true, "DROP_CNM_PFC_PAUSE_CNT"},
+ {true, "DROP_TORUS_TC_CNT"},
+ {true, "DROP_TORUS_LPBK_CNT"},
+ {true, "PPP_HFS_STS"},
+
+ {true, "PPP_MC_RSLT_STS"},
+ {true, "PPP_P3U_STS"},
+ {true, "PPP_RSLT_DESCR_STS"},
+ {true, "PPP_UMV_STS_0"},
+ {true, "PPP_UMV_STS_1"},
+ {true, "PPP_VFV_STS"},
+
+ {true, "PPP_GRO_KEY_CNT"},
+ {true, "PPP_GRO_INFO_CNT"},
+ {true, "PPP_GRO_DROP_CNT"},
+ {true, "PPP_GRO_OUT_CNT"},
+ {true, "PPP_GRO_KEY_MATCH_DATA_CNT"},
+ {true, "PPP_GRO_KEY_MATCH_TCAM_CNT"},
+
+ {true, "PPP_GRO_INFO_MATCH_CNT"},
+ {true, "PPP_GRO_FREE_ENTRY_CNT"},
+ {true, "PPP_GRO_INNER_DFX_SIGNAL"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+
+ {true, "GET_RX_PKT_CNT_L"},
+ {true, "GET_RX_PKT_CNT_H"},
+ {true, "GET_TX_PKT_CNT_L"},
+ {true, "GET_TX_PKT_CNT_H"},
+ {true, "SEND_UC_PRT2HOST_PKT_CNT_L"},
+ {true, "SEND_UC_PRT2HOST_PKT_CNT_H"},
+
+ {true, "SEND_UC_PRT2PRT_PKT_CNT_L"},
+ {true, "SEND_UC_PRT2PRT_PKT_CNT_H"},
+ {true, "SEND_UC_HOST2HOST_PKT_CNT_L"},
+ {true, "SEND_UC_HOST2HOST_PKT_CNT_H"},
+ {true, "SEND_UC_HOST2PRT_PKT_CNT_L"},
+ {true, "SEND_UC_HOST2PRT_PKT_CNT_H"},
+
+ {true, "SEND_MC_FROM_PRT_CNT_L"},
+ {true, "SEND_MC_FROM_PRT_CNT_H"},
+ {true, "SEND_MC_FROM_HOST_CNT_L"},
+ {true, "SEND_MC_FROM_HOST_CNT_H"},
+ {true, "SSU_MC_RD_CNT_L"},
+ {true, "SSU_MC_RD_CNT_H"},
+
+ {true, "SSU_MC_DROP_CNT_L"},
+ {true, "SSU_MC_DROP_CNT_H"},
+ {true, "SSU_MC_RD_PKT_CNT_L"},
+ {true, "SSU_MC_RD_PKT_CNT_H"},
+ {true, "PPP_MC_2HOST_PKT_CNT_L"},
+ {true, "PPP_MC_2HOST_PKT_CNT_H"},
+
+ {true, "PPP_MC_2PRT_PKT_CNT_L"},
+ {true, "PPP_MC_2PRT_PKT_CNT_H"},
+ {true, "NTSNOS_PKT_CNT_L"},
+ {true, "NTSNOS_PKT_CNT_H"},
+ {true, "NTUP_PKT_CNT_L"},
+ {true, "NTUP_PKT_CNT_H"},
+
+ {true, "NTLCL_PKT_CNT_L"},
+ {true, "NTLCL_PKT_CNT_H"},
+ {true, "NTTGT_PKT_CNT_L"},
+ {true, "NTTGT_PKT_CNT_H"},
+ {true, "RTNS_PKT_CNT_L"},
+ {true, "RTNS_PKT_CNT_H"},
+
+ {true, "RTLPBK_PKT_CNT_L"},
+ {true, "RTLPBK_PKT_CNT_H"},
+ {true, "NR_PKT_CNT_L"},
+ {true, "NR_PKT_CNT_H"},
+ {true, "RR_PKT_CNT_L"},
+ {true, "RR_PKT_CNT_H"},
+
+ {true, "MNG_TBL_HIT_CNT_L"},
+ {true, "MNG_TBL_HIT_CNT_H"},
+ {true, "FD_TBL_HIT_CNT_L"},
+ {true, "FD_TBL_HIT_CNT_H"},
+ {true, "FD_LKUP_CNT_L"},
+ {true, "FD_LKUP_CNT_H"},
+
+ {true, "BC_HIT_CNT_L"},
+ {true, "BC_HIT_CNT_H"},
+ {true, "UM_TBL_UC_HIT_CNT_L"},
+ {true, "UM_TBL_UC_HIT_CNT_H"},
+ {true, "UM_TBL_MC_HIT_CNT_L"},
+ {true, "UM_TBL_MC_HIT_CNT_H"},
+
+ {true, "UM_TBL_VMDQ1_HIT_CNT_L"},
+ {true, "UM_TBL_VMDQ1_HIT_CNT_H"},
+ {true, "MTA_TBL_HIT_CNT_L"},
+ {true, "MTA_TBL_HIT_CNT_H"},
+ {true, "FWD_BONDING_HIT_CNT_L"},
+ {true, "FWD_BONDING_HIT_CNT_H"},
+
+ {true, "PROMIS_TBL_HIT_CNT_L"},
+ {true, "PROMIS_TBL_HIT_CNT_H"},
+ {true, "GET_TUNL_PKT_CNT_L"},
+ {true, "GET_TUNL_PKT_CNT_H"},
+ {true, "GET_BMC_PKT_CNT_L"},
+ {true, "GET_BMC_PKT_CNT_H"},
+
+ {true, "SEND_UC_PRT2BMC_PKT_CNT_L"},
+ {true, "SEND_UC_PRT2BMC_PKT_CNT_H"},
+ {true, "SEND_UC_HOST2BMC_PKT_CNT_L"},
+ {true, "SEND_UC_HOST2BMC_PKT_CNT_H"},
+ {true, "SEND_UC_BMC2HOST_PKT_CNT_L"},
+ {true, "SEND_UC_BMC2HOST_PKT_CNT_H"},
+
+ {true, "SEND_UC_BMC2PRT_PKT_CNT_L"},
+ {true, "SEND_UC_BMC2PRT_PKT_CNT_H"},
+ {true, "PPP_MC_2BMC_PKT_CNT_L"},
+ {true, "PPP_MC_2BMC_PKT_CNT_H"},
+ {true, "VLAN_MIRR_CNT_L"},
+ {true, "VLAN_MIRR_CNT_H"},
+
+ {true, "IG_MIRR_CNT_L"},
+ {true, "IG_MIRR_CNT_H"},
+ {true, "EG_MIRR_CNT_L"},
+ {true, "EG_MIRR_CNT_H"},
+ {true, "RX_DEFAULT_HOST_HIT_CNT_L"},
+ {true, "RX_DEFAULT_HOST_HIT_CNT_H"},
+
+ {true, "LAN_PAIR_CNT_L"},
+ {true, "LAN_PAIR_CNT_H"},
+ {true, "UM_TBL_MC_HIT_PKT_CNT_L"},
+ {true, "UM_TBL_MC_HIT_PKT_CNT_H"},
+ {true, "MTA_TBL_HIT_PKT_CNT_L"},
+ {true, "MTA_TBL_HIT_PKT_CNT_H"},
+
+ {true, "PROMIS_TBL_HIT_PKT_CNT_L"},
+ {true, "PROMIS_TBL_HIT_PKT_CNT_H"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_rcb_reg[] = {
+ {false, "Reserved"},
+ {true, "FSM_DFX_ST0"},
+ {true, "FSM_DFX_ST1"},
+ {true, "FSM_DFX_ST2"},
+ {true, "FIFO_DFX_ST0"},
+ {true, "FIFO_DFX_ST1"},
+
+ {true, "FIFO_DFX_ST2"},
+ {true, "FIFO_DFX_ST3"},
+ {true, "FIFO_DFX_ST4"},
+ {true, "FIFO_DFX_ST5"},
+ {true, "FIFO_DFX_ST6"},
+ {true, "FIFO_DFX_ST7"},
+
+ {true, "FIFO_DFX_ST8"},
+ {true, "FIFO_DFX_ST9"},
+ {true, "FIFO_DFX_ST10"},
+ {true, "FIFO_DFX_ST11"},
+ {true, "Q_CREDIT_VLD_0"},
+ {true, "Q_CREDIT_VLD_1"},
+
+ {true, "Q_CREDIT_VLD_2"},
+ {true, "Q_CREDIT_VLD_3"},
+ {true, "Q_CREDIT_VLD_4"},
+ {true, "Q_CREDIT_VLD_5"},
+ {true, "Q_CREDIT_VLD_6"},
+ {true, "Q_CREDIT_VLD_7"},
+
+ {true, "Q_CREDIT_VLD_8"},
+ {true, "Q_CREDIT_VLD_9"},
+ {true, "Q_CREDIT_VLD_10"},
+ {true, "Q_CREDIT_VLD_11"},
+ {true, "Q_CREDIT_VLD_12"},
+ {true, "Q_CREDIT_VLD_13"},
+
+ {true, "Q_CREDIT_VLD_14"},
+ {true, "Q_CREDIT_VLD_15"},
+ {true, "Q_CREDIT_VLD_16"},
+ {true, "Q_CREDIT_VLD_17"},
+ {true, "Q_CREDIT_VLD_18"},
+ {true, "Q_CREDIT_VLD_19"},
+
+ {true, "Q_CREDIT_VLD_20"},
+ {true, "Q_CREDIT_VLD_21"},
+ {true, "Q_CREDIT_VLD_22"},
+ {true, "Q_CREDIT_VLD_23"},
+ {true, "Q_CREDIT_VLD_24"},
+ {true, "Q_CREDIT_VLD_25"},
+
+ {true, "Q_CREDIT_VLD_26"},
+ {true, "Q_CREDIT_VLD_27"},
+ {true, "Q_CREDIT_VLD_28"},
+ {true, "Q_CREDIT_VLD_29"},
+ {true, "Q_CREDIT_VLD_30"},
+ {true, "Q_CREDIT_VLD_31"},
+
+ {true, "GRO_BD_SERR_CNT"},
+ {true, "GRO_CONTEXT_SERR_CNT"},
+ {true, "RX_STASH_CFG_SERR_CNT"},
+ {true, "AXI_RD_FBD_SERR_CNT"},
+ {true, "GRO_BD_MERR_CNT"},
+ {true, "GRO_CONTEXT_MERR_CNT"},
+
+ {true, "RX_STASH_CFG_MERR_CNT"},
+ {true, "AXI_RD_FBD_MERR_CNT"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+ {false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_tqp_reg[] = {
+ {true, "q_num"},
+ {true, "RCB_CFG_RX_RING_TAIL"},
+ {true, "RCB_CFG_RX_RING_HEAD"},
+ {true, "RCB_CFG_RX_RING_FBDNUM"},
+ {true, "RCB_CFG_RX_RING_OFFSET"},
+ {true, "RCB_CFG_RX_RING_FBDOFFSET"},
+
+ {true, "RCB_CFG_RX_RING_PKTNUM_RECORD"},
+ {true, "RCB_CFG_TX_RING_TAIL"},
+ {true, "RCB_CFG_TX_RING_HEAD"},
+ {true, "RCB_CFG_TX_RING_FBDNUM"},
+ {true, "RCB_CFG_TX_RING_OFFSET"},
+ {true, "RCB_CFG_TX_RING_EBDNUM"},
+};
+
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 22380202ba83..d0e84de39fa3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -26,6 +26,8 @@
#define HCLGE_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset))))
#define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f))
+#define HCLGE_BUF_SIZE_UNIT 256
+
static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps);
static int hclge_init_vlan_config(struct hclge_dev *hdev);
static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev);
@@ -687,6 +689,22 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
hdev->num_tqps = __le16_to_cpu(req->tqp_num);
hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S;
+ if (req->tx_buf_size)
+ hdev->tx_buf_size =
+ __le16_to_cpu(req->tx_buf_size) << HCLGE_BUF_UNIT_S;
+ else
+ hdev->tx_buf_size = HCLGE_DEFAULT_TX_BUF;
+
+ hdev->tx_buf_size = roundup(hdev->tx_buf_size, HCLGE_BUF_SIZE_UNIT);
+
+ if (req->dv_buf_size)
+ hdev->dv_buf_size =
+ __le16_to_cpu(req->dv_buf_size) << HCLGE_BUF_UNIT_S;
+ else
+ hdev->dv_buf_size = HCLGE_DEFAULT_DV;
+
+ hdev->dv_buf_size = roundup(hdev->dv_buf_size, HCLGE_BUF_SIZE_UNIT);
+
if (hnae3_dev_roce_supported(hdev)) {
hdev->roce_base_msix_offset =
hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee),
@@ -1368,40 +1386,51 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev,
{
u32 shared_buf_min, shared_buf_tc, shared_std;
int tc_num, pfc_enable_num;
- u32 shared_buf;
+ u32 shared_buf, aligned_mps;
u32 rx_priv;
int i;
tc_num = hclge_get_tc_num(hdev);
pfc_enable_num = hclge_get_pfc_enalbe_num(hdev);
+ aligned_mps = roundup(hdev->mps, HCLGE_BUF_SIZE_UNIT);
if (hnae3_dev_dcb_supported(hdev))
- shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_DV;
+ shared_buf_min = 2 * aligned_mps + hdev->dv_buf_size;
else
- shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_NON_DCB_DV;
+ shared_buf_min = aligned_mps + HCLGE_NON_DCB_ADDITIONAL_BUF
+ + hdev->dv_buf_size;
- shared_buf_tc = pfc_enable_num * hdev->mps +
- (tc_num - pfc_enable_num) * hdev->mps / 2 +
- hdev->mps;
- shared_std = max_t(u32, shared_buf_min, shared_buf_tc);
+ shared_buf_tc = pfc_enable_num * aligned_mps +
+ (tc_num - pfc_enable_num) * aligned_mps / 2 +
+ aligned_mps;
+ shared_std = roundup(max_t(u32, shared_buf_min, shared_buf_tc),
+ HCLGE_BUF_SIZE_UNIT);
rx_priv = hclge_get_rx_priv_buff_alloced(buf_alloc);
- if (rx_all <= rx_priv + shared_std)
+ if (rx_all < rx_priv + shared_std)
return false;
- shared_buf = rx_all - rx_priv;
+ shared_buf = rounddown(rx_all - rx_priv, HCLGE_BUF_SIZE_UNIT);
buf_alloc->s_buf.buf_size = shared_buf;
- buf_alloc->s_buf.self.high = shared_buf;
- buf_alloc->s_buf.self.low = 2 * hdev->mps;
+ if (hnae3_dev_dcb_supported(hdev)) {
+ buf_alloc->s_buf.self.high = shared_buf - hdev->dv_buf_size;
+ buf_alloc->s_buf.self.low = buf_alloc->s_buf.self.high
+ - roundup(aligned_mps / 2, HCLGE_BUF_SIZE_UNIT);
+ } else {
+ buf_alloc->s_buf.self.high = aligned_mps +
+ HCLGE_NON_DCB_ADDITIONAL_BUF;
+ buf_alloc->s_buf.self.low =
+ roundup(aligned_mps / 2, HCLGE_BUF_SIZE_UNIT);
+ }
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
if ((hdev->hw_tc_map & BIT(i)) &&
(hdev->tm_info.hw_pfc_map & BIT(i))) {
- buf_alloc->s_buf.tc_thrd[i].low = hdev->mps;
- buf_alloc->s_buf.tc_thrd[i].high = 2 * hdev->mps;
+ buf_alloc->s_buf.tc_thrd[i].low = aligned_mps;
+ buf_alloc->s_buf.tc_thrd[i].high = 2 * aligned_mps;
} else {
buf_alloc->s_buf.tc_thrd[i].low = 0;
- buf_alloc->s_buf.tc_thrd[i].high = hdev->mps;
+ buf_alloc->s_buf.tc_thrd[i].high = aligned_mps;
}
}
@@ -1419,11 +1448,11 @@ static int hclge_tx_buffer_calc(struct hclge_dev *hdev,
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i];
- if (total_size < HCLGE_DEFAULT_TX_BUF)
+ if (total_size < hdev->tx_buf_size)
return -ENOMEM;
if (hdev->hw_tc_map & BIT(i))
- priv->tx_buf_size = HCLGE_DEFAULT_TX_BUF;
+ priv->tx_buf_size = hdev->tx_buf_size;
else
priv->tx_buf_size = 0;
@@ -1441,7 +1470,6 @@ static int hclge_tx_buffer_calc(struct hclge_dev *hdev,
static int hclge_rx_buffer_calc(struct hclge_dev *hdev,
struct hclge_pkt_buf_alloc *buf_alloc)
{
-#define HCLGE_BUF_SIZE_UNIT 128
u32 rx_all = hdev->pkt_buf_size, aligned_mps;
int no_pfc_priv_num, pfc_priv_num;
struct hclge_priv_buf *priv;
@@ -1467,13 +1495,16 @@ static int hclge_rx_buffer_calc(struct hclge_dev *hdev,
priv->enable = 1;
if (hdev->tm_info.hw_pfc_map & BIT(i)) {
priv->wl.low = aligned_mps;
- priv->wl.high = priv->wl.low + aligned_mps;
+ priv->wl.high =
+ roundup(priv->wl.low + aligned_mps,
+ HCLGE_BUF_SIZE_UNIT);
priv->buf_size = priv->wl.high +
- HCLGE_DEFAULT_DV;
+ hdev->dv_buf_size;
} else {
priv->wl.low = 0;
priv->wl.high = 2 * aligned_mps;
- priv->buf_size = priv->wl.high;
+ priv->buf_size = priv->wl.high +
+ hdev->dv_buf_size;
}
} else {
priv->enable = 0;
@@ -1503,13 +1534,13 @@ static int hclge_rx_buffer_calc(struct hclge_dev *hdev,
priv->enable = 1;
if (hdev->tm_info.hw_pfc_map & BIT(i)) {
- priv->wl.low = 128;
+ priv->wl.low = 256;
priv->wl.high = priv->wl.low + aligned_mps;
- priv->buf_size = priv->wl.high + HCLGE_DEFAULT_DV;
+ priv->buf_size = priv->wl.high + hdev->dv_buf_size;
} else {
priv->wl.low = 0;
priv->wl.high = aligned_mps;
- priv->buf_size = priv->wl.high;
+ priv->buf_size = priv->wl.high + hdev->dv_buf_size;
}
}
@@ -2810,7 +2841,6 @@ static void hclge_reset(struct hclge_dev *hdev)
*/
ae_dev->reset_type = hdev->reset_type;
hdev->reset_count++;
- hdev->last_reset_time = jiffies;
/* perform reset of the stack & ae device for a client */
ret = hclge_notify_roce_client(hdev, HNAE3_DOWN_CLIENT);
if (ret)
@@ -2873,6 +2903,10 @@ static void hclge_reset(struct hclge_dev *hdev)
if (ret)
goto err_reset;
+ hdev->last_reset_time = jiffies;
+ hdev->reset_fail_cnt = 0;
+ ae_dev->reset_type = HNAE3_NONE_RESET;
+
return;
err_reset_lock:
@@ -6602,8 +6636,7 @@ static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id)
return hnae3_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B);
}
-static u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle,
- u16 queue_id)
+u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id)
{
struct hnae3_queue *queue;
struct hclge_tqp *tqp;
@@ -7378,19 +7411,6 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
- ret = hclge_get_cap(hdev);
- if (ret) {
- dev_err(&pdev->dev, "get hw capability error, ret = %d.\n",
- ret);
- return ret;
- }
-
- ret = hclge_configure(hdev);
- if (ret) {
- dev_err(&pdev->dev, "Configure dev error, ret = %d.\n", ret);
- return ret;
- }
-
ret = hclge_map_tqp(hdev);
if (ret) {
dev_err(&pdev->dev, "Map tqp error, ret = %d.\n", ret);
@@ -7975,6 +7995,7 @@ static const struct hnae3_ae_ops hclge_ops = {
.ae_dev_resetting = hclge_ae_dev_resetting,
.ae_dev_reset_cnt = hclge_ae_dev_reset_cnt,
.set_gro_en = hclge_gro_en,
+ .get_global_queue_id = hclge_covert_handle_qid_global,
};
static struct hnae3_ae_algo ae_algo = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 106fce172563..6615b85a1c52 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -736,6 +736,9 @@ struct hclge_dev {
u32 flag;
u32 pkt_buf_size; /* Total pf buf size for tx/rx */
+ u32 tx_buf_size; /* Tx buffer size for each TC */
+ u32 dv_buf_size; /* Dv buffer size for each TC */
+
u32 mps; /* Max packet size */
/* vport_lock protect resource shared by vports */
struct mutex vport_lock;
@@ -874,4 +877,5 @@ int hclge_vport_start(struct hclge_vport *vport);
void hclge_vport_stop(struct hclge_vport *vport);
int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu);
int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf);
+u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index e16a730a5f54..a1de451a85df 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -413,6 +413,19 @@ static int hclge_set_vf_mtu(struct hclge_vport *vport,
return hclge_gen_resp_to_vf(vport, mbx_req, ret, NULL, 0);
}
+static int hclge_get_queue_id_in_pf(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+ u16 queue_id, qid_in_pf;
+ u8 resp_data[2];
+
+ memcpy(&queue_id, &mbx_req->msg[2], sizeof(queue_id));
+ qid_in_pf = hclge_covert_handle_qid_global(&vport->nic, queue_id);
+ memcpy(resp_data, &qid_in_pf, sizeof(qid_in_pf));
+
+ return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data, 2);
+}
+
static bool hclge_cmd_crq_empty(struct hclge_hw *hw)
{
u32 tail = hclge_read_dev(hw, HCLGE_NIC_CRQ_TAIL_REG);
@@ -533,6 +546,13 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
dev_err(&hdev->pdev->dev,
"VF fail(%d) to set mtu\n", ret);
break;
+ case HCLGE_MBX_GET_QID_IN_PF:
+ ret = hclge_get_queue_id_in_pf(vport, req);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "PF failed(%d) to get qid for VF\n",
+ ret);
+ break;
default:
dev_err(&hdev->pdev->dev,
"un-supported mailbox message, code = %d\n",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index 741cb3b9519d..dabb8437f8dc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -12,7 +12,7 @@
SUPPORTED_TP | \
PHY_10BT_FEATURES | \
PHY_100BT_FEATURES | \
- PHY_1000BT_FEATURES)
+ SUPPORTED_1000baseT_Full)
enum hclge_mdio_c22_op_seq {
HCLGE_MDIO_C22_WRITE = 1,
@@ -179,6 +179,10 @@ static void hclge_mac_adjust_link(struct net_device *netdev)
int duplex, speed;
int ret;
+ /* When phy link down, do nothing */
+ if (netdev->phydev->link == 0)
+ return;
+
speed = netdev->phydev->speed;
duplex = netdev->phydev->duplex;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 9c6192c46aa6..b6496a439304 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -40,6 +40,13 @@ struct hclge_nq_to_qs_link_cmd {
__le16 qset_id;
};
+struct hclge_tqp_tx_queue_tc_cmd {
+ __le16 queue_id;
+ __le16 rsvd;
+ u8 tc_id;
+ u8 rev[3];
+};
+
struct hclge_pg_weight_cmd {
u8 pg_id;
u8 dwrr;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 417e078eac62..54ba93ac24dd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -256,6 +256,23 @@ static int hclgevf_get_queue_info(struct hclgevf_dev *hdev)
return 0;
}
+static u16 hclgevf_get_qid_global(struct hnae3_handle *handle, u16 queue_id)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ u8 msg_data[2], resp_data[2];
+ u16 qid_in_pf = 0;
+ int ret;
+
+ memcpy(&msg_data[0], &queue_id, sizeof(queue_id));
+
+ ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_QID_IN_PF, 0, msg_data,
+ 2, true, resp_data, 2);
+ if (!ret)
+ qid_in_pf = *(u16 *)resp_data;
+
+ return qid_in_pf;
+}
+
static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev)
{
struct hclgevf_tqp *tqp;
@@ -1325,6 +1342,9 @@ static int hclgevf_reset(struct hclgevf_dev *hdev)
rtnl_unlock();
+ hdev->last_reset_time = jiffies;
+ ae_dev->reset_type = HNAE3_NONE_RESET;
+
return ret;
err_reset_lock:
rtnl_unlock();
@@ -2384,9 +2404,9 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
if (test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) {
hclgevf_misc_irq_uninit(hdev);
hclgevf_uninit_msi(hdev);
- hclgevf_pci_uninit(hdev);
}
+ hclgevf_pci_uninit(hdev);
hclgevf_cmd_uninit(hdev);
}
@@ -2642,6 +2662,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
.ae_dev_reset_cnt = hclgevf_ae_dev_reset_cnt,
.set_gro_en = hclgevf_gro_en,
.set_mtu = hclgevf_set_mtu,
+ .get_global_queue_id = hclgevf_get_qid_global,
};
static struct hnae3_ae_algo ae_algovf = {
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index ed50b8dee44f..14d00985f087 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -773,11 +773,8 @@ static void release_napi(struct ibmvnic_adapter *adapter)
return;
for (i = 0; i < adapter->num_active_rx_napi; i++) {
- if (&adapter->napi[i]) {
- netdev_dbg(adapter->netdev,
- "Releasing napi[%d]\n", i);
- netif_napi_del(&adapter->napi[i]);
- }
+ netdev_dbg(adapter->netdev, "Releasing napi[%d]\n", i);
+ netif_napi_del(&adapter->napi[i]);
}
kfree(adapter->napi);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 062a88fcc5d6..74d466796b7c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -54,11 +54,8 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, node);
if (!cq) {
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq) {
- en_err(priv, "Failed to allocate CQ structure\n");
- return -ENOMEM;
- }
+ en_err(priv, "Failed to allocate CQ structure\n");
+ return -ENOMEM;
}
cq->size = entries;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index fd09ba98c0a6..9a0881cb7f51 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -271,11 +271,8 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
if (!ring) {
- ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (!ring) {
- en_err(priv, "Failed to allocate RX ring structure\n");
- return -ENOMEM;
- }
+ en_err(priv, "Failed to allocate RX ring structure\n");
+ return -ENOMEM;
}
ring->prod = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 6f5153afcab4..2cbd2bd7c67c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -57,11 +57,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
if (!ring) {
- ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (!ring) {
- en_err(priv, "Failed allocating TX ring\n");
- return -ENOMEM;
- }
+ en_err(priv, "Failed allocating TX ring\n");
+ return -ENOMEM;
}
ring->size = size;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 1f77e97e2d7a..bbf45f10c208 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -20,7 +20,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_acl_tcam.o spectrum_acl_ctcam.o \
spectrum_acl_atcam.o spectrum_acl_erp.o \
spectrum1_acl_tcam.o spectrum2_acl_tcam.o \
- spectrum_acl.o \
+ spectrum_acl_bloom_filter.o spectrum_acl.o \
spectrum_flower.o spectrum_cnt.o \
spectrum_fid.o spectrum_ipip.o \
spectrum_acl_flex_actions.o \
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index ace3a897b3f0..9b48dffc9f63 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -2743,7 +2743,7 @@ mlxsw_reg_perpt_pack(char *payload, u8 erpt_bank, u8 erpt_index,
mlxsw_reg_perpt_erpt_bank_set(payload, erpt_bank);
mlxsw_reg_perpt_erpt_index_set(payload, erpt_index);
mlxsw_reg_perpt_key_size_set(payload, key_size);
- mlxsw_reg_perpt_bf_bypass_set(payload, true);
+ mlxsw_reg_perpt_bf_bypass_set(payload, false);
mlxsw_reg_perpt_erp_id_set(payload, erp_id);
mlxsw_reg_perpt_erpt_base_bank_set(payload, erpt_base_bank);
mlxsw_reg_perpt_erpt_base_index_set(payload, erpt_base_index);
@@ -3006,7 +3006,7 @@ static inline void mlxsw_reg_percr_pack(char *payload, u16 region_id)
mlxsw_reg_percr_region_id_set(payload, region_id);
mlxsw_reg_percr_atcam_ignore_prune_set(payload, false);
mlxsw_reg_percr_ctcam_ignore_prune_set(payload, false);
- mlxsw_reg_percr_bf_bypass_set(payload, true);
+ mlxsw_reg_percr_bf_bypass_set(payload, false);
}
/* PERERP - Policy-Engine Region eRP Register
@@ -3095,6 +3095,72 @@ static inline void mlxsw_reg_pererp_pack(char *payload, u16 region_id,
mlxsw_reg_pererp_master_rp_id_set(payload, master_rp_id);
}
+/* PEABFE - Policy-Engine Algorithmic Bloom Filter Entries Register
+ * ----------------------------------------------------------------
+ * This register configures the Bloom filter entries.
+ */
+#define MLXSW_REG_PEABFE_ID 0x3022
+#define MLXSW_REG_PEABFE_BASE_LEN 0x10
+#define MLXSW_REG_PEABFE_BF_REC_LEN 0x4
+#define MLXSW_REG_PEABFE_BF_REC_MAX_COUNT 256
+#define MLXSW_REG_PEABFE_LEN (MLXSW_REG_PEABFE_BASE_LEN + \
+ MLXSW_REG_PEABFE_BF_REC_LEN * \
+ MLXSW_REG_PEABFE_BF_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(peabfe, MLXSW_REG_PEABFE_ID, MLXSW_REG_PEABFE_LEN);
+
+/* reg_peabfe_size
+ * Number of BF entries to be updated.
+ * Range 1..256
+ * Access: Op
+ */
+MLXSW_ITEM32(reg, peabfe, size, 0x00, 0, 9);
+
+/* reg_peabfe_bf_entry_state
+ * Bloom filter state
+ * 0 - Clear
+ * 1 - Set
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_state,
+ MLXSW_REG_PEABFE_BASE_LEN, 31, 1,
+ MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false);
+
+/* reg_peabfe_bf_entry_bank
+ * Bloom filter bank ID
+ * Range 0..cap_max_erp_table_banks-1
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_bank,
+ MLXSW_REG_PEABFE_BASE_LEN, 24, 4,
+ MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false);
+
+/* reg_peabfe_bf_entry_index
+ * Bloom filter entry index
+ * Range 0..2^cap_max_bf_log-1
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_index,
+ MLXSW_REG_PEABFE_BASE_LEN, 0, 24,
+ MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false);
+
+static inline void mlxsw_reg_peabfe_pack(char *payload)
+{
+ MLXSW_REG_ZERO(peabfe, payload);
+}
+
+static inline void mlxsw_reg_peabfe_rec_pack(char *payload, int rec_index,
+ u8 state, u8 bank, u32 bf_index)
+{
+ u8 num_rec = mlxsw_reg_peabfe_size_get(payload);
+
+ if (rec_index >= num_rec)
+ mlxsw_reg_peabfe_size_set(payload, rec_index + 1);
+ mlxsw_reg_peabfe_bf_entry_state_set(payload, rec_index, state);
+ mlxsw_reg_peabfe_bf_entry_bank_set(payload, rec_index, bank);
+ mlxsw_reg_peabfe_bf_entry_index_set(payload, rec_index, bf_index);
+}
+
/* IEDR - Infrastructure Entry Delete Register
* ----------------------------------------------------
* This register is used for deleting entries from the entry tables.
@@ -9608,6 +9674,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(pemrbt),
MLXSW_REG(ptce2),
MLXSW_REG(perpt),
+ MLXSW_REG(peabfe),
MLXSW_REG(perar),
MLXSW_REG(ptce3),
MLXSW_REG(percr),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
index 99b341539870..b8b3a01c2a9e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/resources.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h
@@ -41,6 +41,7 @@ enum mlxsw_res_id {
MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB,
MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB,
MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB,
+ MLXSW_RES_ID_ACL_MAX_BF_LOG,
MLXSW_RES_ID_MAX_CPU_POLICERS,
MLXSW_RES_ID_MAX_VRS,
MLXSW_RES_ID_MAX_RIFS,
@@ -93,6 +94,7 @@ static u16 mlxsw_res_ids[] = {
[MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB] = 0x2951,
[MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB] = 0x2952,
[MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB] = 0x2953,
+ [MLXSW_RES_ID_ACL_MAX_BF_LOG] = 0x2960,
[MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13,
[MLXSW_RES_ID_MAX_VRS] = 0x2C01,
[MLXSW_RES_ID_MAX_RIFS] = 0x2C02,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 5fbb4b75f472..eb8db9fae237 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -45,8 +45,8 @@
#define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
#define MLXSW_SP1_FWREV_MAJOR 13
-#define MLXSW_SP1_FWREV_MINOR 1703
-#define MLXSW_SP1_FWREV_SUBMINOR 4
+#define MLXSW_SP1_FWREV_MINOR 1910
+#define MLXSW_SP1_FWREV_SUBMINOR 622
#define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702
static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
index 75f11c2d7b97..80fb268d51a5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
@@ -323,6 +323,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp,
aregion->region = region;
aregion->atcam = atcam;
mlxsw_sp_acl_atcam_region_type_init(aregion);
+ INIT_LIST_HEAD(&aregion->entries_list);
err = rhashtable_init(&aregion->entries_ht,
&mlxsw_sp_acl_atcam_entries_ht_params);
@@ -356,6 +357,7 @@ void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion)
mlxsw_sp_acl_erp_region_fini(aregion);
aregion->ops->fini(aregion);
rhashtable_destroy(&aregion->entries_ht);
+ WARN_ON(!list_empty(&aregion->entries_list));
}
void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion,
@@ -499,6 +501,12 @@ __mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_acl_erp_delta_value(delta, aentry->full_enc_key);
mlxsw_sp_acl_erp_delta_clear(delta, aentry->ht_key.enc_key);
+ /* Add rule to the list of A-TCAM rules, assuming this
+ * rule is intended to A-TCAM. In case this rule does
+ * not fit into A-TCAM it will be removed from the list.
+ */
+ list_add(&aentry->list, &aregion->entries_list);
+
/* We can't insert identical rules into the A-TCAM, so fail and
* let the rule spill into C-TCAM
*/
@@ -508,6 +516,13 @@ __mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_rhashtable_insert;
+ /* Bloom filter must be updated here, before inserting the rule into
+ * the A-TCAM.
+ */
+ err = mlxsw_sp_acl_erp_bf_insert(mlxsw_sp, aregion, erp_mask, aentry);
+ if (err)
+ goto err_bf_insert;
+
err = mlxsw_sp_acl_atcam_region_entry_insert(mlxsw_sp, aregion, aentry,
rulei);
if (err)
@@ -516,9 +531,12 @@ __mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp,
return 0;
err_rule_insert:
+ mlxsw_sp_acl_erp_bf_remove(mlxsw_sp, aregion, erp_mask, aentry);
+err_bf_insert:
rhashtable_remove_fast(&aregion->entries_ht, &aentry->ht_node,
mlxsw_sp_acl_atcam_entries_ht_params);
err_rhashtable_insert:
+ list_del(&aentry->list);
mlxsw_sp_acl_erp_mask_put(aregion, erp_mask);
return err;
}
@@ -529,8 +547,10 @@ __mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_atcam_entry *aentry)
{
mlxsw_sp_acl_atcam_region_entry_remove(mlxsw_sp, aregion, aentry);
+ mlxsw_sp_acl_erp_bf_remove(mlxsw_sp, aregion, aentry->erp_mask, aentry);
rhashtable_remove_fast(&aregion->entries_ht, &aentry->ht_node,
mlxsw_sp_acl_atcam_entries_ht_params);
+ list_del(&aentry->list);
mlxsw_sp_acl_erp_mask_put(aregion, aentry->erp_mask);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
new file mode 100644
index 000000000000..505b87846acc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/refcount.h>
+
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+
+struct mlxsw_sp_acl_bf {
+ unsigned int bank_size;
+ refcount_t refcnt[0];
+};
+
+/* Bloom filter uses a crc-16 hash over chunks of data which contain 4 key
+ * blocks, eRP ID and region ID. In Spectrum-2, region key is combined of up to
+ * 12 key blocks, so there can be up to 3 chunks in the Bloom filter key,
+ * depending on the actual number of key blocks used in the region.
+ * The layout of the Bloom filter key is as follows:
+ *
+ * +-------------------------+------------------------+------------------------+
+ * | Chunk 2 Key blocks 11-8 | Chunk 1 Key blocks 7-4 | Chunk 0 Key blocks 3-0 |
+ * +-------------------------+------------------------+------------------------+
+ */
+#define MLXSW_BLOOM_KEY_CHUNKS 3
+#define MLXSW_BLOOM_KEY_LEN 69
+
+/* Each chunk size is 23 bytes. 18 bytes of it contain 4 key blocks, each is
+ * 36 bits, 2 bytes which hold eRP ID and region ID, and 3 bytes of zero
+ * padding.
+ * The layout of each chunk is as follows:
+ *
+ * +---------+----------------------+-----------------------------------+
+ * | 3 bytes | 2 bytes | 18 bytes |
+ * +---------+-----------+----------+-----------------------------------+
+ * | 183:158 | 157:148 | 147:144 | 143:0 |
+ * +---------+-----------+----------+-----------------------------------+
+ * | 0 | region ID | eRP ID | 4 Key blocks (18 Bytes) |
+ * +---------+-----------+----------+-----------------------------------+
+ */
+#define MLXSW_BLOOM_CHUNK_PAD_BYTES 3
+#define MLXSW_BLOOM_CHUNK_KEY_BYTES 18
+#define MLXSW_BLOOM_KEY_CHUNK_BYTES 23
+
+/* The offset of the key block within a chunk is 5 bytes as it comes after
+ * 3 bytes of zero padding and 16 bits of region ID and eRP ID.
+ */
+#define MLXSW_BLOOM_CHUNK_KEY_OFFSET 5
+
+/* Each chunk contains 4 key blocks. Chunk 2 uses key blocks 11-8,
+ * and we need to populate it with 4 key blocks copied from the entry encoded
+ * key. Since the encoded key contains a padding, key block 11 starts at offset
+ * 2. block 7 that is used in chunk 1 starts at offset 20 as 4 key blocks take
+ * 18 bytes.
+ * This array defines key offsets for easy access when copying key blocks from
+ * entry key to Bloom filter chunk.
+ */
+static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38};
+
+/* This table is just the CRC of each possible byte. It is
+ * computed, Msbit first, for the Bloom filter polynomial
+ * which is 0x8529 (1 + x^3 + x^5 + x^8 + x^10 + x^15 and
+ * the implicit x^16).
+ */
+static const u16 mlxsw_sp_acl_bf_crc_tab[256] = {
+0x0000, 0x8529, 0x8f7b, 0x0a52, 0x9bdf, 0x1ef6, 0x14a4, 0x918d,
+0xb297, 0x37be, 0x3dec, 0xb8c5, 0x2948, 0xac61, 0xa633, 0x231a,
+0xe007, 0x652e, 0x6f7c, 0xea55, 0x7bd8, 0xfef1, 0xf4a3, 0x718a,
+0x5290, 0xd7b9, 0xddeb, 0x58c2, 0xc94f, 0x4c66, 0x4634, 0xc31d,
+0x4527, 0xc00e, 0xca5c, 0x4f75, 0xdef8, 0x5bd1, 0x5183, 0xd4aa,
+0xf7b0, 0x7299, 0x78cb, 0xfde2, 0x6c6f, 0xe946, 0xe314, 0x663d,
+0xa520, 0x2009, 0x2a5b, 0xaf72, 0x3eff, 0xbbd6, 0xb184, 0x34ad,
+0x17b7, 0x929e, 0x98cc, 0x1de5, 0x8c68, 0x0941, 0x0313, 0x863a,
+0x8a4e, 0x0f67, 0x0535, 0x801c, 0x1191, 0x94b8, 0x9eea, 0x1bc3,
+0x38d9, 0xbdf0, 0xb7a2, 0x328b, 0xa306, 0x262f, 0x2c7d, 0xa954,
+0x6a49, 0xef60, 0xe532, 0x601b, 0xf196, 0x74bf, 0x7eed, 0xfbc4,
+0xd8de, 0x5df7, 0x57a5, 0xd28c, 0x4301, 0xc628, 0xcc7a, 0x4953,
+0xcf69, 0x4a40, 0x4012, 0xc53b, 0x54b6, 0xd19f, 0xdbcd, 0x5ee4,
+0x7dfe, 0xf8d7, 0xf285, 0x77ac, 0xe621, 0x6308, 0x695a, 0xec73,
+0x2f6e, 0xaa47, 0xa015, 0x253c, 0xb4b1, 0x3198, 0x3bca, 0xbee3,
+0x9df9, 0x18d0, 0x1282, 0x97ab, 0x0626, 0x830f, 0x895d, 0x0c74,
+0x91b5, 0x149c, 0x1ece, 0x9be7, 0x0a6a, 0x8f43, 0x8511, 0x0038,
+0x2322, 0xa60b, 0xac59, 0x2970, 0xb8fd, 0x3dd4, 0x3786, 0xb2af,
+0x71b2, 0xf49b, 0xfec9, 0x7be0, 0xea6d, 0x6f44, 0x6516, 0xe03f,
+0xc325, 0x460c, 0x4c5e, 0xc977, 0x58fa, 0xddd3, 0xd781, 0x52a8,
+0xd492, 0x51bb, 0x5be9, 0xdec0, 0x4f4d, 0xca64, 0xc036, 0x451f,
+0x6605, 0xe32c, 0xe97e, 0x6c57, 0xfdda, 0x78f3, 0x72a1, 0xf788,
+0x3495, 0xb1bc, 0xbbee, 0x3ec7, 0xaf4a, 0x2a63, 0x2031, 0xa518,
+0x8602, 0x032b, 0x0979, 0x8c50, 0x1ddd, 0x98f4, 0x92a6, 0x178f,
+0x1bfb, 0x9ed2, 0x9480, 0x11a9, 0x8024, 0x050d, 0x0f5f, 0x8a76,
+0xa96c, 0x2c45, 0x2617, 0xa33e, 0x32b3, 0xb79a, 0xbdc8, 0x38e1,
+0xfbfc, 0x7ed5, 0x7487, 0xf1ae, 0x6023, 0xe50a, 0xef58, 0x6a71,
+0x496b, 0xcc42, 0xc610, 0x4339, 0xd2b4, 0x579d, 0x5dcf, 0xd8e6,
+0x5edc, 0xdbf5, 0xd1a7, 0x548e, 0xc503, 0x402a, 0x4a78, 0xcf51,
+0xec4b, 0x6962, 0x6330, 0xe619, 0x7794, 0xf2bd, 0xf8ef, 0x7dc6,
+0xbedb, 0x3bf2, 0x31a0, 0xb489, 0x2504, 0xa02d, 0xaa7f, 0x2f56,
+0x0c4c, 0x8965, 0x8337, 0x061e, 0x9793, 0x12ba, 0x18e8, 0x9dc1,
+};
+
+static u16 mlxsw_sp_acl_bf_crc_byte(u16 crc, u8 c)
+{
+ return (crc << 8) ^ mlxsw_sp_acl_bf_crc_tab[(crc >> 8) ^ c];
+}
+
+static u16 mlxsw_sp_acl_bf_crc(const u8 *buffer, size_t len)
+{
+ u16 crc = 0;
+
+ while (len--)
+ crc = mlxsw_sp_acl_bf_crc_byte(crc, *buffer++);
+ return crc;
+}
+
+static void
+mlxsw_sp_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_entry *aentry,
+ char *output, u8 *len)
+{
+ struct mlxsw_afk_key_info *key_info = aregion->region->key_info;
+ u8 chunk_index, chunk_count, block_count;
+ char *chunk = output;
+ __be16 erp_region_id;
+
+ block_count = mlxsw_afk_key_info_blocks_count_get(key_info);
+ chunk_count = 1 + ((block_count - 1) >> 2);
+ erp_region_id = cpu_to_be16(aentry->ht_key.erp_id |
+ (aregion->region->id << 4));
+ for (chunk_index = MLXSW_BLOOM_KEY_CHUNKS - chunk_count;
+ chunk_index < MLXSW_BLOOM_KEY_CHUNKS; chunk_index++) {
+ memset(chunk, 0, MLXSW_BLOOM_CHUNK_PAD_BYTES);
+ memcpy(chunk + MLXSW_BLOOM_CHUNK_PAD_BYTES, &erp_region_id,
+ sizeof(erp_region_id));
+ memcpy(chunk + MLXSW_BLOOM_CHUNK_KEY_OFFSET,
+ &aentry->ht_key.enc_key[chunk_key_offsets[chunk_index]],
+ MLXSW_BLOOM_CHUNK_KEY_BYTES);
+ chunk += MLXSW_BLOOM_KEY_CHUNK_BYTES;
+ }
+ *len = chunk_count * MLXSW_BLOOM_KEY_CHUNK_BYTES;
+}
+
+static unsigned int
+mlxsw_sp_acl_bf_rule_count_index_get(struct mlxsw_sp_acl_bf *bf,
+ unsigned int erp_bank,
+ unsigned int bf_index)
+{
+ return erp_bank * bf->bank_size + bf_index;
+}
+
+static unsigned int
+mlxsw_sp_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+ char bf_key[MLXSW_BLOOM_KEY_LEN];
+ u8 bf_size;
+
+ mlxsw_sp_acl_bf_key_encode(aregion, aentry, bf_key, &bf_size);
+ return mlxsw_sp_acl_bf_crc(bf_key, bf_size);
+}
+
+int
+mlxsw_sp_acl_bf_entry_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_bf *bf,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ unsigned int erp_bank,
+ struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+ unsigned int rule_index;
+ char *peabfe_pl;
+ u16 bf_index;
+ int err;
+
+ bf_index = mlxsw_sp_acl_bf_index_get(bf, aregion, aentry);
+ rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank,
+ bf_index);
+
+ if (refcount_inc_not_zero(&bf->refcnt[rule_index]))
+ return 0;
+
+ peabfe_pl = kmalloc(MLXSW_REG_PEABFE_LEN, GFP_KERNEL);
+ if (!peabfe_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_peabfe_pack(peabfe_pl);
+ mlxsw_reg_peabfe_rec_pack(peabfe_pl, 0, 1, erp_bank, bf_index);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(peabfe), peabfe_pl);
+ kfree(peabfe_pl);
+ if (err)
+ return err;
+
+ refcount_set(&bf->refcnt[rule_index], 1);
+ return 0;
+}
+
+void
+mlxsw_sp_acl_bf_entry_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_bf *bf,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ unsigned int erp_bank,
+ struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+ unsigned int rule_index;
+ char *peabfe_pl;
+ u16 bf_index;
+
+ bf_index = mlxsw_sp_acl_bf_index_get(bf, aregion, aentry);
+ rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank,
+ bf_index);
+
+ if (refcount_dec_and_test(&bf->refcnt[rule_index])) {
+ peabfe_pl = kmalloc(MLXSW_REG_PEABFE_LEN, GFP_KERNEL);
+ if (!peabfe_pl)
+ return;
+
+ mlxsw_reg_peabfe_pack(peabfe_pl);
+ mlxsw_reg_peabfe_rec_pack(peabfe_pl, 0, 0, erp_bank, bf_index);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(peabfe), peabfe_pl);
+ kfree(peabfe_pl);
+ }
+}
+
+struct mlxsw_sp_acl_bf *
+mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks)
+{
+ struct mlxsw_sp_acl_bf *bf;
+ unsigned int bf_bank_size;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_BF_LOG))
+ return ERR_PTR(-EIO);
+
+ /* Bloom filter size per erp_table_bank
+ * is 2^ACL_MAX_BF_LOG
+ */
+ bf_bank_size = 1 << MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_BF_LOG);
+ bf = kzalloc(sizeof(*bf) + bf_bank_size * num_erp_banks *
+ sizeof(*bf->refcnt), GFP_KERNEL);
+ if (!bf)
+ return ERR_PTR(-ENOMEM);
+
+ bf->bank_size = bf_bank_size;
+ return bf;
+}
+
+void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf)
+{
+ kfree(bf);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
index d9a4b7e8434b..1c19feefa5f2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
@@ -24,6 +24,7 @@ struct mlxsw_sp_acl_erp_core {
unsigned int erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX + 1];
struct gen_pool *erp_tables;
struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_acl_bf *bf;
unsigned int num_erp_banks;
};
@@ -114,6 +115,19 @@ static const struct mlxsw_sp_acl_erp_table_ops erp_no_mask_ops = {
.erp_destroy = mlxsw_sp_acl_erp_no_mask_destroy,
};
+static bool
+mlxsw_sp_acl_erp_table_is_used(const struct mlxsw_sp_acl_erp_table *erp_table)
+{
+ return erp_table->ops != &erp_single_mask_ops &&
+ erp_table->ops != &erp_no_mask_ops;
+}
+
+static unsigned int
+mlxsw_sp_acl_erp_bank_get(const struct mlxsw_sp_acl_erp *erp)
+{
+ return erp->index % erp->erp_table->erp_core->num_erp_banks;
+}
+
static unsigned int
mlxsw_sp_acl_erp_table_entry_size(const struct mlxsw_sp_acl_erp_table *erp_table)
{
@@ -504,6 +518,48 @@ err_table_relocate:
}
static int
+mlxsw_acl_erp_table_bf_add(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp *erp)
+{
+ struct mlxsw_sp_acl_atcam_region *aregion = erp_table->aregion;
+ unsigned int erp_bank = mlxsw_sp_acl_erp_bank_get(erp);
+ struct mlxsw_sp_acl_atcam_entry *aentry;
+ int err;
+
+ list_for_each_entry(aentry, &aregion->entries_list, list) {
+ err = mlxsw_sp_acl_bf_entry_add(aregion->region->mlxsw_sp,
+ erp_table->erp_core->bf,
+ aregion, erp_bank, aentry);
+ if (err)
+ goto bf_entry_add_err;
+ }
+
+ return 0;
+
+bf_entry_add_err:
+ list_for_each_entry_continue_reverse(aentry, &aregion->entries_list,
+ list)
+ mlxsw_sp_acl_bf_entry_del(aregion->region->mlxsw_sp,
+ erp_table->erp_core->bf,
+ aregion, erp_bank, aentry);
+ return err;
+}
+
+static void
+mlxsw_acl_erp_table_bf_del(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp *erp)
+{
+ struct mlxsw_sp_acl_atcam_region *aregion = erp_table->aregion;
+ unsigned int erp_bank = mlxsw_sp_acl_erp_bank_get(erp);
+ struct mlxsw_sp_acl_atcam_entry *aentry;
+
+ list_for_each_entry_reverse(aentry, &aregion->entries_list, list)
+ mlxsw_sp_acl_bf_entry_del(aregion->region->mlxsw_sp,
+ erp_table->erp_core->bf,
+ aregion, erp_bank, aentry);
+}
+
+static int
mlxsw_sp_acl_erp_region_table_trans(struct mlxsw_sp_acl_erp_table *erp_table)
{
struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core;
@@ -527,16 +583,24 @@ mlxsw_sp_acl_erp_region_table_trans(struct mlxsw_sp_acl_erp_table *erp_table)
goto err_table_master_rp;
}
- /* Maintain the same eRP bank for the master RP, so that we
- * wouldn't need to update the bloom filter
+ /* Make sure the master RP is using a valid index, as
+ * only a single eRP row is currently allocated.
*/
- master_rp->index = master_rp->index % erp_core->num_erp_banks;
+ master_rp->index = 0;
__set_bit(master_rp->index, erp_table->erp_index_bitmap);
err = mlxsw_sp_acl_erp_table_erp_add(erp_table, master_rp);
if (err)
goto err_table_master_rp_add;
+ /* Update Bloom filter before enabling eRP table, as rules
+ * on the master RP were not set to Bloom filter up to this
+ * point.
+ */
+ err = mlxsw_acl_erp_table_bf_add(erp_table, master_rp);
+ if (err)
+ goto err_table_bf_add;
+
err = mlxsw_sp_acl_erp_table_enable(erp_table, false);
if (err)
goto err_table_enable;
@@ -544,6 +608,8 @@ mlxsw_sp_acl_erp_region_table_trans(struct mlxsw_sp_acl_erp_table *erp_table)
return 0;
err_table_enable:
+ mlxsw_acl_erp_table_bf_del(erp_table, master_rp);
+err_table_bf_add:
mlxsw_sp_acl_erp_table_erp_del(master_rp);
err_table_master_rp_add:
__clear_bit(master_rp->index, erp_table->erp_index_bitmap);
@@ -564,6 +630,7 @@ mlxsw_sp_acl_erp_region_master_mask_trans(struct mlxsw_sp_acl_erp_table *erp_tab
master_rp = mlxsw_sp_acl_erp_table_master_rp(erp_table);
if (!master_rp)
return;
+ mlxsw_acl_erp_table_bf_del(erp_table, master_rp);
mlxsw_sp_acl_erp_table_erp_del(master_rp);
__clear_bit(master_rp->index, erp_table->erp_index_bitmap);
mlxsw_sp_acl_erp_table_free(erp_core, erp_table->num_max_atcam_erps,
@@ -635,8 +702,7 @@ __mlxsw_sp_acl_erp_table_other_inc(struct mlxsw_sp_acl_erp_table *erp_table,
/* If there are C-TCAM eRP or deltas in use we need to transition
* the region to use eRP table, if it is not already done
*/
- if (erp_table->ops != &erp_two_masks_ops &&
- erp_table->ops != &erp_multiple_masks_ops) {
+ if (!mlxsw_sp_acl_erp_table_is_used(erp_table)) {
err = mlxsw_sp_acl_erp_region_table_trans(erp_table);
if (err)
return err;
@@ -960,6 +1026,44 @@ void mlxsw_sp_acl_erp_mask_put(struct mlxsw_sp_acl_atcam_region *aregion,
objagg_obj_put(aregion->erp_table->objagg, objagg_obj);
}
+int mlxsw_sp_acl_erp_bf_insert(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_erp_mask *erp_mask,
+ struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+ struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask;
+ const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj);
+ unsigned int erp_bank;
+
+ ASSERT_RTNL();
+ if (!mlxsw_sp_acl_erp_table_is_used(erp->erp_table))
+ return 0;
+
+ erp_bank = mlxsw_sp_acl_erp_bank_get(erp);
+ return mlxsw_sp_acl_bf_entry_add(mlxsw_sp,
+ erp->erp_table->erp_core->bf,
+ aregion, erp_bank, aentry);
+}
+
+void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_erp_mask *erp_mask,
+ struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+ struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask;
+ const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj);
+ unsigned int erp_bank;
+
+ ASSERT_RTNL();
+ if (!mlxsw_sp_acl_erp_table_is_used(erp->erp_table))
+ return;
+
+ erp_bank = mlxsw_sp_acl_erp_bank_get(erp);
+ mlxsw_sp_acl_bf_entry_del(mlxsw_sp,
+ erp->erp_table->erp_core->bf,
+ aregion, erp_bank, aentry);
+}
+
bool
mlxsw_sp_acl_erp_mask_is_ctcam(const struct mlxsw_sp_acl_erp_mask *erp_mask)
{
@@ -1320,6 +1424,12 @@ static int mlxsw_sp_acl_erp_tables_init(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_gen_pool_add;
+ erp_core->bf = mlxsw_sp_acl_bf_init(mlxsw_sp, erp_core->num_erp_banks);
+ if (IS_ERR(erp_core->bf)) {
+ err = PTR_ERR(erp_core->bf);
+ goto err_bf_init;
+ }
+
/* Different regions require masks of different sizes */
err = mlxsw_sp_acl_erp_tables_sizes_query(mlxsw_sp, erp_core);
if (err)
@@ -1328,6 +1438,8 @@ static int mlxsw_sp_acl_erp_tables_init(struct mlxsw_sp *mlxsw_sp,
return 0;
err_erp_tables_sizes_query:
+ mlxsw_sp_acl_bf_fini(erp_core->bf);
+err_bf_init:
err_gen_pool_add:
gen_pool_destroy(erp_core->erp_tables);
return err;
@@ -1336,6 +1448,7 @@ err_gen_pool_add:
static void mlxsw_sp_acl_erp_tables_fini(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_erp_core *erp_core)
{
+ mlxsw_sp_acl_bf_fini(erp_core->bf);
gen_pool_destroy(erp_core->erp_tables);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
index 95929b9039ec..0f1a9dee63de 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
@@ -152,6 +152,7 @@ struct mlxsw_sp_acl_atcam {
struct mlxsw_sp_acl_atcam_region {
struct rhashtable entries_ht; /* A-TCAM only */
+ struct list_head entries_list; /* A-TCAM only */
struct mlxsw_sp_acl_ctcam_region cregion;
const struct mlxsw_sp_acl_atcam_region_ops *ops;
struct mlxsw_sp_acl_tcam_region *region;
@@ -174,6 +175,7 @@ struct mlxsw_sp_acl_atcam_chunk {
struct mlxsw_sp_acl_atcam_entry {
struct rhash_head ht_node;
+ struct list_head list; /* Member in entries_list */
struct mlxsw_sp_acl_atcam_entry_ht_key ht_key;
char full_enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded key */
struct {
@@ -251,6 +253,14 @@ mlxsw_sp_acl_erp_mask_get(struct mlxsw_sp_acl_atcam_region *aregion,
const char *mask, bool ctcam);
void mlxsw_sp_acl_erp_mask_put(struct mlxsw_sp_acl_atcam_region *aregion,
struct mlxsw_sp_acl_erp_mask *erp_mask);
+int mlxsw_sp_acl_erp_bf_insert(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_erp_mask *erp_mask,
+ struct mlxsw_sp_acl_atcam_entry *aentry);
+void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_erp_mask *erp_mask,
+ struct mlxsw_sp_acl_atcam_entry *aentry);
int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion);
void mlxsw_sp_acl_erp_region_fini(struct mlxsw_sp_acl_atcam_region *aregion);
int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp,
@@ -258,4 +268,22 @@ int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp,
void mlxsw_sp_acl_erps_fini(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_atcam *atcam);
+struct mlxsw_sp_acl_bf;
+
+int
+mlxsw_sp_acl_bf_entry_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_bf *bf,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ unsigned int erp_bank,
+ struct mlxsw_sp_acl_atcam_entry *aentry);
+void
+mlxsw_sp_acl_bf_entry_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_bf *bf,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ unsigned int erp_bank,
+ struct mlxsw_sp_acl_atcam_entry *aentry);
+struct mlxsw_sp_acl_bf *
+mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks);
+void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf);
+
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
index ad6c2a621c7a..9584f03f3efa 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
@@ -17,6 +17,8 @@
#define NFP_NUM_BANDS_SYM_NAME "_abi_pci_dscp_num_band_%u"
#define NFP_ACT_MASK_SYM_NAME "_abi_nfd_out_q_actions_%u"
+#define NFP_RED_SUPPORT_SYM_NAME "_abi_nfd_out_red_offload_%u"
+
#define NFP_QLVL_SYM_NAME "_abi_nfd_out_q_lvls_%u%s"
#define NFP_QLVL_STRIDE 16
#define NFP_QLVL_BLOG_BYTES 0
@@ -358,6 +360,12 @@ int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm)
abm->pf_id = nfp_cppcore_pcie_unit(pf->cpp);
+ /* Check if Qdisc offloads are supported */
+ res = nfp_pf_rtsym_read_optional(pf, NFP_RED_SUPPORT_SYM_NAME, 1);
+ if (res < 0)
+ return res;
+ abm->red_support = res;
+
/* Read count of prios and prio bands */
res = nfp_pf_rtsym_read_optional(pf, NFP_NUM_BANDS_SYM_NAME, 1);
if (res < 0)
@@ -390,6 +398,9 @@ int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm)
}
/* Find level and stat symbols */
+ if (!abm->red_support)
+ return 0;
+
sym = nfp_abm_ctrl_find_q_rtsym(abm, NFP_QLVL_SYM_NAME,
NFP_QLVL_STRIDE);
if (IS_ERR(sym))
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index 80e79447b644..4d4ff5844c47 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -207,6 +207,9 @@ static int nfp_abm_eswitch_set_switchdev(struct nfp_abm *abm)
struct nfp_net *nn;
int err;
+ if (!abm->red_support)
+ return -EOPNOTSUPP;
+
err = nfp_abm_ctrl_qm_enable(abm);
if (err)
return err;
@@ -418,12 +421,26 @@ nfp_abm_port_get_stats_strings(struct nfp_app *app, struct nfp_port *port,
return data;
}
+static int nfp_abm_fw_init_reset(struct nfp_abm *abm)
+{
+ unsigned int i;
+
+ if (!abm->red_support)
+ return 0;
+
+ for (i = 0; i < abm->num_bands * NFP_NET_MAX_RX_RINGS; i++) {
+ __nfp_abm_ctrl_set_q_lvl(abm, i, NFP_ABM_LVL_INFINITY);
+ __nfp_abm_ctrl_set_q_act(abm, i, NFP_ABM_ACT_DROP);
+ }
+
+ return nfp_abm_ctrl_qm_disable(abm);
+}
+
static int nfp_abm_init(struct nfp_app *app)
{
struct nfp_pf *pf = app->pf;
struct nfp_reprs *reprs;
struct nfp_abm *abm;
- unsigned int i;
int err;
if (!pf->eth_tbl) {
@@ -460,18 +477,14 @@ static int nfp_abm_init(struct nfp_app *app)
sizeof(*abm->thresholds), GFP_KERNEL);
if (!abm->thresholds)
goto err_free_thresh_umap;
- for (i = 0; i < abm->num_bands * NFP_NET_MAX_RX_RINGS; i++)
- __nfp_abm_ctrl_set_q_lvl(abm, i, NFP_ABM_LVL_INFINITY);
abm->actions = kvcalloc(abm->num_thresholds, sizeof(*abm->actions),
GFP_KERNEL);
if (!abm->actions)
goto err_free_thresh;
- for (i = 0; i < abm->num_bands * NFP_NET_MAX_RX_RINGS; i++)
- __nfp_abm_ctrl_set_q_act(abm, i, NFP_ABM_ACT_DROP);
/* We start in legacy mode, make sure advanced queuing is disabled */
- err = nfp_abm_ctrl_qm_disable(abm);
+ err = nfp_abm_fw_init_reset(abm);
if (err)
goto err_free_act;
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
index 4dcf5881fb4b..49749c60885e 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.h
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.h
@@ -40,6 +40,7 @@ enum nfp_abm_q_action {
* @app: back pointer to nfp_app
* @pf_id: ID of our PF link
*
+ * @red_support: is RED offload supported
* @num_prios: number of supported DSCP priorities
* @num_bands: number of supported DSCP priority bands
* @action_mask: bitmask of supported actions
@@ -63,6 +64,7 @@ struct nfp_abm {
struct nfp_app *app;
unsigned int pf_id;
+ unsigned int red_support;
unsigned int num_prios;
unsigned int num_bands;
unsigned int action_mask;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 21499a5b3b6b..c642fd84eb02 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -718,7 +718,7 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app,
err = tcf_block_cb_register(f->block,
nfp_flower_setup_indr_block_cb,
- netdev, cb_priv, f->extack);
+ cb_priv, cb_priv, f->extack);
if (err) {
list_del(&cb_priv->list);
kfree(cb_priv);
@@ -726,13 +726,15 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app,
return err;
case TC_BLOCK_UNBIND:
- tcf_block_cb_unregister(f->block,
- nfp_flower_setup_indr_block_cb, netdev);
cb_priv = nfp_flower_indr_block_cb_priv_lookup(app, netdev);
- if (cb_priv) {
- list_del(&cb_priv->list);
- kfree(cb_priv);
- }
+ if (!cb_priv)
+ return -ENOENT;
+
+ tcf_block_cb_unregister(f->block,
+ nfp_flower_setup_indr_block_cb,
+ cb_priv);
+ list_del(&cb_priv->list);
+ kfree(cb_priv);
return 0;
default:
@@ -766,15 +768,14 @@ int nfp_flower_reg_indir_block_handler(struct nfp_app *app,
if (event == NETDEV_REGISTER) {
err = __tc_indr_block_cb_register(netdev, app,
nfp_flower_indr_setup_tc_cb,
- netdev);
+ app);
if (err)
nfp_flower_cmsg_warn(app,
"Indirect block reg failed - %s\n",
netdev->name);
} else if (event == NETDEV_UNREGISTER) {
__tc_indr_block_cb_unregister(netdev,
- nfp_flower_indr_setup_tc_cb,
- netdev);
+ nfp_flower_indr_setup_tc_cb, app);
}
return NOTIFY_OK;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
index 50eaafa3eaba..af3b037fa442 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
@@ -1067,9 +1067,6 @@ static int qlcnic_sriov_pf_cfg_ip_cmd(struct qlcnic_bc_trans *trans,
struct qlcnic_vf_info *vf = trans->vf;
struct qlcnic_adapter *adapter = vf->adapter;
int err = -EIO;
- u8 op;
-
- op = cmd->req.arg[1] & 0xff;
cmd->req.arg[1] |= vf->vp->handle << 16;
cmd->req.arg[1] |= BIT_31;
@@ -1339,14 +1336,13 @@ static int qlcnic_sriov_pf_get_acl_cmd(struct qlcnic_bc_trans *trans,
{
struct qlcnic_vf_info *vf = trans->vf;
struct qlcnic_vport *vp = vf->vp;
- u8 cmd_op, mode = vp->vlan_mode;
+ u8 mode = vp->vlan_mode;
struct qlcnic_adapter *adapter;
struct qlcnic_sriov *sriov;
adapter = vf->adapter;
sriov = adapter->ahw->sriov;
- cmd_op = trans->req_hdr->cmd_op;
cmd->rsp.arg[0] |= 1 << 25;
/* For 84xx adapter in case of PVID , PFD should send vlan mode as
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index e26c48bd54a2..18e39e5e447b 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -6405,8 +6405,9 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
{
struct rtl8169_private *tp = dev_instance;
u16 status = rtl_get_events(tp);
+ u16 irq_mask = RTL_R16(tp, IntrMask);
- if (status == 0xffff || !(status & tp->irq_mask))
+ if (status == 0xffff || !(status & irq_mask))
return IRQ_NONE;
if (unlikely(status & SYSErr)) {
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index bba9733b5119..05a0948ad929 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -257,7 +257,6 @@ struct netsec_desc_ring {
dma_addr_t desc_dma;
struct netsec_desc *desc;
void *vaddr;
- u16 pkt_cnt;
u16 head, tail;
};
@@ -598,33 +597,26 @@ static void netsec_set_rx_de(struct netsec_priv *priv,
dring->desc[idx].len = desc->len;
}
-static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget)
+static bool netsec_clean_tx_dring(struct netsec_priv *priv)
{
struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX];
unsigned int pkts, bytes;
-
- dring->pkt_cnt += netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT);
-
- if (dring->pkt_cnt < budget)
- budget = dring->pkt_cnt;
+ struct netsec_de *entry;
+ int tail = dring->tail;
+ int cnt = 0;
pkts = 0;
bytes = 0;
+ entry = dring->vaddr + DESC_SZ * tail;
- while (pkts < budget) {
+ while (!(entry->attr & (1U << NETSEC_TX_SHIFT_OWN_FIELD)) &&
+ cnt < DESC_NUM) {
struct netsec_desc *desc;
- struct netsec_de *entry;
- int tail, eop;
-
- tail = dring->tail;
-
- /* move tail ahead */
- dring->tail = (tail + 1) % DESC_NUM;
+ int eop;
desc = &dring->desc[tail];
- entry = dring->vaddr + DESC_SZ * tail;
-
eop = (entry->attr >> NETSEC_TX_LAST) & 1;
+ dma_rmb();
dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
DMA_TO_DEVICE);
@@ -633,33 +625,51 @@ static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget)
bytes += desc->skb->len;
dev_kfree_skb(desc->skb);
}
+ /* clean up so netsec_uninit_pkt_dring() won't free the skb
+ * again
+ */
*desc = (struct netsec_desc){};
+
+ /* entry->attr is not going to be accessed by the NIC until
+ * netsec_set_tx_de() is called. No need for a dma_wmb() here
+ */
+ entry->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD;
+ /* move tail ahead */
+ dring->tail = (tail + 1) % DESC_NUM;
+
+ tail = dring->tail;
+ entry = dring->vaddr + DESC_SZ * tail;
+ cnt++;
}
- dring->pkt_cnt -= budget;
- priv->ndev->stats.tx_packets += budget;
+ if (!cnt)
+ return false;
+
+ /* reading the register clears the irq */
+ netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT);
+
+ priv->ndev->stats.tx_packets += cnt;
priv->ndev->stats.tx_bytes += bytes;
- netdev_completed_queue(priv->ndev, budget, bytes);
+ netdev_completed_queue(priv->ndev, cnt, bytes);
- return budget;
+ return true;
}
-static int netsec_process_tx(struct netsec_priv *priv, int budget)
+static void netsec_process_tx(struct netsec_priv *priv)
{
struct net_device *ndev = priv->ndev;
- int new, done = 0;
+ bool cleaned;
- do {
- new = netsec_clean_tx_dring(priv, budget);
- done += new;
- budget -= new;
- } while (new);
+ cleaned = netsec_clean_tx_dring(priv);
- if (done && netif_queue_stopped(ndev))
+ if (cleaned && netif_queue_stopped(ndev)) {
+ /* Make sure we update the value, anyone stopping the queue
+ * after this will read the proper consumer idx
+ */
+ smp_wmb();
netif_wake_queue(ndev);
-
- return done;
+ }
}
static void *netsec_alloc_rx_data(struct netsec_priv *priv,
@@ -808,24 +818,17 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
static int netsec_napi_poll(struct napi_struct *napi, int budget)
{
struct netsec_priv *priv;
- int tx, rx, done, todo;
+ int rx, done, todo;
priv = container_of(napi, struct netsec_priv, napi);
+ netsec_process_tx(priv);
+
todo = budget;
do {
- if (!todo)
- break;
-
- tx = netsec_process_tx(priv, todo);
- todo -= tx;
-
- if (!todo)
- break;
-
rx = netsec_process_rx(priv, todo);
todo -= rx;
- } while (rx || tx);
+ } while (rx);
done = budget - todo;
@@ -877,6 +880,41 @@ static void netsec_set_tx_de(struct netsec_priv *priv,
dring->head = (dring->head + 1) % DESC_NUM;
}
+static int netsec_desc_used(struct netsec_desc_ring *dring)
+{
+ int used;
+
+ if (dring->head >= dring->tail)
+ used = dring->head - dring->tail;
+ else
+ used = dring->head + DESC_NUM - dring->tail;
+
+ return used;
+}
+
+static int netsec_check_stop_tx(struct netsec_priv *priv, int used)
+{
+ struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX];
+
+ /* keep tail from touching the queue */
+ if (DESC_NUM - used < 2) {
+ netif_stop_queue(priv->ndev);
+
+ /* Make sure we read the updated value in case
+ * descriptors got freed
+ */
+ smp_rmb();
+
+ used = netsec_desc_used(dring);
+ if (DESC_NUM - used < 2)
+ return NETDEV_TX_BUSY;
+
+ netif_wake_queue(priv->ndev);
+ }
+
+ return 0;
+}
+
static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
struct net_device *ndev)
{
@@ -887,16 +925,10 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
u16 tso_seg_len = 0;
int filled;
- /* differentiate between full/emtpy ring */
- if (dring->head >= dring->tail)
- filled = dring->head - dring->tail;
- else
- filled = dring->head + DESC_NUM - dring->tail;
-
- if (DESC_NUM - filled < 2) { /* if less than 2 available */
- netif_err(priv, drv, priv->ndev, "%s: TxQFull!\n", __func__);
- netif_stop_queue(priv->ndev);
- dma_wmb();
+ filled = netsec_desc_used(dring);
+ if (netsec_check_stop_tx(priv, filled)) {
+ net_warn_ratelimited("%s %s Tx queue full\n",
+ dev_name(priv->dev), ndev->name);
return NETDEV_TX_BUSY;
}
@@ -973,7 +1005,6 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id)
dring->head = 0;
dring->tail = 0;
- dring->pkt_cnt = 0;
if (id == NETSEC_RING_TX)
netdev_reset_queue(priv->ndev);
@@ -996,6 +1027,7 @@ static void netsec_free_dring(struct netsec_priv *priv, int id)
static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id)
{
struct netsec_desc_ring *dring = &priv->desc_ring[id];
+ int i;
dring->vaddr = dma_zalloc_coherent(priv->dev, DESC_SZ * DESC_NUM,
&dring->desc_dma, GFP_KERNEL);
@@ -1006,6 +1038,19 @@ static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id)
if (!dring->desc)
goto err;
+ if (id == NETSEC_RING_TX) {
+ for (i = 0; i < DESC_NUM; i++) {
+ struct netsec_de *de;
+
+ de = dring->vaddr + (DESC_SZ * i);
+ /* de->attr is not going to be accessed by the NIC
+ * until netsec_set_tx_de() is called.
+ * No need for a dma_wmb() here
+ */
+ de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD;
+ }
+ }
+
return 0;
err:
netsec_free_dring(priv, id);
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 17e6dcd2eb42..28c749980359 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -120,7 +120,7 @@ struct sixpack {
struct timer_list tx_t;
struct timer_list resync_t;
refcount_t refcnt;
- struct semaphore dead_sem;
+ struct completion dead;
spinlock_t lock;
};
@@ -389,7 +389,7 @@ static struct sixpack *sp_get(struct tty_struct *tty)
static void sp_put(struct sixpack *sp)
{
if (refcount_dec_and_test(&sp->refcnt))
- up(&sp->dead_sem);
+ complete(&sp->dead);
}
/*
@@ -576,7 +576,7 @@ static int sixpack_open(struct tty_struct *tty)
spin_lock_init(&sp->lock);
refcount_set(&sp->refcnt, 1);
- sema_init(&sp->dead_sem, 0);
+ init_completion(&sp->dead);
/* !!! length of the buffers. MTU is IP MTU, not PACLEN! */
@@ -670,10 +670,10 @@ static void sixpack_close(struct tty_struct *tty)
* we have to wait for all existing users to finish.
*/
if (!refcount_dec_and_test(&sp->refcnt))
- down(&sp->dead_sem);
+ wait_for_completion(&sp->dead);
/* We must stop the queue to avoid potentially scribbling
- * on the free buffers. The sp->dead_sem is not sufficient
+ * on the free buffers. The sp->dead completion is not sufficient
* to protect us from sp->xbuff access.
*/
netif_stop_queue(sp->dev);
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 802233d41b25..4938cf4c184c 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -81,7 +81,7 @@ struct mkiss {
#define CRC_MODE_SMACK_TEST 4
atomic_t refcnt;
- struct semaphore dead_sem;
+ struct completion dead;
};
/*---------------------------------------------------------------------------*/
@@ -687,7 +687,7 @@ static struct mkiss *mkiss_get(struct tty_struct *tty)
static void mkiss_put(struct mkiss *ax)
{
if (atomic_dec_and_test(&ax->refcnt))
- up(&ax->dead_sem);
+ complete(&ax->dead);
}
static int crc_force = 0; /* Can be overridden with insmod */
@@ -715,7 +715,7 @@ static int mkiss_open(struct tty_struct *tty)
spin_lock_init(&ax->buflock);
atomic_set(&ax->refcnt, 1);
- sema_init(&ax->dead_sem, 0);
+ init_completion(&ax->dead);
ax->tty = tty;
tty->disc_data = ax;
@@ -795,7 +795,7 @@ static void mkiss_close(struct tty_struct *tty)
* we have to wait for all existing users to finish.
*/
if (!atomic_dec_and_test(&ax->refcnt))
- down(&ax->dead_sem);
+ wait_for_completion(&ax->dead);
/*
* Halt the transmit queue so that a new transmit cannot scribble
* on our buffers
diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index f7fb62712cd8..72d43c88e6ff 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -25,6 +25,7 @@
#include <linux/gpio.h>
#include <linux/seqlock.h>
#include <linux/idr.h>
+#include <linux/netdevice.h>
#include "swphy.h"
@@ -38,6 +39,7 @@ struct fixed_phy {
struct phy_device *phydev;
seqcount_t seqcount;
struct fixed_phy_status status;
+ bool no_carrier;
int (*link_update)(struct net_device *, struct fixed_phy_status *);
struct list_head node;
int link_gpio;
@@ -48,9 +50,28 @@ static struct fixed_mdio_bus platform_fmb = {
.phys = LIST_HEAD_INIT(platform_fmb.phys),
};
+int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier)
+{
+ struct fixed_mdio_bus *fmb = &platform_fmb;
+ struct phy_device *phydev = dev->phydev;
+ struct fixed_phy *fp;
+
+ if (!phydev || !phydev->mdio.bus)
+ return -EINVAL;
+
+ list_for_each_entry(fp, &fmb->phys, node) {
+ if (fp->addr == phydev->mdio.addr) {
+ fp->no_carrier = !new_carrier;
+ return 0;
+ }
+ }
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(fixed_phy_change_carrier);
+
static void fixed_phy_update(struct fixed_phy *fp)
{
- if (gpio_is_valid(fp->link_gpio))
+ if (!fp->no_carrier && gpio_is_valid(fp->link_gpio))
fp->status.link = !!gpio_get_value_cansleep(fp->link_gpio);
}
@@ -66,6 +87,7 @@ static int fixed_mdio_read(struct mii_bus *bus, int phy_addr, int reg_num)
do {
s = read_seqcount_begin(&fp->seqcount);
+ fp->status.link = !fp->no_carrier;
/* Issue callback if user registered it. */
if (fp->link_update) {
fp->link_update(fp->phydev->attached_dev,
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index e24708f1fc16..d33e7b3caf03 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -543,6 +543,13 @@ int phy_start_aneg(struct phy_device *phydev)
mutex_lock(&phydev->lock);
+ if (!__phy_is_started(phydev)) {
+ WARN(1, "called from state %s\n",
+ phy_state_to_str(phydev->state));
+ err = -EBUSY;
+ goto out_unlock;
+ }
+
if (AUTONEG_DISABLE == phydev->autoneg)
phy_sanitize_settings(phydev);
@@ -553,13 +560,11 @@ int phy_start_aneg(struct phy_device *phydev)
if (err < 0)
goto out_unlock;
- if (phydev->state != PHY_HALTED) {
- if (AUTONEG_ENABLE == phydev->autoneg) {
- err = phy_check_link_status(phydev);
- } else {
- phydev->state = PHY_FORCING;
- phydev->link_timeout = PHY_FORCE_TIMEOUT;
- }
+ if (phydev->autoneg == AUTONEG_ENABLE) {
+ err = phy_check_link_status(phydev);
+ } else {
+ phydev->state = PHY_FORCING;
+ phydev->link_timeout = PHY_FORCE_TIMEOUT;
}
out_unlock:
@@ -709,7 +714,7 @@ void phy_stop_machine(struct phy_device *phydev)
cancel_delayed_work_sync(&phydev->state_queue);
mutex_lock(&phydev->lock);
- if (phydev->state > PHY_UP && phydev->state != PHY_HALTED)
+ if (__phy_is_started(phydev))
phydev->state = PHY_UP;
mutex_unlock(&phydev->lock);
}
@@ -725,6 +730,8 @@ void phy_stop_machine(struct phy_device *phydev)
*/
static void phy_error(struct phy_device *phydev)
{
+ WARN_ON(1);
+
mutex_lock(&phydev->lock);
phydev->state = PHY_HALTED;
mutex_unlock(&phydev->lock);
@@ -760,7 +767,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
{
struct phy_device *phydev = phy_dat;
- if (PHY_HALTED == phydev->state)
+ if (!phy_is_started(phydev))
return IRQ_NONE; /* It can't be ours. */
if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev))
@@ -842,15 +849,18 @@ void phy_stop(struct phy_device *phydev)
{
mutex_lock(&phydev->lock);
- if (PHY_HALTED == phydev->state)
- goto out_unlock;
+ if (!__phy_is_started(phydev)) {
+ WARN(1, "called from state %s\n",
+ phy_state_to_str(phydev->state));
+ mutex_unlock(&phydev->lock);
+ return;
+ }
if (phy_interrupt_is_valid(phydev))
phy_disable_interrupts(phydev);
phydev->state = PHY_HALTED;
-out_unlock:
mutex_unlock(&phydev->lock);
phy_state_machine(&phydev->state_queue.work);
@@ -984,7 +994,7 @@ void phy_state_machine(struct work_struct *work)
* state machine would be pointless and possibly error prone when
* called from phy_disconnect() synchronously.
*/
- if (phy_polling_mode(phydev) && old_state != PHY_HALTED)
+ if (phy_polling_mode(phydev) && phy_is_started(phydev))
phy_queue_state_machine(phydev, PHY_STATE_TIME);
}
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index bdc4d23627c5..288cf099876b 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -70,7 +70,7 @@ struct asyncppp {
struct tasklet_struct tsk;
refcount_t refcnt;
- struct semaphore dead_sem;
+ struct completion dead;
struct ppp_channel chan; /* interface to generic ppp layer */
unsigned char obuf[OBUFSIZE];
};
@@ -148,7 +148,7 @@ static struct asyncppp *ap_get(struct tty_struct *tty)
static void ap_put(struct asyncppp *ap)
{
if (refcount_dec_and_test(&ap->refcnt))
- up(&ap->dead_sem);
+ complete(&ap->dead);
}
/*
@@ -186,7 +186,7 @@ ppp_asynctty_open(struct tty_struct *tty)
tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap);
refcount_set(&ap->refcnt, 1);
- sema_init(&ap->dead_sem, 0);
+ init_completion(&ap->dead);
ap->chan.private = ap;
ap->chan.ops = &async_ops;
@@ -235,7 +235,7 @@ ppp_asynctty_close(struct tty_struct *tty)
* by the time it returns.
*/
if (!refcount_dec_and_test(&ap->refcnt))
- down(&ap->dead_sem);
+ wait_for_completion(&ap->dead);
tasklet_kill(&ap->tsk);
ppp_unregister_channel(&ap->chan);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 72577aa35b06..a4fdad475594 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2495,12 +2495,14 @@ build:
skb_record_rx_queue(skb, tfile->queue_index);
netif_receive_skb(skb);
- stats = get_cpu_ptr(tun->pcpu_stats);
+ /* No need for get_cpu_ptr() here since this function is
+ * always called with bh disabled
+ */
+ stats = this_cpu_ptr(tun->pcpu_stats);
u64_stats_update_begin(&stats->syncp);
stats->rx_packets++;
stats->rx_bytes += datasize;
u64_stats_update_end(&stats->syncp);
- put_cpu_ptr(stats);
if (rxhash)
tun_flow_update(tun, rxhash, tfile);
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 49d4b5854c62..71c3b7b6b1ab 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1152,6 +1152,39 @@ out:
return err;
}
+static int vxlan_fdb_get(struct sk_buff *skb,
+ struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr,
+ u16 vid, u32 portid, u32 seq,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_fdb *f;
+ __be32 vni;
+ int err;
+
+ if (tb[NDA_VNI])
+ vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
+ else
+ vni = vxlan->default_dst.remote_vni;
+
+ rcu_read_lock();
+
+ f = __vxlan_find_mac(vxlan, addr, vni);
+ if (!f) {
+ NL_SET_ERR_MSG(extack, "Fdb entry not found");
+ err = -ENOENT;
+ goto errout;
+ }
+
+ err = vxlan_fdb_info(skb, vxlan, f, portid, seq,
+ RTM_NEWNEIGH, 0, first_remote_rcu(f));
+errout:
+ rcu_read_unlock();
+ return err;
+}
+
/* Watch incoming packets to learn mapping between Ethernet address
* and Tunnel endpoint.
* Return true if packet is bogus and should be dropped.
@@ -2805,6 +2838,7 @@ static const struct net_device_ops vxlan_netdev_ether_ops = {
.ndo_fdb_add = vxlan_fdb_add,
.ndo_fdb_del = vxlan_fdb_delete,
.ndo_fdb_dump = vxlan_fdb_dump,
+ .ndo_fdb_get = vxlan_fdb_get,
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
};
diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h
new file mode 100644
index 000000000000..00d7e8e919c6
--- /dev/null
+++ b/include/linux/indirect_call_wrapper.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_INDIRECT_CALL_WRAPPER_H
+#define _LINUX_INDIRECT_CALL_WRAPPER_H
+
+#ifdef CONFIG_RETPOLINE
+
+/*
+ * INDIRECT_CALL_$NR - wrapper for indirect calls with $NR known builtin
+ * @f: function pointer
+ * @f$NR: builtin functions names, up to $NR of them
+ * @__VA_ARGS__: arguments for @f
+ *
+ * Avoid retpoline overhead for known builtin, checking @f vs each of them and
+ * eventually invoking directly the builtin function. The functions are check
+ * in the given order. Fallback to the indirect call.
+ */
+#define INDIRECT_CALL_1(f, f1, ...) \
+ ({ \
+ likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__); \
+ })
+#define INDIRECT_CALL_2(f, f2, f1, ...) \
+ ({ \
+ likely(f == f2) ? f2(__VA_ARGS__) : \
+ INDIRECT_CALL_1(f, f1, __VA_ARGS__); \
+ })
+
+#define INDIRECT_CALLABLE_DECLARE(f) f
+#define INDIRECT_CALLABLE_SCOPE
+
+#else
+#define INDIRECT_CALL_1(f, f1, ...) f(__VA_ARGS__)
+#define INDIRECT_CALL_2(f, f2, f1, ...) f(__VA_ARGS__)
+#define INDIRECT_CALLABLE_DECLARE(f)
+#define INDIRECT_CALLABLE_SCOPE static
+#endif
+
+/*
+ * We can use INDIRECT_CALL_$NR for ipv6 related functions only if ipv6 is
+ * builtin, this macro simplify dealing with indirect calls with only ipv4/ipv6
+ * alternatives
+ */
+#if IS_BUILTIN(CONFIG_IPV6)
+#define INDIRECT_CALL_INET(f, f2, f1, ...) \
+ INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__)
+#elif IS_ENABLED(CONFIG_INET)
+#define INDIRECT_CALL_INET(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_INET(f, f2, f1, ...) f(__VA_ARGS__)
+#endif
+
+#endif
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 811632d4d8b1..1377d085ef99 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1387,7 +1387,12 @@ struct net_device_ops {
struct net_device *dev,
struct net_device *filter_dev,
int *idx);
-
+ int (*ndo_fdb_get)(struct sk_buff *skb,
+ struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr,
+ u16 vid, u32 portid, u32 seq,
+ struct netlink_ext_ack *extack);
int (*ndo_bridge_setlink)(struct net_device *dev,
struct nlmsghdr *nlh,
u16 flags,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 8f927246acdb..da039f211c22 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -319,12 +319,12 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr);
enum phy_state {
PHY_DOWN = 0,
PHY_READY,
+ PHY_HALTED,
PHY_UP,
PHY_RUNNING,
PHY_NOLINK,
PHY_FORCING,
PHY_CHANGELINK,
- PHY_HALTED,
PHY_RESUMING
};
@@ -669,6 +669,28 @@ phy_lookup_setting(int speed, int duplex, const unsigned long *mask,
size_t phy_speeds(unsigned int *speeds, size_t size,
unsigned long *mask);
+static inline bool __phy_is_started(struct phy_device *phydev)
+{
+ WARN_ON(!mutex_is_locked(&phydev->lock));
+
+ return phydev->state >= PHY_UP;
+}
+
+/**
+ * phy_is_started - Convenience function to check whether PHY is started
+ * @phydev: The phy_device struct
+ */
+static inline bool phy_is_started(struct phy_device *phydev)
+{
+ bool started;
+
+ mutex_lock(&phydev->lock);
+ started = __phy_is_started(phydev);
+ mutex_unlock(&phydev->lock);
+
+ return started;
+}
+
void phy_resolve_aneg_linkmode(struct phy_device *phydev);
/**
diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index ee54453a40a0..9525567b1951 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -13,6 +13,7 @@ struct fixed_phy_status {
struct device_node;
#if IS_ENABLED(CONFIG_FIXED_PHY)
+extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier);
extern int fixed_phy_add(unsigned int irq, int phy_id,
struct fixed_phy_status *status,
int link_gpio);
@@ -47,6 +48,10 @@ static inline int fixed_phy_set_link_update(struct phy_device *phydev,
{
return -ENODEV;
}
+static inline int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier)
+{
+ return -EINVAL;
+}
#endif /* CONFIG_FIXED_PHY */
#endif /* __PHY_FIXED_H */
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6ee2e24e0a6e..b3eefe8e18fd 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -36,7 +36,7 @@ enum dsa_tag_protocol {
DSA_TAG_PROTO_DSA,
DSA_TAG_PROTO_EDSA,
DSA_TAG_PROTO_GSWIP,
- DSA_TAG_PROTO_KSZ,
+ DSA_TAG_PROTO_KSZ9477,
DSA_TAG_PROTO_LAN9303,
DSA_TAG_PROTO_MTK,
DSA_TAG_PROTO_QCA,
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 3ca969cbd161..975901a95c0f 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -2,6 +2,8 @@
#ifndef _INET_COMMON_H
#define _INET_COMMON_H
+#include <linux/indirect_call_wrapper.h>
+
extern const struct proto_ops inet_stream_ops;
extern const struct proto_ops inet_dgram_ops;
@@ -54,4 +56,11 @@ static inline void inet_ctl_sock_destroy(struct sock *sk)
sock_release(sk->sk_socket);
}
+#define indirect_call_gro_receive(f2, f1, cb, head, skb) \
+({ \
+ unlikely(gro_recursion_inc_test(skb)) ? \
+ NAPI_GRO_CB(skb)->flush |= 1, NULL : \
+ INDIRECT_CALL_2(cb, f2, f1, head, skb); \
+})
+
#endif
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index f886b58956a6..66221f1991c0 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -140,8 +140,8 @@ struct neighbour {
unsigned long updated;
rwlock_t lock;
refcount_t refcnt;
- struct sk_buff_head arp_queue;
unsigned int arp_queue_len_bytes;
+ struct sk_buff_head arp_queue;
struct timer_list timer;
unsigned long used;
atomic_t probes;
@@ -149,8 +149,9 @@ struct neighbour {
__u8 nud_state;
__u8 type;
__u8 dead;
+ u8 protocol;
seqlock_t ha_lock;
- unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
+ unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))] __aligned(8);
struct hh_cache hh;
int (*output)(struct neighbour *, struct sk_buff *);
const struct neigh_ops *ops;
@@ -173,6 +174,7 @@ struct pneigh_entry {
possible_net_t net;
struct net_device *dev;
u8 flags;
+ u8 protocol;
u8 key[0];
};
@@ -549,24 +551,6 @@ static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
} while (read_seqretry(&n->ha_lock, seq));
}
-static inline void neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
- int *notify)
-{
- u8 ndm_flags = 0;
-
- if (!(flags & NEIGH_UPDATE_F_ADMIN))
- return;
-
- ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
- if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
- if (ndm_flags & NTF_EXT_LEARNED)
- neigh->flags |= NTF_EXT_LEARNED;
- else
- neigh->flags &= ~NTF_EXT_LEARNED;
- *notify = 1;
- }
-}
-
static inline void neigh_update_is_router(struct neighbour *neigh, u32 flags,
int *notify)
{
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 9991e5ef52cc..59f45b1e9dac 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -5,6 +5,7 @@
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
+#include <linux/rhashtable-types.h>
#include <linux/xfrm.h>
#include <net/dst_ops.h>
@@ -53,6 +54,7 @@ struct netns_xfrm {
unsigned int policy_count[XFRM_POLICY_MAX * 2];
struct work_struct policy_hash_work;
struct xfrm_policy_hthresh policy_hthresh;
+ struct list_head inexact_bins;
struct sock *nlsk;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index ea191d8cfcc9..40965fbbcd31 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -619,8 +619,8 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
}
#endif /* CONFIG_NET_CLS_IND */
-int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
- enum tc_setup_type type, void *type_data, bool err_stop);
+int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
+ void *type_data, bool err_stop);
enum tc_block_command {
TC_BLOCK_BIND,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0eb390c205af..0a8d70d16918 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -577,6 +577,7 @@ struct xfrm_policy {
/* This lock only affects elements except for entry. */
rwlock_t lock;
refcount_t refcnt;
+ u32 pos;
struct timer_list timer;
atomic_t genid;
@@ -589,6 +590,7 @@ struct xfrm_policy {
struct xfrm_lifetime_cur curlft;
struct xfrm_policy_walk_entry walk;
struct xfrm_policy_queue polq;
+ bool bydst_reinsert;
u8 type;
u8 action;
u8 flags;
@@ -596,6 +598,7 @@ struct xfrm_policy {
u16 family;
struct xfrm_sec_ctx *security;
struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH];
+ struct hlist_node bydst_inexact_list;
struct rcu_head rcu;
};
@@ -1967,7 +1970,7 @@ static inline void xfrm_dev_state_delete(struct xfrm_state *x)
static inline void xfrm_dev_state_free(struct xfrm_state *x)
{
struct xfrm_state_offload *xso = &x->xso;
- struct net_device *dev = xso->dev;
+ struct net_device *dev = xso->dev;
if (dev && dev->xfrmdev_ops) {
if (dev->xfrmdev_ops->xdo_dev_state_free)
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index 998155444e0d..cd144e3099a3 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -28,6 +28,7 @@ enum {
NDA_MASTER,
NDA_LINK_NETNSID,
NDA_SRC_VNI,
+ NDA_PROTOCOL, /* Originator of entry */
__NDA_MAX
};
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 82ac39ce5310..6a8ac7626797 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -20,11 +20,11 @@
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/rhashtable.h>
-#include <linux/semaphore.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/random.h>
#include <linux/vmalloc.h>
+#include <linux/wait.h>
#define MAX_ENTRIES 1000000
#define TEST_INSERT_FAIL INT_MAX
@@ -112,8 +112,8 @@ static struct rhashtable_params test_rht_params_dup = {
.automatic_shrinking = false,
};
-static struct semaphore prestart_sem;
-static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0);
+static atomic_t startup_count;
+static DECLARE_WAIT_QUEUE_HEAD(startup_wait);
static int insert_retry(struct rhashtable *ht, struct test_obj *obj,
const struct rhashtable_params params)
@@ -634,9 +634,12 @@ static int threadfunc(void *data)
int i, step, err = 0, insert_retries = 0;
struct thread_data *tdata = data;
- up(&prestart_sem);
- if (down_interruptible(&startup_sem))
- pr_err(" thread[%d]: down_interruptible failed\n", tdata->id);
+ if (atomic_dec_and_test(&startup_count))
+ wake_up(&startup_wait);
+ if (wait_event_interruptible(startup_wait, atomic_read(&startup_count) == -1)) {
+ pr_err(" thread[%d]: interrupted\n", tdata->id);
+ goto out;
+ }
for (i = 0; i < tdata->entries; i++) {
tdata->objs[i].value.id = i;
@@ -755,7 +758,7 @@ static int __init test_rht_init(void)
pr_info("Testing concurrent rhashtable access from %d threads\n",
tcount);
- sema_init(&prestart_sem, 1 - tcount);
+ atomic_set(&startup_count, tcount);
tdata = vzalloc(array_size(tcount, sizeof(struct thread_data)));
if (!tdata)
return -ENOMEM;
@@ -781,15 +784,18 @@ static int __init test_rht_init(void)
tdata[i].objs = objs + i * entries;
tdata[i].task = kthread_run(threadfunc, &tdata[i],
"rhashtable_thrad[%d]", i);
- if (IS_ERR(tdata[i].task))
+ if (IS_ERR(tdata[i].task)) {
pr_err(" kthread_run failed for thread %d\n", i);
- else
+ atomic_dec(&startup_count);
+ } else {
started_threads++;
+ }
}
- if (down_interruptible(&prestart_sem))
- pr_err(" down interruptible failed\n");
- for (i = 0; i < tcount; i++)
- up(&startup_sem);
+ if (wait_event_interruptible(startup_wait, atomic_read(&startup_count) == 0))
+ pr_err(" wait_event interruptible failed\n");
+ /* count is 0 now, set it to -1 and wake up all threads together */
+ atomic_dec(&startup_count);
+ wake_up_all(&startup_wait);
for (i = 0; i < tcount; i++) {
if (IS_ERR(tdata[i].task))
continue;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 9f41a5d4da3f..013323b6dbe4 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -403,6 +403,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fdb_add = br_fdb_add,
.ndo_fdb_del = br_fdb_delete,
.ndo_fdb_dump = br_fdb_dump,
+ .ndo_fdb_get = br_fdb_get,
.ndo_bridge_getlink = br_getlink,
.ndo_bridge_setlink = br_setlink,
.ndo_bridge_dellink = br_dellink,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 38b1d0dd0529..fe3c758791ca 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -773,6 +773,32 @@ skip:
return err;
}
+int br_fdb_get(struct sk_buff *skb,
+ struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr,
+ u16 vid, u32 portid, u32 seq,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct net_bridge_fdb_entry *f;
+ int err = 0;
+
+ rcu_read_lock();
+ f = br_fdb_find_rcu(br, addr, vid);
+ if (!f) {
+ NL_SET_ERR_MSG(extack, "Fdb entry not found");
+ err = -ENOENT;
+ goto errout;
+ }
+
+ err = fdb_fill_info(skb, br, f, portid, seq,
+ RTM_NEWNEIGH, 0);
+errout:
+ rcu_read_unlock();
+ return err;
+}
+
/* Update (create or replace) forwarding database entry */
static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
const u8 *addr, u16 state, u16 flags, u16 vid,
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 879cd2315769..3aeff0895669 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1937,7 +1937,6 @@ static void br_multicast_start_querier(struct net_bridge *br,
int br_multicast_toggle(struct net_bridge *br, unsigned long val)
{
struct net_bridge_port *port;
- int err = 0;
spin_lock_bh(&br->multicast_lock);
if (!!br_opt_get(br, BROPT_MULTICAST_ENABLED) == !!val)
@@ -1958,7 +1957,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
unlock:
spin_unlock_bh(&br->multicast_lock);
- return err;
+ return 0;
}
bool br_multicast_enabled(const struct net_device *dev)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 935495b93a99..9c07591b0232 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1175,9 +1175,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_MCAST_SNOOPING]) {
u8 mcast_snooping = nla_get_u8(data[IFLA_BR_MCAST_SNOOPING]);
- err = br_multicast_toggle(br, mcast_snooping);
- if (err)
- return err;
+ br_multicast_toggle(br, mcast_snooping);
}
if (data[IFLA_BR_MCAST_QUERY_USE_IFADDR]) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index ff3dfb2a78b8..d240b3e7919f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -575,6 +575,9 @@ int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags);
int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev, struct net_device *fdev, int *idx);
+int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
+ const unsigned char *addr, u16 vid, u32 portid, u32 seq,
+ struct netlink_ext_ack *extack);
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
diff --git a/net/core/dev.c b/net/core/dev.c
index ed9aa4a91f1f..1b5a4410be0e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
#include <linux/net_namespace.h>
+#include <linux/indirect_call_wrapper.h>
#include "net-sysfs.h"
@@ -5338,6 +5339,8 @@ static void flush_all_backlogs(void)
put_online_cpus();
}
+INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
static int napi_gro_complete(struct sk_buff *skb)
{
struct packet_offload *ptype;
@@ -5357,7 +5360,9 @@ static int napi_gro_complete(struct sk_buff *skb)
if (ptype->type != type || !ptype->callbacks.gro_complete)
continue;
- err = ptype->callbacks.gro_complete(skb, 0);
+ err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
+ ipv6_gro_complete, inet_gro_complete,
+ skb, 0);
break;
}
rcu_read_unlock();
@@ -5504,6 +5509,10 @@ static void gro_flush_oldest(struct list_head *head)
napi_gro_complete(oldest);
}
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
+ struct sk_buff *));
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
@@ -5553,7 +5562,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->csum_valid = 0;
}
- pp = ptype->callbacks.gro_receive(gro_head, skb);
+ pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
+ ipv6_gro_receive, inet_gro_receive,
+ gro_head, skb);
break;
}
rcu_read_unlock();
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 03fdc5ae66b0..fb4372cb1de1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -127,40 +127,62 @@ static void neigh_mark_dead(struct neighbour *n)
}
}
-static void neigh_change_state(struct neighbour *n, u8 new)
+static void neigh_update_gc_list(struct neighbour *n)
{
- bool on_gc_list = !list_empty(&n->gc_list);
- bool new_is_perm = new & NUD_PERMANENT;
+ bool on_gc_list, exempt_from_gc;
- n->nud_state = new;
+ write_lock_bh(&n->tbl->lock);
+ write_lock(&n->lock);
- /* remove from the gc list if new state is permanent;
- * add to the gc list if new state is not permanent
+ /* remove from the gc list if new state is permanent or if neighbor
+ * is externally learned; otherwise entry should be on the gc list
*/
- if (new_is_perm && on_gc_list) {
- write_lock_bh(&n->tbl->lock);
- list_del_init(&n->gc_list);
- write_unlock_bh(&n->tbl->lock);
+ exempt_from_gc = n->nud_state & NUD_PERMANENT ||
+ n->flags & NTF_EXT_LEARNED;
+ on_gc_list = !list_empty(&n->gc_list);
+ if (exempt_from_gc && on_gc_list) {
+ list_del_init(&n->gc_list);
atomic_dec(&n->tbl->gc_entries);
- } else if (!new_is_perm && !on_gc_list) {
+ } else if (!exempt_from_gc && !on_gc_list) {
/* add entries to the tail; cleaning removes from the front */
- write_lock_bh(&n->tbl->lock);
list_add_tail(&n->gc_list, &n->tbl->gc_list);
- write_unlock_bh(&n->tbl->lock);
-
atomic_inc(&n->tbl->gc_entries);
}
+
+ write_unlock(&n->lock);
+ write_unlock_bh(&n->tbl->lock);
}
-static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags,
- struct neighbour __rcu **np, struct neigh_table *tbl)
+static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
+ int *notify)
+{
+ bool rc = false;
+ u8 ndm_flags;
+
+ if (!(flags & NEIGH_UPDATE_F_ADMIN))
+ return rc;
+
+ ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
+ if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
+ if (ndm_flags & NTF_EXT_LEARNED)
+ neigh->flags |= NTF_EXT_LEARNED;
+ else
+ neigh->flags &= ~NTF_EXT_LEARNED;
+ rc = true;
+ *notify = 1;
+ }
+
+ return rc;
+}
+
+static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
+ struct neigh_table *tbl)
{
bool retval = false;
write_lock(&n->lock);
- if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) &&
- !(n->flags & flags)) {
+ if (refcount_read(&n->refcnt) == 1) {
struct neighbour *neigh;
neigh = rcu_dereference_protected(n->next,
@@ -192,7 +214,7 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
while ((n = rcu_dereference_protected(*np,
lockdep_is_held(&tbl->lock)))) {
if (n == ndel)
- return neigh_del(n, 0, 0, np, tbl);
+ return neigh_del(n, np, tbl);
np = &n->next;
}
return false;
@@ -202,9 +224,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
{
int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
unsigned long tref = jiffies - 5 * HZ;
- u8 flags = NTF_EXT_LEARNED;
struct neighbour *n, *tmp;
- u8 state = NUD_PERMANENT;
int shrunk = 0;
NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
@@ -216,7 +236,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
bool remove = false;
write_lock(&n->lock);
- if (!(n->nud_state & state) && !(n->flags & flags) &&
+ if ((n->nud_state == NUD_FAILED) ||
time_after(tref, n->updated))
remove = true;
write_unlock(&n->lock);
@@ -355,13 +375,13 @@ EXPORT_SYMBOL(neigh_ifdown);
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
struct net_device *dev,
- bool permanent)
+ bool exempt_from_gc)
{
struct neighbour *n = NULL;
unsigned long now = jiffies;
int entries;
- if (permanent)
+ if (exempt_from_gc)
goto do_alloc;
entries = atomic_inc_return(&tbl->gc_entries) - 1;
@@ -403,7 +423,7 @@ out:
return n;
out_entries:
- if (!permanent)
+ if (!exempt_from_gc)
atomic_dec(&tbl->gc_entries);
goto out;
}
@@ -550,9 +570,9 @@ EXPORT_SYMBOL(neigh_lookup_nodev);
static struct neighbour *___neigh_create(struct neigh_table *tbl,
const void *pkey,
struct net_device *dev,
- bool permanent, bool want_ref)
+ bool exempt_from_gc, bool want_ref)
{
- struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, permanent);
+ struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
u32 hash_val;
unsigned int key_len = tbl->key_len;
int error;
@@ -618,7 +638,7 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl,
}
n->dead = 0;
- if (!permanent)
+ if (!exempt_from_gc)
list_add_tail(&n->gc_list, &n->tbl->gc_list);
if (want_ref)
@@ -1194,6 +1214,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
u8 new, u32 flags, u32 nlmsg_pid,
struct netlink_ext_ack *extack)
{
+ bool ext_learn_change = false;
u8 old;
int err;
int notify = 0;
@@ -1214,13 +1235,13 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
goto out;
}
- neigh_update_ext_learned(neigh, flags, &notify);
+ ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
if (!(new & NUD_VALID)) {
neigh_del_timer(neigh);
if (old & NUD_CONNECTED)
neigh_suspect(neigh);
- neigh_change_state(neigh, new);
+ neigh->nud_state = new;
err = 0;
notify = old & NUD_VALID;
if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
@@ -1299,7 +1320,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
((new & NUD_REACHABLE) ?
neigh->parms->reachable_time :
0)));
- neigh_change_state(neigh, new);
+ neigh->nud_state = new;
notify = 1;
}
@@ -1360,6 +1381,9 @@ out:
neigh_update_is_router(neigh, flags, &notify);
write_unlock_bh(&neigh->lock);
+ if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
+ neigh_update_gc_list(neigh);
+
if (notify)
neigh_update_notify(neigh, nlmsg_pid);
@@ -1804,6 +1828,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_device *dev = NULL;
struct neighbour *neigh;
void *dst, *lladdr;
+ u8 protocol = 0;
int err;
ASSERT_RTNL();
@@ -1843,6 +1868,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
dst = nla_data(tb[NDA_DST]);
lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
+ if (tb[NDA_PROTOCOL]) {
+ if (nla_len(tb[NDA_PROTOCOL]) != sizeof(u8)) {
+ NL_SET_ERR_MSG(extack, "Invalid protocol attribute");
+ goto out;
+ }
+ protocol = nla_get_u8(tb[NDA_PROTOCOL]);
+ }
+
if (ndm->ndm_flags & NTF_PROXY) {
struct pneigh_entry *pn;
@@ -1850,6 +1883,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
pn = pneigh_lookup(tbl, net, dst, dev, 1);
if (pn) {
pn->flags = ndm->ndm_flags;
+ if (protocol)
+ pn->protocol = protocol;
err = 0;
}
goto out;
@@ -1862,14 +1897,16 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
neigh = neigh_lookup(tbl, dst, dev);
if (neigh == NULL) {
+ bool exempt_from_gc;
+
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
err = -ENOENT;
goto out;
}
- neigh = ___neigh_create(tbl, dst, dev,
- ndm->ndm_state & NUD_PERMANENT,
- true);
+ exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
+ ndm->ndm_flags & NTF_EXT_LEARNED;
+ neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
if (IS_ERR(neigh)) {
err = PTR_ERR(neigh);
goto out;
@@ -1898,6 +1935,10 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
} else
err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
NETLINK_CB(skb).portid, extack);
+
+ if (protocol)
+ neigh->protocol = protocol;
+
neigh_release(neigh);
out:
@@ -2391,6 +2432,9 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
goto nla_put_failure;
+ if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
@@ -2422,6 +2466,9 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
goto nla_put_failure;
+ if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
@@ -3077,7 +3124,8 @@ static inline size_t neigh_nlmsg_size(void)
+ nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+ nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
+ nla_total_size(sizeof(struct nda_cacheinfo))
- + nla_total_size(4); /* NDA_PROBES */
+ + nla_total_size(4) /* NDA_PROBES */
+ + nla_total_size(1); /* NDA_PROTOCOL */
}
static void __neigh_notify(struct neighbour *n, int type, int flags,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f8bdb8adab2c..baf2685b4da2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3460,6 +3460,18 @@ void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
new_nsid, new_ifindex);
}
+static const struct nla_policy nda_policy[NDA_MAX+1] = {
+ [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+ [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+ [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
+ [NDA_PROBES] = { .type = NLA_U32 },
+ [NDA_VLAN] = { .type = NLA_U16 },
+ [NDA_PORT] = { .type = NLA_U16 },
+ [NDA_VNI] = { .type = NLA_U32 },
+ [NDA_IFINDEX] = { .type = NLA_U32 },
+ [NDA_MASTER] = { .type = NLA_U32 },
+};
+
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
u8 *addr, u16 vid, u32 pid, u32 seq,
@@ -4021,6 +4033,160 @@ out:
return skb->len;
}
+static int valid_fdb_get_strict(const struct nlmsghdr *nlh,
+ struct nlattr **tb, u8 *ndm_flags,
+ int *br_idx, int *brport_idx, u8 **addr,
+ u16 *vid, struct netlink_ext_ack *extack)
+{
+ struct ndmsg *ndm;
+ int err, i;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+ NL_SET_ERR_MSG(extack, "Invalid header for fdb get request");
+ return -EINVAL;
+ }
+
+ ndm = nlmsg_data(nlh);
+ if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
+ ndm->ndm_type) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for fdb get request");
+ return -EINVAL;
+ }
+
+ if (ndm->ndm_flags & ~(NTF_MASTER | NTF_SELF)) {
+ NL_SET_ERR_MSG(extack, "Invalid flags in header for fdb get request");
+ return -EINVAL;
+ }
+
+ err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
+ nda_policy, extack);
+ if (err < 0)
+ return err;
+
+ *ndm_flags = ndm->ndm_flags;
+ *brport_idx = ndm->ndm_ifindex;
+ for (i = 0; i <= NDA_MAX; ++i) {
+ if (!tb[i])
+ continue;
+
+ switch (i) {
+ case NDA_MASTER:
+ *br_idx = nla_get_u32(tb[i]);
+ break;
+ case NDA_LLADDR:
+ if (nla_len(tb[i]) != ETH_ALEN) {
+ NL_SET_ERR_MSG(extack, "Invalid address in fdb get request");
+ return -EINVAL;
+ }
+ *addr = nla_data(tb[i]);
+ break;
+ case NDA_VLAN:
+ err = fdb_vid_parse(tb[i], vid, extack);
+ if (err)
+ return err;
+ break;
+ case NDA_VNI:
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Unsupported attribute in fdb get request");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int rtnl_fdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = NULL, *br_dev = NULL;
+ const struct net_device_ops *ops = NULL;
+ struct net *net = sock_net(in_skb->sk);
+ struct nlattr *tb[NDA_MAX + 1];
+ struct sk_buff *skb;
+ int brport_idx = 0;
+ u8 ndm_flags = 0;
+ int br_idx = 0;
+ u8 *addr = NULL;
+ u16 vid = 0;
+ int err;
+
+ err = valid_fdb_get_strict(nlh, tb, &ndm_flags, &br_idx,
+ &brport_idx, &addr, &vid, extack);
+ if (err < 0)
+ return err;
+
+ if (brport_idx) {
+ dev = __dev_get_by_index(net, brport_idx);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Unknown device ifindex");
+ return -ENODEV;
+ }
+ }
+
+ if (br_idx) {
+ if (dev) {
+ NL_SET_ERR_MSG(extack, "Master and device are mutually exclusive");
+ return -EINVAL;
+ }
+
+ br_dev = __dev_get_by_index(net, br_idx);
+ if (!br_dev) {
+ NL_SET_ERR_MSG(extack, "Invalid master ifindex");
+ return -EINVAL;
+ }
+ ops = br_dev->netdev_ops;
+ }
+
+ if (dev) {
+ if (!ndm_flags || (ndm_flags & NTF_MASTER)) {
+ if (!(dev->priv_flags & IFF_BRIDGE_PORT)) {
+ NL_SET_ERR_MSG(extack, "Device is not a bridge port");
+ return -EINVAL;
+ }
+ br_dev = netdev_master_upper_dev_get(dev);
+ if (!br_dev) {
+ NL_SET_ERR_MSG(extack, "Master of device not found");
+ return -EINVAL;
+ }
+ ops = br_dev->netdev_ops;
+ } else {
+ if (!(ndm_flags & NTF_SELF)) {
+ NL_SET_ERR_MSG(extack, "Missing NTF_SELF");
+ return -EINVAL;
+ }
+ ops = dev->netdev_ops;
+ }
+ }
+
+ if (!br_dev && !dev) {
+ NL_SET_ERR_MSG(extack, "No device specified");
+ return -ENODEV;
+ }
+
+ if (!ops || !ops->ndo_fdb_get) {
+ NL_SET_ERR_MSG(extack, "Fdb get operation not supported by device");
+ return -EOPNOTSUPP;
+ }
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (br_dev)
+ dev = br_dev;
+ err = ops->ndo_fdb_get(skb, tb, dev, addr, vid,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, extack);
+ if (err)
+ goto out;
+
+ return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+out:
+ kfree_skb(skb);
+ return err;
+}
+
static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask,
unsigned int attrnum, unsigned int flag)
{
@@ -5081,7 +5247,7 @@ void __init rtnetlink_init(void)
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, 0);
+ rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);
rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 658cd32bb7b3..be0b223aa862 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1141,6 +1141,9 @@ static int __init dccp_init(void)
goto out_fail;
rc = -ENOBUFS;
inet_hashinfo_init(&dccp_hashinfo);
+ inet_hashinfo2_init(&dccp_hashinfo, "dccp_listen_portaddr_hash",
+ INET_LHTABLE_SIZE, 21, /* one slot per 2 MB*/
+ 0, 64 * 1024);
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 48c41918fb35..91e52973ee13 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -44,6 +44,10 @@ config NET_DSA_TAG_GSWIP
config NET_DSA_TAG_KSZ
bool
+config NET_DSA_TAG_KSZ9477
+ bool
+ select NET_DSA_TAG_KSZ
+
config NET_DSA_TAG_LAN9303
bool
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index a69c1790bbfc..aee909bcddc4 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -55,8 +55,8 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
#ifdef CONFIG_NET_DSA_TAG_GSWIP
[DSA_TAG_PROTO_GSWIP] = &gswip_netdev_ops,
#endif
-#ifdef CONFIG_NET_DSA_TAG_KSZ
- [DSA_TAG_PROTO_KSZ] = &ksz_netdev_ops,
+#ifdef CONFIG_NET_DSA_TAG_KSZ9477
+ [DSA_TAG_PROTO_KSZ9477] = &ksz9477_netdev_ops,
#endif
#ifdef CONFIG_NET_DSA_TAG_LAN9303
[DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
@@ -91,8 +91,8 @@ const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops)
#ifdef CONFIG_NET_DSA_TAG_GSWIP
[DSA_TAG_PROTO_GSWIP] = "gswip",
#endif
-#ifdef CONFIG_NET_DSA_TAG_KSZ
- [DSA_TAG_PROTO_KSZ] = "ksz",
+#ifdef CONFIG_NET_DSA_TAG_KSZ9477
+ [DSA_TAG_PROTO_KSZ9477] = "ksz9477",
#endif
#ifdef CONFIG_NET_DSA_TAG_LAN9303
[DSA_TAG_PROTO_LAN9303] = "lan9303",
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 9e4fd04ab53c..026a05774bf7 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -210,7 +210,7 @@ extern const struct dsa_device_ops edsa_netdev_ops;
extern const struct dsa_device_ops gswip_netdev_ops;
/* tag_ksz.c */
-extern const struct dsa_device_ops ksz_netdev_ops;
+extern const struct dsa_device_ops ksz9477_netdev_ops;
/* tag_lan9303.c */
extern const struct dsa_device_ops lan9303_netdev_ops;
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 96411f70ab9f..da71b9e2af52 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -14,34 +14,18 @@
#include <net/dsa.h>
#include "dsa_priv.h"
-/* For Ingress (Host -> KSZ), 2 bytes are added before FCS.
- * ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
- * ---------------------------------------------------------------------------
- * tag0 : Prioritization (not used now)
- * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
- *
- * For Egress (KSZ -> Host), 1 byte is added before FCS.
- * ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
- * ---------------------------------------------------------------------------
- * tag0 : zero-based value represents port
- * (eg, 0x00=port1, 0x02=port3, 0x06=port7)
- */
-
-#define KSZ_INGRESS_TAG_LEN 2
-#define KSZ_EGRESS_TAG_LEN 1
+/* Typically only one byte is used for tail tag. */
+#define KSZ_EGRESS_TAG_LEN 1
-static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *ksz_common_xmit(struct sk_buff *skb,
+ struct net_device *dev, int len)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
struct sk_buff *nskb;
int padlen;
- u8 *tag;
padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
- if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) {
+ if (skb_tailroom(skb) >= padlen + len) {
/* Let dsa_slave_xmit() free skb */
if (__skb_put_padto(skb, skb->len + padlen, false))
return NULL;
@@ -49,7 +33,7 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
nskb = skb;
} else {
nskb = alloc_skb(NET_IP_ALIGN + skb->len +
- padlen + KSZ_INGRESS_TAG_LEN, GFP_ATOMIC);
+ padlen + len, GFP_ATOMIC);
if (!nskb)
return NULL;
skb_reserve(nskb, NET_IP_ALIGN);
@@ -70,34 +54,88 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
consume_skb(skb);
}
- tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
- tag[0] = 0;
- tag[1] = 1 << dp->index; /* destination port */
-
return nskb;
}
-static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
+ struct net_device *dev,
+ unsigned int port, unsigned int len)
{
- u8 *tag;
- int source_port;
+ skb->dev = dsa_master_find_slave(dev, 0, port);
+ if (!skb->dev)
+ return NULL;
- tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+ pskb_trim_rcsum(skb, skb->len - len);
- source_port = tag[0] & 7;
+ return skb;
+}
- skb->dev = dsa_master_find_slave(dev, 0, source_port);
- if (!skb->dev)
+/*
+ * For Ingress (Host -> KSZ9477), 2 bytes are added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : Prioritization (not used now)
+ * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
+ *
+ * For Egress (KSZ9477 -> Host), 1 byte is added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : zero-based value represents port
+ * (eg, 0x00=port1, 0x02=port3, 0x06=port7)
+ */
+
+#define KSZ9477_INGRESS_TAG_LEN 2
+#define KSZ9477_PTP_TAG_LEN 4
+#define KSZ9477_PTP_TAG_INDICATION 0x80
+
+#define KSZ9477_TAIL_TAG_OVERRIDE BIT(9)
+#define KSZ9477_TAIL_TAG_LOOKUP BIT(10)
+
+static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct sk_buff *nskb;
+ u16 *tag;
+ u8 *addr;
+
+ nskb = ksz_common_xmit(skb, dev, KSZ9477_INGRESS_TAG_LEN);
+ if (!nskb)
return NULL;
- pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN);
+ /* Tag encoding */
+ tag = skb_put(nskb, KSZ9477_INGRESS_TAG_LEN);
+ addr = skb_mac_header(nskb);
- return skb;
+ *tag = BIT(dp->index);
+
+ if (is_link_local_ether_addr(addr))
+ *tag |= KSZ9477_TAIL_TAG_OVERRIDE;
+
+ *tag = cpu_to_be16(*tag);
+
+ return nskb;
+}
+
+static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt)
+{
+ /* Tag decoding */
+ u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+ unsigned int port = tag[0] & 7;
+ unsigned int len = KSZ_EGRESS_TAG_LEN;
+
+ /* Extra 4-bytes PTP timestamp */
+ if (tag[0] & KSZ9477_PTP_TAG_INDICATION)
+ len += KSZ9477_PTP_TAG_LEN;
+
+ return ksz_common_rcv(skb, dev, port, len);
}
-const struct dsa_device_ops ksz_netdev_ops = {
- .xmit = ksz_xmit,
- .rcv = ksz_rcv,
- .overhead = KSZ_INGRESS_TAG_LEN,
+const struct dsa_device_ops ksz9477_netdev_ops = {
+ .xmit = ksz9477_xmit,
+ .rcv = ksz9477_rcv,
+ .overhead = KSZ9477_INGRESS_TAG_LEN,
};
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 326c422c22f8..0dfb72c46671 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1385,6 +1385,10 @@ out:
}
EXPORT_SYMBOL(inet_gso_segment);
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
+ struct sk_buff *));
struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
{
const struct net_offload *ops;
@@ -1494,7 +1498,8 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
skb_gro_pull(skb, sizeof(*iph));
skb_set_transport_header(skb, skb_gro_offset(skb));
- pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+ pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive,
+ ops->callbacks.gro_receive, head, skb);
out_unlock:
rcu_read_unlock();
@@ -1556,6 +1561,8 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
return -EINVAL;
}
+INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
__be16 newlen = htons(skb->len - nhoff);
@@ -1581,7 +1588,9 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
* because any hdr with option will have been flushed in
* inet_gro_receive().
*/
- err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph));
+ err = INDIRECT_CALL_2(ops->callbacks.gro_complete,
+ tcp4_gro_complete, udp4_gro_complete,
+ skb, nhoff + sizeof(*iph));
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 0d0ad19ecb87..0c9f171fb085 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -1061,6 +1061,13 @@ static int gue_err(struct sk_buff *skb, u32 info)
if (validate_gue_flags(guehdr, optlen))
return -EINVAL;
+ /* Handling exceptions for direct UDP encapsulation in GUE would lead to
+ * recursion. Besides, this kind of encapsulation can't even be
+ * configured currently. Discard this.
+ */
+ if (guehdr->proto_ctype == IPPROTO_UDP)
+ return -EOPNOTSUPP;
+
skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
ret = gue_err_proto_handler(guehdr->proto_ctype, skb, info);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 13890d5bfc34..2445614de6a7 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -234,24 +234,16 @@ static inline int compute_score(struct sock *sk, struct net *net,
const int dif, const int sdif, bool exact_dif)
{
int score = -1;
- struct inet_sock *inet = inet_sk(sk);
- bool dev_match;
- if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
+ if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
!ipv6_only_sock(sk)) {
- __be32 rcv_saddr = inet->inet_rcv_saddr;
- score = sk->sk_family == PF_INET ? 2 : 1;
- if (rcv_saddr) {
- if (rcv_saddr != daddr)
- return -1;
- score += 4;
- }
- dev_match = inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
- dif, sdif);
- if (!dev_match)
+ if (sk->sk_rcv_saddr != daddr)
+ return -1;
+
+ if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return -1;
- score += 4;
+ score = sk->sk_family == PF_INET ? 2 : 1;
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
}
@@ -307,26 +299,12 @@ struct sock *__inet_lookup_listener(struct net *net,
const __be32 daddr, const unsigned short hnum,
const int dif, const int sdif)
{
- unsigned int hash = inet_lhashfn(net, hnum);
- struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
- bool exact_dif = inet_exact_dif_match(net, skb);
struct inet_listen_hashbucket *ilb2;
- struct sock *sk, *result = NULL;
- int score, hiscore = 0;
+ struct sock *result = NULL;
unsigned int hash2;
- u32 phash = 0;
-
- if (ilb->count <= 10 || !hashinfo->lhash2)
- goto port_lookup;
-
- /* Too many sk in the ilb bucket (which is hashed by port alone).
- * Try lhash2 (which is hashed by port and addr) instead.
- */
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
- if (ilb2->count > ilb->count)
- goto port_lookup;
result = inet_lhash2_lookup(net, ilb2, skb, doff,
saddr, sport, daddr, hnum,
@@ -335,34 +313,12 @@ struct sock *__inet_lookup_listener(struct net *net,
goto done;
/* Lookup lhash2 with INADDR_ANY */
-
hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
- if (ilb2->count > ilb->count)
- goto port_lookup;
result = inet_lhash2_lookup(net, ilb2, skb, doff,
- saddr, sport, daddr, hnum,
+ saddr, sport, htonl(INADDR_ANY), hnum,
dif, sdif);
- goto done;
-
-port_lookup:
- sk_for_each_rcu(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr,
- dif, sdif, exact_dif);
- if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- goto done;
- }
- result = sk;
- hiscore = score;
- }
- }
done:
if (unlikely(IS_ERR(result)))
return NULL;
@@ -829,6 +785,7 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
h->lhash2[i].count = 0;
}
}
+EXPORT_SYMBOL_GPL(inet_hashinfo2_init);
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 55757764c381..208a5b4419c6 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1361,18 +1361,7 @@ static int ntp_servers_seq_show(struct seq_file *seq, void *v)
}
return 0;
}
-
-static int ntp_servers_seq_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ntp_servers_seq_show, NULL);
-}
-
-static const struct file_operations ntp_servers_seq_fops = {
- .open = ntp_servers_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ntp_servers_seq);
#endif /* CONFIG_PROC_FS */
/*
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index ea04e38f56e9..75c654924532 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1820,8 +1820,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
/* Processing handlers for ipmr_forward */
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
- int in_vifi, struct sk_buff *skb,
- struct mfc_cache *c, int vifi)
+ int in_vifi, struct sk_buff *skb, int vifi)
{
const struct iphdr *iph = ip_hdr(skb);
struct vif_device *vif = &mrt->vif_table[vifi];
@@ -2027,7 +2026,7 @@ forward:
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi,
- skb2, c, psend);
+ skb2, psend);
}
psend = ct;
}
@@ -2039,9 +2038,9 @@ last_forward:
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi, skb2,
- c, psend);
+ psend);
} else {
- ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, psend);
return;
}
}
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 870b0a335061..0fbf7d4df9da 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -10,6 +10,7 @@
* TCPv4 GSO/GRO support
*/
+#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
#include <net/tcp.h>
#include <net/protocol.h>
@@ -305,7 +306,8 @@ int tcp_gro_complete(struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_gro_complete);
-static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
@@ -318,7 +320,7 @@ static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *
return tcp_gro_receive(head, skb);
}
-static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
+INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
{
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index aff2a8e99e01..3fb0ed5e4789 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -380,15 +380,12 @@ static int compute_score(struct sock *sk, struct net *net,
ipv6_only_sock(sk))
return -1;
- score = (sk->sk_family == PF_INET) ? 2 : 1;
- inet = inet_sk(sk);
+ if (sk->sk_rcv_saddr != daddr)
+ return -1;
- if (inet->inet_rcv_saddr) {
- if (inet->inet_rcv_saddr != daddr)
- return -1;
- score += 4;
- }
+ score = (sk->sk_family == PF_INET) ? 2 : 1;
+ inet = inet_sk(sk);
if (inet->inet_daddr) {
if (inet->inet_daddr != saddr)
return -1;
@@ -464,65 +461,30 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport, int dif,
int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
- struct sock *sk, *result;
+ struct sock *result;
unsigned short hnum = ntohs(dport);
- unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
- struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
+ unsigned int hash2, slot2;
+ struct udp_hslot *hslot2;
bool exact_dif = udp_lib_exact_dif_match(net, skb);
- int score, badness;
- u32 hash = 0;
- if (hslot->count > 10) {
- hash2 = ipv4_portaddr_hash(net, daddr, hnum);
+ hash2 = ipv4_portaddr_hash(net, daddr, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+
+ result = udp4_lib_lookup2(net, saddr, sport,
+ daddr, hnum, dif, sdif,
+ exact_dif, hslot2, skb);
+ if (!result) {
+ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
- if (hslot->count < hslot2->count)
- goto begin;
result = udp4_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, sdif,
+ htonl(INADDR_ANY), hnum, dif, sdif,
exact_dif, hslot2, skb);
- if (!result) {
- unsigned int old_slot2 = slot2;
- hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
- slot2 = hash2 & udptable->mask;
- /* avoid searching the same slot again. */
- if (unlikely(slot2 == old_slot2))
- return result;
-
- hslot2 = &udptable->hash2[slot2];
- if (hslot->count < hslot2->count)
- goto begin;
-
- result = udp4_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, sdif,
- exact_dif, hslot2, skb);
- }
- if (unlikely(IS_ERR(result)))
- return NULL;
- return result;
- }
-begin:
- result = NULL;
- badness = 0;
- sk_for_each_rcu(sk, &hslot->head) {
- score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, sdif, exact_dif);
- if (score > badness) {
- if (sk->sk_reuseport) {
- hash = udp_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (unlikely(IS_ERR(result)))
- return NULL;
- if (result)
- return result;
- }
- result = sk;
- badness = score;
- }
}
+ if (unlikely(IS_ERR(result)))
+ return NULL;
return result;
}
EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0646d61f4fa8..64f9715173ac 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -13,6 +13,7 @@
#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/protocol.h>
+#include <net/inet_common.h>
static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
@@ -391,6 +392,8 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
return NULL;
}
+INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
+ __be16 sport, __be16 dport));
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
struct udphdr *uh, udp_lookup_t lookup)
{
@@ -402,7 +405,8 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
struct sock *sk;
rcu_read_lock();
- sk = (*lookup)(skb, uh->source, uh->dest);
+ sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
+ udp4_lib_lookup_skb, skb, uh->source, uh->dest);
if (!sk)
goto out_unlock;
@@ -451,8 +455,8 @@ out_unlock:
}
EXPORT_SYMBOL(udp_gro_receive);
-static struct sk_buff *udp4_gro_receive(struct list_head *head,
- struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
@@ -502,7 +506,8 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
uh->len = newlen;
rcu_read_lock();
- sk = (*lookup)(skb, uh->source, uh->dest);
+ sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
+ udp4_lib_lookup_skb, skb, uh->source, uh->dest);
if (sk && udp_sk(sk)->gro_enabled) {
err = udp_gro_complete_segment(skb);
} else if (sk && udp_sk(sk)->gro_complete) {
@@ -525,7 +530,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
}
EXPORT_SYMBOL(udp_gro_complete);
-static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct iphdr *iph = ip_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 5eeeba7181a1..f3515ebe9b3a 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -99,23 +99,16 @@ static inline int compute_score(struct sock *sk, struct net *net,
const int dif, const int sdif, bool exact_dif)
{
int score = -1;
- bool dev_match;
if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
sk->sk_family == PF_INET6) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ return -1;
- score = 1;
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
- return -1;
- score++;
- }
- dev_match = inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
- dif, sdif);
- if (!dev_match)
+ if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return -1;
- score++;
+ score = 1;
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
}
@@ -164,26 +157,12 @@ struct sock *inet6_lookup_listener(struct net *net,
const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif, const int sdif)
{
- unsigned int hash = inet_lhashfn(net, hnum);
- struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
- bool exact_dif = inet6_exact_dif_match(net, skb);
struct inet_listen_hashbucket *ilb2;
- struct sock *sk, *result = NULL;
- int score, hiscore = 0;
+ struct sock *result = NULL;
unsigned int hash2;
- u32 phash = 0;
-
- if (ilb->count <= 10 || !hashinfo->lhash2)
- goto port_lookup;
-
- /* Too many sk in the ilb bucket (which is hashed by port alone).
- * Try lhash2 (which is hashed by port and addr) instead.
- */
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
- if (ilb2->count > ilb->count)
- goto port_lookup;
result = inet6_lhash2_lookup(net, ilb2, skb, doff,
saddr, sport, daddr, hnum,
@@ -192,33 +171,12 @@ struct sock *inet6_lookup_listener(struct net *net,
goto done;
/* Lookup lhash2 with in6addr_any */
-
hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
- if (ilb2->count > ilb->count)
- goto port_lookup;
result = inet6_lhash2_lookup(net, ilb2, skb, doff,
- saddr, sport, daddr, hnum,
+ saddr, sport, &in6addr_any, hnum,
dif, sdif);
- goto done;
-
-port_lookup:
- sk_for_each(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
- if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet6_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- goto done;
- }
- result = sk;
- hiscore = score;
- }
- }
done:
if (unlikely(IS_ERR(result)))
return NULL;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 70f525c33cb6..5c045691c302 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -20,6 +20,23 @@
#include "ip6_offload.h"
+/* All GRO functions are always builtin, except UDP over ipv6, which lays in
+ * ipv6 module, as it depends on UDPv6 lookup function, so we need special care
+ * when ipv6 is built as a module
+ */
+#if IS_BUILTIN(CONFIG_IPV6)
+#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__)
+#endif
+
+#define indirect_call_gro_receive_l4(f2, f1, cb, head, skb) \
+({ \
+ unlikely(gro_recursion_inc_test(skb)) ? \
+ NAPI_GRO_CB(skb)->flush |= 1, NULL : \
+ INDIRECT_CALL_L4(cb, f2, f1, head, skb); \
+})
+
static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
{
const struct net_offload *ops = NULL;
@@ -164,8 +181,12 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
return len;
}
-static struct sk_buff *ipv6_gro_receive(struct list_head *head,
- struct sk_buff *skb)
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
+ struct sk_buff *skb)
{
const struct net_offload *ops;
struct sk_buff *pp = NULL;
@@ -260,7 +281,8 @@ not_same_flow:
skb_gro_postpull_rcsum(skb, iph, nlen);
- pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+ pp = indirect_call_gro_receive_l4(tcp6_gro_receive, udp6_gro_receive,
+ ops->callbacks.gro_receive, head, skb);
out_unlock:
rcu_read_unlock();
@@ -301,7 +323,9 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
return inet_gro_receive(head, skb);
}
-static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct net_offload *ops;
struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
@@ -320,7 +344,8 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
goto out_unlock;
- err = ops->callbacks.gro_complete(skb, nhoff);
+ err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete,
+ udp6_gro_complete, skb, nhoff);
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 8c63494400c4..34b8a90e6be2 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1968,7 +1968,7 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
*/
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *c, int vifi)
+ struct sk_buff *skb, int vifi)
{
struct ipv6hdr *ipv6h;
struct vif_device *vif = &mrt->vif_table[vifi];
@@ -2134,15 +2134,14 @@ forward:
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ip6mr_forward2(net, mrt, skb2,
- c, psend);
+ ip6mr_forward2(net, mrt, skb2, psend);
}
psend = ct;
}
}
last_forward:
if (psend != -1) {
- ip6mr_forward2(net, mrt, skb, c, psend);
+ ip6mr_forward2(net, mrt, skb, psend);
return;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a3f559162521..b81eb7cb815e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -737,6 +737,7 @@ static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
{
+ bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
struct inet_request_sock *ireq = inet_rsk(req);
const struct ipv6_pinfo *np = inet6_sk(sk_listener);
@@ -744,7 +745,7 @@ static void tcp_v6_init_req(struct request_sock *req,
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
/* So that link locals have meaning */
- if (!sk_listener->sk_bound_dev_if &&
+ if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index e72947c99454..3179c425d7ff 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -9,14 +9,15 @@
*
* TCPv6 GSO/GRO support
*/
+#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
-static struct sk_buff *tcp6_gro_receive(struct list_head *head,
- struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
@@ -29,7 +30,7 @@ static struct sk_buff *tcp6_gro_receive(struct list_head *head,
return tcp_gro_receive(head, skb);
}
-static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
+INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 09cba4cfe31f..9cbf363172bd 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -125,6 +125,9 @@ static int compute_score(struct sock *sk, struct net *net,
sk->sk_family != PF_INET6)
return -1;
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ return -1;
+
score = 0;
inet = inet_sk(sk);
@@ -134,12 +137,6 @@ static int compute_score(struct sock *sk, struct net *net,
score++;
}
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
- return -1;
- score++;
- }
-
if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
return -1;
@@ -197,66 +194,32 @@ struct sock *__udp6_lib_lookup(struct net *net,
int dif, int sdif, struct udp_table *udptable,
struct sk_buff *skb)
{
- struct sock *sk, *result;
unsigned short hnum = ntohs(dport);
- unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
- struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
+ unsigned int hash2, slot2;
+ struct udp_hslot *hslot2;
+ struct sock *result;
bool exact_dif = udp6_lib_exact_dif_match(net, skb);
- int score, badness;
- u32 hash = 0;
- if (hslot->count > 10) {
- hash2 = ipv6_portaddr_hash(net, daddr, hnum);
+ hash2 = ipv6_portaddr_hash(net, daddr, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+
+ result = udp6_lib_lookup2(net, saddr, sport,
+ daddr, hnum, dif, sdif, exact_dif,
+ hslot2, skb);
+ if (!result) {
+ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
+
hslot2 = &udptable->hash2[slot2];
- if (hslot->count < hslot2->count)
- goto begin;
result = udp6_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, sdif, exact_dif,
- hslot2, skb);
- if (!result) {
- unsigned int old_slot2 = slot2;
- hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
- slot2 = hash2 & udptable->mask;
- /* avoid searching the same slot again. */
- if (unlikely(slot2 == old_slot2))
- return result;
-
- hslot2 = &udptable->hash2[slot2];
- if (hslot->count < hslot2->count)
- goto begin;
-
- result = udp6_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, sdif,
- exact_dif, hslot2,
- skb);
- }
- if (unlikely(IS_ERR(result)))
- return NULL;
- return result;
- }
-begin:
- result = NULL;
- badness = -1;
- sk_for_each_rcu(sk, &hslot->head) {
- score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
- sdif, exact_dif);
- if (score > badness) {
- if (sk->sk_reuseport) {
- hash = udp6_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (unlikely(IS_ERR(result)))
- return NULL;
- if (result)
- return result;
- }
- result = sk;
- badness = score;
- }
+ &in6addr_any, hnum, dif, sdif,
+ exact_dif, hslot2,
+ skb);
}
+ if (unlikely(IS_ERR(result)))
+ return NULL;
return result;
}
EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 828b2457f97b..83b11d0ac091 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -11,6 +11,7 @@
*/
#include <linux/skbuff.h>
#include <linux/netdevice.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/protocol.h>
#include <net/ipv6.h>
#include <net/udp.h>
@@ -114,8 +115,8 @@ out:
return segs;
}
-static struct sk_buff *udp6_gro_receive(struct list_head *head,
- struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
@@ -142,7 +143,7 @@ flush:
return NULL;
}
-static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d35bcf92969c..769f8f78d3b8 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -262,7 +262,6 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (xdst->u.rt6.rt6i_idev->dev == dev) {
struct inet6_dev *loopback_idev =
in6_dev_get(dev_net(dev)->loopback_dev);
- BUG_ON(!loopback_idev);
do {
in6_dev_put(xdst->u.rt6.rt6i_idev);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9d61266526e7..655c787f9d54 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2020,7 +2020,7 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
static inline int pfkey_xfrm_policy2sec_ctx_size(const struct xfrm_policy *xp)
{
- struct xfrm_sec_ctx *xfrm_ctx = xp->security;
+ struct xfrm_sec_ctx *xfrm_ctx = xp->security;
if (xfrm_ctx) {
int len = sizeof(struct sadb_x_sec_ctx);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 04d9946dcdba..c03c6461f236 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -236,6 +236,10 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
skb->data[1] == PPP_UI)
skb_pull(skb, 2);
+ /* Decompress protocol field if PFC is enabled */
+ if ((*skb->data) & 0x1)
+ *(u8 *)skb_push(skb, 1) = 0;
+
if (sk->sk_state & PPPOX_BOUND) {
struct pppox_sock *po;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 6207f265b87c..8ce2a0507970 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1270,29 +1270,6 @@ void tcf_block_cb_unregister(struct tcf_block *block,
}
EXPORT_SYMBOL(tcf_block_cb_unregister);
-static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
- void *type_data, bool err_stop)
-{
- struct tcf_block_cb *block_cb;
- int ok_count = 0;
- int err;
-
- /* Make sure all netdevs sharing this block are offload-capable. */
- if (block->nooffloaddevcnt && err_stop)
- return -EOPNOTSUPP;
-
- list_for_each_entry(block_cb, &block->cb_list, list) {
- err = block_cb->cb(type, type_data, block_cb->cb_priv);
- if (err) {
- if (err_stop)
- return err;
- } else {
- ok_count++;
- }
- }
- return ok_count;
-}
-
/* Main classifier routine: scans classifier chain attached
* to this qdisc, (optionally) tests for protocol and asks
* specific classifiers.
@@ -2515,10 +2492,27 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
-int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
- enum tc_setup_type type, void *type_data, bool err_stop)
+int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
+ void *type_data, bool err_stop)
{
- return tcf_block_cb_call(block, type, type_data, err_stop);
+ struct tcf_block_cb *block_cb;
+ int ok_count = 0;
+ int err;
+
+ /* Make sure all netdevs sharing this block are offload-capable. */
+ if (block->nooffloaddevcnt && err_stop)
+ return -EOPNOTSUPP;
+
+ list_for_each_entry(block_cb, &block->cb_list, list) {
+ err = block_cb->cb(type, type_data, block_cb->cb_priv);
+ if (err) {
+ if (err_stop)
+ return err;
+ } else {
+ ok_count++;
+ }
+ }
+ return ok_count;
}
EXPORT_SYMBOL(tc_setup_cb_call);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index fa6fe2fe0f32..a95cb240a606 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -169,7 +169,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
if (oldprog)
tcf_block_offload_dec(block, &oldprog->gen_flags);
- err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
+ err = tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
if (prog) {
if (err < 0) {
cls_bpf_offload_cmd(tp, oldprog, prog, extack);
@@ -234,7 +234,7 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
cls_bpf.name = prog->bpf_name;
cls_bpf.exts_integrated = prog->exts_integrated;
- tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false);
}
static int cls_bpf_init(struct tcf_proto *tp)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 544811dded60..1eb2e2c31dd5 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -368,8 +368,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
cls_flower.command = TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long) f;
- tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
- &cls_flower, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
tcf_block_offload_dec(block, &f->flags);
}
@@ -391,8 +390,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
cls_flower.exts = &f->exts;
cls_flower.classid = f->res.classid;
- err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
- &cls_flower, skip_sw);
+ err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw);
if (err < 0) {
fl_hw_destroy_filter(tp, f, NULL);
return err;
@@ -418,8 +416,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
cls_flower.exts = &f->exts;
cls_flower.classid = f->res.classid;
- tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
- &cls_flower, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
}
static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
@@ -1502,8 +1499,7 @@ static void fl_hw_create_tmplt(struct tcf_chain *chain,
/* We don't care if driver (any of them) fails to handle this
* call. It serves just as a hint for it.
*/
- tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
- &cls_flower, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
}
static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
@@ -1516,8 +1512,7 @@ static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
cls_flower.command = TC_CLSFLOWER_TMPLT_DESTROY;
cls_flower.cookie = (unsigned long) tmplt;
- tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
- &cls_flower, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
}
static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 856fa79d4ffd..0e408ee9dcec 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -71,7 +71,7 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
cls_mall.command = TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = cookie;
- tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false);
tcf_block_offload_dec(block, &head->flags);
}
@@ -90,8 +90,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
cls_mall.exts = &head->exts;
cls_mall.cookie = cookie;
- err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
- &cls_mall, skip_sw);
+ err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw);
if (err < 0) {
mall_destroy_hw_filter(tp, head, cookie, NULL);
return err;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4c54bc440798..dcea21004604 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -491,7 +491,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
cls_u32.hnode.handle = h->handle;
cls_u32.hnode.prio = h->prio;
- tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false);
}
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
@@ -509,7 +509,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
cls_u32.hnode.handle = h->handle;
cls_u32.hnode.prio = h->prio;
- err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+ err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw);
if (err < 0) {
u32_clear_hw_hnode(tp, h, NULL);
return err;
@@ -533,7 +533,7 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
cls_u32.command = TC_CLSU32_DELETE_KNODE;
cls_u32.knode.handle = n->handle;
- tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false);
tcf_block_offload_dec(block, &n->flags);
}
@@ -563,7 +563,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
if (n->ht_down)
cls_u32.knode.link_handle = ht->handle;
- err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+ err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw);
if (err < 0) {
u32_remove_hw_knode(tp, n, NULL);
return err;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 9c88cec7e8a2..187a57e7d601 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -335,7 +335,6 @@ out:
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
unsigned long cl;
- struct Qdisc *leaf;
const struct Qdisc_class_ops *cops = p->ops->cl_ops;
if (cops == NULL)
@@ -344,8 +343,7 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
if (cl == 0)
return NULL;
- leaf = cops->leaf(p, cl);
- return leaf;
+ return cops->leaf(p, cl);
}
/* Find queueing discipline by name */
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 119a427d9b2b..be04091eb7db 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -26,6 +26,7 @@
#include <linux/cache.h>
#include <linux/cpu.h>
#include <linux/audit.h>
+#include <linux/rhashtable.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/xfrm.h>
@@ -45,6 +46,99 @@ struct xfrm_flo {
u8 flags;
};
+/* prefixes smaller than this are stored in lists, not trees. */
+#define INEXACT_PREFIXLEN_IPV4 16
+#define INEXACT_PREFIXLEN_IPV6 48
+
+struct xfrm_pol_inexact_node {
+ struct rb_node node;
+ union {
+ xfrm_address_t addr;
+ struct rcu_head rcu;
+ };
+ u8 prefixlen;
+
+ struct rb_root root;
+
+ /* the policies matching this node, can be empty list */
+ struct hlist_head hhead;
+};
+
+/* xfrm inexact policy search tree:
+ * xfrm_pol_inexact_bin = hash(dir,type,family,if_id);
+ * |
+ * +---- root_d: sorted by daddr:prefix
+ * | |
+ * | xfrm_pol_inexact_node
+ * | |
+ * | +- root: sorted by saddr/prefix
+ * | | |
+ * | | xfrm_pol_inexact_node
+ * | | |
+ * | | + root: unused
+ * | | |
+ * | | + hhead: saddr:daddr policies
+ * | |
+ * | +- coarse policies and all any:daddr policies
+ * |
+ * +---- root_s: sorted by saddr:prefix
+ * | |
+ * | xfrm_pol_inexact_node
+ * | |
+ * | + root: unused
+ * | |
+ * | + hhead: saddr:any policies
+ * |
+ * +---- coarse policies and all any:any policies
+ *
+ * Lookups return four candidate lists:
+ * 1. any:any list from top-level xfrm_pol_inexact_bin
+ * 2. any:daddr list from daddr tree
+ * 3. saddr:daddr list from 2nd level daddr tree
+ * 4. saddr:any list from saddr tree
+ *
+ * This result set then needs to be searched for the policy with
+ * the lowest priority. If two results have same prio, youngest one wins.
+ */
+
+struct xfrm_pol_inexact_key {
+ possible_net_t net;
+ u32 if_id;
+ u16 family;
+ u8 dir, type;
+};
+
+struct xfrm_pol_inexact_bin {
+ struct xfrm_pol_inexact_key k;
+ struct rhash_head head;
+ /* list containing '*:*' policies */
+ struct hlist_head hhead;
+
+ seqcount_t count;
+ /* tree sorted by daddr/prefix */
+ struct rb_root root_d;
+
+ /* tree sorted by saddr/prefix */
+ struct rb_root root_s;
+
+ /* slow path below */
+ struct list_head inexact_bins;
+ struct rcu_head rcu;
+};
+
+enum xfrm_pol_inexact_candidate_type {
+ XFRM_POL_CAND_BOTH,
+ XFRM_POL_CAND_SADDR,
+ XFRM_POL_CAND_DADDR,
+ XFRM_POL_CAND_ANY,
+
+ XFRM_POL_CAND_MAX,
+};
+
+struct xfrm_pol_inexact_candidates {
+ struct hlist_head *res[XFRM_POL_CAND_MAX];
+};
+
static DEFINE_SPINLOCK(xfrm_if_cb_lock);
static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
@@ -55,6 +149,9 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
static struct kmem_cache *xfrm_dst_cache __ro_after_init;
static __read_mostly seqcount_t xfrm_policy_hash_generation;
+static struct rhashtable xfrm_policy_inexact_table;
+static const struct rhashtable_params xfrm_pol_inexact_params;
+
static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
@@ -64,6 +161,25 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
int dir);
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family, u8 dir,
+ u32 if_id);
+
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_lookup_rcu(struct net *net,
+ u8 type, u16 family, u8 dir, u32 if_id);
+static struct xfrm_policy *
+xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy,
+ bool excl);
+static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
+ struct xfrm_policy *policy);
+
+static bool
+xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
+ struct xfrm_pol_inexact_bin *b,
+ const xfrm_address_t *saddr,
+ const xfrm_address_t *daddr);
+
static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy)
{
return refcount_inc_not_zero(&policy->refcnt);
@@ -269,6 +385,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) {
write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all);
+ INIT_HLIST_NODE(&policy->bydst_inexact_list);
INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
@@ -365,7 +482,7 @@ static struct hlist_head *policy_hash_bysel(struct net *net,
hash = __sel_hash(sel, family, hmask, dbits, sbits);
if (hash == hmask + 1)
- return &net->xfrm.policy_inexact[dir];
+ return NULL;
return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
@@ -563,6 +680,533 @@ static void xfrm_hash_resize(struct work_struct *work)
mutex_unlock(&hash_resize_mutex);
}
+static void xfrm_hash_reset_inexact_table(struct net *net)
+{
+ struct xfrm_pol_inexact_bin *b;
+
+ lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+ list_for_each_entry(b, &net->xfrm.inexact_bins, inexact_bins)
+ INIT_HLIST_HEAD(&b->hhead);
+}
+
+/* Make sure *pol can be inserted into fastbin.
+ * Useful to check that later insert requests will be sucessful
+ * (provided xfrm_policy_lock is held throughout).
+ */
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_alloc_bin(const struct xfrm_policy *pol, u8 dir)
+{
+ struct xfrm_pol_inexact_bin *bin, *prev;
+ struct xfrm_pol_inexact_key k = {
+ .family = pol->family,
+ .type = pol->type,
+ .dir = dir,
+ .if_id = pol->if_id,
+ };
+ struct net *net = xp_net(pol);
+
+ lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+ write_pnet(&k.net, net);
+ bin = rhashtable_lookup_fast(&xfrm_policy_inexact_table, &k,
+ xfrm_pol_inexact_params);
+ if (bin)
+ return bin;
+
+ bin = kzalloc(sizeof(*bin), GFP_ATOMIC);
+ if (!bin)
+ return NULL;
+
+ bin->k = k;
+ INIT_HLIST_HEAD(&bin->hhead);
+ bin->root_d = RB_ROOT;
+ bin->root_s = RB_ROOT;
+ seqcount_init(&bin->count);
+
+ prev = rhashtable_lookup_get_insert_key(&xfrm_policy_inexact_table,
+ &bin->k, &bin->head,
+ xfrm_pol_inexact_params);
+ if (!prev) {
+ list_add(&bin->inexact_bins, &net->xfrm.inexact_bins);
+ return bin;
+ }
+
+ kfree(bin);
+
+ return IS_ERR(prev) ? NULL : prev;
+}
+
+static bool xfrm_pol_inexact_addr_use_any_list(const xfrm_address_t *addr,
+ int family, u8 prefixlen)
+{
+ if (xfrm_addr_any(addr, family))
+ return true;
+
+ if (family == AF_INET6 && prefixlen < INEXACT_PREFIXLEN_IPV6)
+ return true;
+
+ if (family == AF_INET && prefixlen < INEXACT_PREFIXLEN_IPV4)
+ return true;
+
+ return false;
+}
+
+static bool
+xfrm_policy_inexact_insert_use_any_list(const struct xfrm_policy *policy)
+{
+ const xfrm_address_t *addr;
+ bool saddr_any, daddr_any;
+ u8 prefixlen;
+
+ addr = &policy->selector.saddr;
+ prefixlen = policy->selector.prefixlen_s;
+
+ saddr_any = xfrm_pol_inexact_addr_use_any_list(addr,
+ policy->family,
+ prefixlen);
+ addr = &policy->selector.daddr;
+ prefixlen = policy->selector.prefixlen_d;
+ daddr_any = xfrm_pol_inexact_addr_use_any_list(addr,
+ policy->family,
+ prefixlen);
+ return saddr_any && daddr_any;
+}
+
+static void xfrm_pol_inexact_node_init(struct xfrm_pol_inexact_node *node,
+ const xfrm_address_t *addr, u8 prefixlen)
+{
+ node->addr = *addr;
+ node->prefixlen = prefixlen;
+}
+
+static struct xfrm_pol_inexact_node *
+xfrm_pol_inexact_node_alloc(const xfrm_address_t *addr, u8 prefixlen)
+{
+ struct xfrm_pol_inexact_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_ATOMIC);
+ if (node)
+ xfrm_pol_inexact_node_init(node, addr, prefixlen);
+
+ return node;
+}
+
+static int xfrm_policy_addr_delta(const xfrm_address_t *a,
+ const xfrm_address_t *b,
+ u8 prefixlen, u16 family)
+{
+ unsigned int pdw, pbi;
+ int delta = 0;
+
+ switch (family) {
+ case AF_INET:
+ if (sizeof(long) == 4 && prefixlen == 0)
+ return ntohl(a->a4) - ntohl(b->a4);
+ return (ntohl(a->a4) & ((~0UL << (32 - prefixlen)))) -
+ (ntohl(b->a4) & ((~0UL << (32 - prefixlen))));
+ case AF_INET6:
+ pdw = prefixlen >> 5;
+ pbi = prefixlen & 0x1f;
+
+ if (pdw) {
+ delta = memcmp(a->a6, b->a6, pdw << 2);
+ if (delta)
+ return delta;
+ }
+ if (pbi) {
+ u32 mask = ~0u << (32 - pbi);
+
+ delta = (ntohl(a->a6[pdw]) & mask) -
+ (ntohl(b->a6[pdw]) & mask);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return delta;
+}
+
+static void xfrm_policy_inexact_list_reinsert(struct net *net,
+ struct xfrm_pol_inexact_node *n,
+ u16 family)
+{
+ unsigned int matched_s, matched_d;
+ struct hlist_node *newpos = NULL;
+ struct xfrm_policy *policy, *p;
+
+ matched_s = 0;
+ matched_d = 0;
+
+ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+ bool matches_s, matches_d;
+
+ if (!policy->bydst_reinsert)
+ continue;
+
+ WARN_ON_ONCE(policy->family != family);
+
+ policy->bydst_reinsert = false;
+ hlist_for_each_entry(p, &n->hhead, bydst) {
+ if (policy->priority >= p->priority)
+ newpos = &p->bydst;
+ else
+ break;
+ }
+
+ if (newpos)
+ hlist_add_behind(&policy->bydst, newpos);
+ else
+ hlist_add_head(&policy->bydst, &n->hhead);
+
+ /* paranoia checks follow.
+ * Check that the reinserted policy matches at least
+ * saddr or daddr for current node prefix.
+ *
+ * Matching both is fine, matching saddr in one policy
+ * (but not daddr) and then matching only daddr in another
+ * is a bug.
+ */
+ matches_s = xfrm_policy_addr_delta(&policy->selector.saddr,
+ &n->addr,
+ n->prefixlen,
+ family) == 0;
+ matches_d = xfrm_policy_addr_delta(&policy->selector.daddr,
+ &n->addr,
+ n->prefixlen,
+ family) == 0;
+ if (matches_s && matches_d)
+ continue;
+
+ WARN_ON_ONCE(!matches_s && !matches_d);
+ if (matches_s)
+ matched_s++;
+ if (matches_d)
+ matched_d++;
+ WARN_ON_ONCE(matched_s && matched_d);
+ }
+}
+
+static void xfrm_policy_inexact_node_reinsert(struct net *net,
+ struct xfrm_pol_inexact_node *n,
+ struct rb_root *new,
+ u16 family)
+{
+ struct rb_node **p, *parent = NULL;
+ struct xfrm_pol_inexact_node *node;
+
+ /* we should not have another subtree here */
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root));
+
+ p = &new->rb_node;
+ while (*p) {
+ u8 prefixlen;
+ int delta;
+
+ parent = *p;
+ node = rb_entry(*p, struct xfrm_pol_inexact_node, node);
+
+ prefixlen = min(node->prefixlen, n->prefixlen);
+
+ delta = xfrm_policy_addr_delta(&n->addr, &node->addr,
+ prefixlen, family);
+ if (delta < 0) {
+ p = &parent->rb_left;
+ } else if (delta > 0) {
+ p = &parent->rb_right;
+ } else {
+ struct xfrm_policy *tmp;
+
+ hlist_for_each_entry(tmp, &node->hhead, bydst)
+ tmp->bydst_reinsert = true;
+ hlist_for_each_entry(tmp, &n->hhead, bydst)
+ tmp->bydst_reinsert = true;
+
+ INIT_HLIST_HEAD(&node->hhead);
+ xfrm_policy_inexact_list_reinsert(net, node, family);
+
+ if (node->prefixlen == n->prefixlen) {
+ kfree_rcu(n, rcu);
+ return;
+ }
+
+ rb_erase(*p, new);
+ kfree_rcu(n, rcu);
+ n = node;
+ n->prefixlen = prefixlen;
+ *p = new->rb_node;
+ parent = NULL;
+ }
+ }
+
+ rb_link_node_rcu(&n->node, parent, p);
+ rb_insert_color(&n->node, new);
+}
+
+/* merge nodes v and n */
+static void xfrm_policy_inexact_node_merge(struct net *net,
+ struct xfrm_pol_inexact_node *v,
+ struct xfrm_pol_inexact_node *n,
+ u16 family)
+{
+ struct xfrm_pol_inexact_node *node;
+ struct xfrm_policy *tmp;
+ struct rb_node *rnode;
+
+ /* To-be-merged node v has a subtree.
+ *
+ * Dismantle it and insert its nodes to n->root.
+ */
+ while ((rnode = rb_first(&v->root)) != NULL) {
+ node = rb_entry(rnode, struct xfrm_pol_inexact_node, node);
+ rb_erase(&node->node, &v->root);
+ xfrm_policy_inexact_node_reinsert(net, node, &n->root,
+ family);
+ }
+
+ hlist_for_each_entry(tmp, &v->hhead, bydst)
+ tmp->bydst_reinsert = true;
+ hlist_for_each_entry(tmp, &n->hhead, bydst)
+ tmp->bydst_reinsert = true;
+
+ INIT_HLIST_HEAD(&n->hhead);
+ xfrm_policy_inexact_list_reinsert(net, n, family);
+}
+
+static struct xfrm_pol_inexact_node *
+xfrm_policy_inexact_insert_node(struct net *net,
+ struct rb_root *root,
+ xfrm_address_t *addr,
+ u16 family, u8 prefixlen, u8 dir)
+{
+ struct xfrm_pol_inexact_node *cached = NULL;
+ struct rb_node **p, *parent = NULL;
+ struct xfrm_pol_inexact_node *node;
+
+ p = &root->rb_node;
+ while (*p) {
+ int delta;
+
+ parent = *p;
+ node = rb_entry(*p, struct xfrm_pol_inexact_node, node);
+
+ delta = xfrm_policy_addr_delta(addr, &node->addr,
+ node->prefixlen,
+ family);
+ if (delta == 0 && prefixlen >= node->prefixlen) {
+ WARN_ON_ONCE(cached); /* ipsec policies got lost */
+ return node;
+ }
+
+ if (delta < 0)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+
+ if (prefixlen < node->prefixlen) {
+ delta = xfrm_policy_addr_delta(addr, &node->addr,
+ prefixlen,
+ family);
+ if (delta)
+ continue;
+
+ /* This node is a subnet of the new prefix. It needs
+ * to be removed and re-inserted with the smaller
+ * prefix and all nodes that are now also covered
+ * by the reduced prefixlen.
+ */
+ rb_erase(&node->node, root);
+
+ if (!cached) {
+ xfrm_pol_inexact_node_init(node, addr,
+ prefixlen);
+ cached = node;
+ } else {
+ /* This node also falls within the new
+ * prefixlen. Merge the to-be-reinserted
+ * node and this one.
+ */
+ xfrm_policy_inexact_node_merge(net, node,
+ cached, family);
+ kfree_rcu(node, rcu);
+ }
+
+ /* restart */
+ p = &root->rb_node;
+ parent = NULL;
+ }
+ }
+
+ node = cached;
+ if (!node) {
+ node = xfrm_pol_inexact_node_alloc(addr, prefixlen);
+ if (!node)
+ return NULL;
+ }
+
+ rb_link_node_rcu(&node->node, parent, p);
+ rb_insert_color(&node->node, root);
+
+ return node;
+}
+
+static void xfrm_policy_inexact_gc_tree(struct rb_root *r, bool rm)
+{
+ struct xfrm_pol_inexact_node *node;
+ struct rb_node *rn = rb_first(r);
+
+ while (rn) {
+ node = rb_entry(rn, struct xfrm_pol_inexact_node, node);
+
+ xfrm_policy_inexact_gc_tree(&node->root, rm);
+ rn = rb_next(rn);
+
+ if (!hlist_empty(&node->hhead) || !RB_EMPTY_ROOT(&node->root)) {
+ WARN_ON_ONCE(rm);
+ continue;
+ }
+
+ rb_erase(&node->node, r);
+ kfree_rcu(node, rcu);
+ }
+}
+
+static void __xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b, bool net_exit)
+{
+ write_seqcount_begin(&b->count);
+ xfrm_policy_inexact_gc_tree(&b->root_d, net_exit);
+ xfrm_policy_inexact_gc_tree(&b->root_s, net_exit);
+ write_seqcount_end(&b->count);
+
+ if (!RB_EMPTY_ROOT(&b->root_d) || !RB_EMPTY_ROOT(&b->root_s) ||
+ !hlist_empty(&b->hhead)) {
+ WARN_ON_ONCE(net_exit);
+ return;
+ }
+
+ if (rhashtable_remove_fast(&xfrm_policy_inexact_table, &b->head,
+ xfrm_pol_inexact_params) == 0) {
+ list_del(&b->inexact_bins);
+ kfree_rcu(b, rcu);
+ }
+}
+
+static void xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b)
+{
+ struct net *net = read_pnet(&b->k.net);
+
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ __xfrm_policy_inexact_prune_bin(b, false);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+}
+
+static void __xfrm_policy_inexact_flush(struct net *net)
+{
+ struct xfrm_pol_inexact_bin *bin, *t;
+
+ lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+ list_for_each_entry_safe(bin, t, &net->xfrm.inexact_bins, inexact_bins)
+ __xfrm_policy_inexact_prune_bin(bin, false);
+}
+
+static struct hlist_head *
+xfrm_policy_inexact_alloc_chain(struct xfrm_pol_inexact_bin *bin,
+ struct xfrm_policy *policy, u8 dir)
+{
+ struct xfrm_pol_inexact_node *n;
+ struct net *net;
+
+ net = xp_net(policy);
+ lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+ if (xfrm_policy_inexact_insert_use_any_list(policy))
+ return &bin->hhead;
+
+ if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.daddr,
+ policy->family,
+ policy->selector.prefixlen_d)) {
+ write_seqcount_begin(&bin->count);
+ n = xfrm_policy_inexact_insert_node(net,
+ &bin->root_s,
+ &policy->selector.saddr,
+ policy->family,
+ policy->selector.prefixlen_s,
+ dir);
+ write_seqcount_end(&bin->count);
+ if (!n)
+ return NULL;
+
+ return &n->hhead;
+ }
+
+ /* daddr is fixed */
+ write_seqcount_begin(&bin->count);
+ n = xfrm_policy_inexact_insert_node(net,
+ &bin->root_d,
+ &policy->selector.daddr,
+ policy->family,
+ policy->selector.prefixlen_d, dir);
+ write_seqcount_end(&bin->count);
+ if (!n)
+ return NULL;
+
+ /* saddr is wildcard */
+ if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.saddr,
+ policy->family,
+ policy->selector.prefixlen_s))
+ return &n->hhead;
+
+ write_seqcount_begin(&bin->count);
+ n = xfrm_policy_inexact_insert_node(net,
+ &n->root,
+ &policy->selector.saddr,
+ policy->family,
+ policy->selector.prefixlen_s, dir);
+ write_seqcount_end(&bin->count);
+ if (!n)
+ return NULL;
+
+ return &n->hhead;
+}
+
+static struct xfrm_policy *
+xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl)
+{
+ struct xfrm_pol_inexact_bin *bin;
+ struct xfrm_policy *delpol;
+ struct hlist_head *chain;
+ struct net *net;
+
+ bin = xfrm_policy_inexact_alloc_bin(policy, dir);
+ if (!bin)
+ return ERR_PTR(-ENOMEM);
+
+ net = xp_net(policy);
+ lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+ chain = xfrm_policy_inexact_alloc_chain(bin, policy, dir);
+ if (!chain) {
+ __xfrm_policy_inexact_prune_bin(bin, false);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ delpol = xfrm_policy_insert_list(chain, policy, excl);
+ if (delpol && excl) {
+ __xfrm_policy_inexact_prune_bin(bin, false);
+ return ERR_PTR(-EEXIST);
+ }
+
+ chain = &net->xfrm.policy_inexact[dir];
+ xfrm_policy_insert_inexact_list(chain, policy);
+
+ if (delpol)
+ __xfrm_policy_inexact_prune_bin(bin, false);
+
+ return delpol;
+}
+
static void xfrm_hash_rebuild(struct work_struct *work)
{
struct net *net = container_of(work, struct net,
@@ -592,7 +1236,50 @@ static void xfrm_hash_rebuild(struct work_struct *work)
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ /* make sure that we can insert the indirect policies again before
+ * we start with destructive action.
+ */
+ list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) {
+ struct xfrm_pol_inexact_bin *bin;
+ u8 dbits, sbits;
+
+ dir = xfrm_policy_id2dir(policy->index);
+ if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
+ continue;
+
+ if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
+ if (policy->family == AF_INET) {
+ dbits = rbits4;
+ sbits = lbits4;
+ } else {
+ dbits = rbits6;
+ sbits = lbits6;
+ }
+ } else {
+ if (policy->family == AF_INET) {
+ dbits = lbits4;
+ sbits = rbits4;
+ } else {
+ dbits = lbits6;
+ sbits = rbits6;
+ }
+ }
+
+ if (policy->selector.prefixlen_d < dbits ||
+ policy->selector.prefixlen_s < sbits)
+ continue;
+
+ bin = xfrm_policy_inexact_alloc_bin(policy, dir);
+ if (!bin)
+ goto out_unlock;
+
+ if (!xfrm_policy_inexact_alloc_chain(bin, policy, dir))
+ goto out_unlock;
+ }
+
/* reset the bydst and inexact table in all directions */
+ xfrm_hash_reset_inexact_table(net);
+
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
hmask = net->xfrm.policy_bydst[dir].hmask;
@@ -616,15 +1303,23 @@ static void xfrm_hash_rebuild(struct work_struct *work)
/* re-insert all policies by order of creation */
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
- if (policy->walk.dead ||
- xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
+ if (policy->walk.dead)
+ continue;
+ dir = xfrm_policy_id2dir(policy->index);
+ if (dir >= XFRM_POLICY_MAX) {
/* skip socket policies */
continue;
}
newpos = NULL;
chain = policy_hash_bysel(net, &policy->selector,
- policy->family,
- xfrm_policy_id2dir(policy->index));
+ policy->family, dir);
+ if (!chain) {
+ void *p = xfrm_policy_inexact_insert(policy, dir, 0);
+
+ WARN_ONCE(IS_ERR(p), "reinsert: %ld\n", PTR_ERR(p));
+ continue;
+ }
+
hlist_for_each_entry(pol, chain, bydst) {
if (policy->priority >= pol->priority)
newpos = &pol->bydst;
@@ -637,6 +1332,8 @@ static void xfrm_hash_rebuild(struct work_struct *work)
hlist_add_head_rcu(&policy->bydst, chain);
}
+out_unlock:
+ __xfrm_policy_inexact_flush(net);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
mutex_unlock(&hash_resize_mutex);
@@ -740,18 +1437,97 @@ static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
return false;
}
-int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed)
{
- struct net *net = xp_net(policy);
- struct xfrm_policy *pol;
- struct xfrm_policy *delpol;
- struct hlist_head *chain;
- struct hlist_node *newpos;
+ const struct xfrm_pol_inexact_key *k = data;
+ u32 a = k->type << 24 | k->dir << 16 | k->family;
+
+ return jhash_3words(a, k->if_id, net_hash_mix(read_pnet(&k->net)),
+ seed);
+}
+
+static u32 xfrm_pol_bin_obj(const void *data, u32 len, u32 seed)
+{
+ const struct xfrm_pol_inexact_bin *b = data;
+
+ return xfrm_pol_bin_key(&b->k, 0, seed);
+}
+
+static int xfrm_pol_bin_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct xfrm_pol_inexact_key *key = arg->key;
+ const struct xfrm_pol_inexact_bin *b = ptr;
+ int ret;
+
+ if (!net_eq(read_pnet(&b->k.net), read_pnet(&key->net)))
+ return -1;
+
+ ret = b->k.dir ^ key->dir;
+ if (ret)
+ return ret;
+
+ ret = b->k.type ^ key->type;
+ if (ret)
+ return ret;
+
+ ret = b->k.family ^ key->family;
+ if (ret)
+ return ret;
+
+ return b->k.if_id ^ key->if_id;
+}
+
+static const struct rhashtable_params xfrm_pol_inexact_params = {
+ .head_offset = offsetof(struct xfrm_pol_inexact_bin, head),
+ .hashfn = xfrm_pol_bin_key,
+ .obj_hashfn = xfrm_pol_bin_obj,
+ .obj_cmpfn = xfrm_pol_bin_cmp,
+ .automatic_shrinking = true,
+};
+
+static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
+ struct xfrm_policy *policy)
+{
+ struct xfrm_policy *pol, *delpol = NULL;
+ struct hlist_node *newpos = NULL;
+ int i = 0;
+
+ hlist_for_each_entry(pol, chain, bydst_inexact_list) {
+ if (pol->type == policy->type &&
+ pol->if_id == policy->if_id &&
+ !selector_cmp(&pol->selector, &policy->selector) &&
+ xfrm_policy_mark_match(policy, pol) &&
+ xfrm_sec_ctx_match(pol->security, policy->security) &&
+ !WARN_ON(delpol)) {
+ delpol = pol;
+ if (policy->priority > pol->priority)
+ continue;
+ } else if (policy->priority >= pol->priority) {
+ newpos = &pol->bydst_inexact_list;
+ continue;
+ }
+ if (delpol)
+ break;
+ }
+
+ if (newpos)
+ hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos);
+ else
+ hlist_add_head_rcu(&policy->bydst_inexact_list, chain);
+
+ hlist_for_each_entry(pol, chain, bydst_inexact_list) {
+ pol->pos = i;
+ i++;
+ }
+}
+
+static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
+ struct xfrm_policy *policy,
+ bool excl)
+{
+ struct xfrm_policy *pol, *newpos = NULL, *delpol = NULL;
- spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
- delpol = NULL;
- newpos = NULL;
hlist_for_each_entry(pol, chain, bydst) {
if (pol->type == policy->type &&
pol->if_id == policy->if_id &&
@@ -759,24 +1535,45 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
xfrm_policy_mark_match(policy, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
- if (excl) {
- spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
- return -EEXIST;
- }
+ if (excl)
+ return ERR_PTR(-EEXIST);
delpol = pol;
if (policy->priority > pol->priority)
continue;
} else if (policy->priority >= pol->priority) {
- newpos = &pol->bydst;
+ newpos = pol;
continue;
}
if (delpol)
break;
}
+
if (newpos)
- hlist_add_behind_rcu(&policy->bydst, newpos);
+ hlist_add_behind_rcu(&policy->bydst, &newpos->bydst);
else
hlist_add_head_rcu(&policy->bydst, chain);
+
+ return delpol;
+}
+
+int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+{
+ struct net *net = xp_net(policy);
+ struct xfrm_policy *delpol;
+ struct hlist_head *chain;
+
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
+ if (chain)
+ delpol = xfrm_policy_insert_list(chain, policy, excl);
+ else
+ delpol = xfrm_policy_inexact_insert(policy, dir, excl);
+
+ if (IS_ERR(delpol)) {
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ return PTR_ERR(delpol);
+ }
+
__xfrm_policy_link(policy, dir);
/* After previous checking, family can either be AF_INET or AF_INET6 */
@@ -806,43 +1603,96 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
}
EXPORT_SYMBOL(xfrm_policy_insert);
+static struct xfrm_policy *
+__xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
+ u8 type, int dir,
+ struct xfrm_selector *sel,
+ struct xfrm_sec_ctx *ctx)
+{
+ struct xfrm_policy *pol;
+
+ if (!chain)
+ return NULL;
+
+ hlist_for_each_entry(pol, chain, bydst) {
+ if (pol->type == type &&
+ pol->if_id == if_id &&
+ (mark & pol->mark.m) == pol->mark.v &&
+ !selector_cmp(sel, &pol->selector) &&
+ xfrm_sec_ctx_match(ctx, pol->security))
+ return pol;
+ }
+
+ return NULL;
+}
+
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
u8 type, int dir,
struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx, int delete,
int *err)
{
- struct xfrm_policy *pol, *ret;
+ struct xfrm_pol_inexact_bin *bin = NULL;
+ struct xfrm_policy *pol, *ret = NULL;
struct hlist_head *chain;
*err = 0;
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_bysel(net, sel, sel->family, dir);
- ret = NULL;
- hlist_for_each_entry(pol, chain, bydst) {
- if (pol->type == type &&
- pol->if_id == if_id &&
- (mark & pol->mark.m) == pol->mark.v &&
- !selector_cmp(sel, &pol->selector) &&
- xfrm_sec_ctx_match(ctx, pol->security)) {
- xfrm_pol_hold(pol);
- if (delete) {
- *err = security_xfrm_policy_delete(
- pol->security);
- if (*err) {
- spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
- return pol;
- }
- __xfrm_policy_unlink(pol, dir);
+ if (!chain) {
+ struct xfrm_pol_inexact_candidates cand;
+ int i;
+
+ bin = xfrm_policy_inexact_lookup(net, type,
+ sel->family, dir, if_id);
+ if (!bin) {
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ return NULL;
+ }
+
+ if (!xfrm_policy_find_inexact_candidates(&cand, bin,
+ &sel->saddr,
+ &sel->daddr)) {
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ return NULL;
+ }
+
+ pol = NULL;
+ for (i = 0; i < ARRAY_SIZE(cand.res); i++) {
+ struct xfrm_policy *tmp;
+
+ tmp = __xfrm_policy_bysel_ctx(cand.res[i], mark,
+ if_id, type, dir,
+ sel, ctx);
+ if (!tmp)
+ continue;
+
+ if (!pol || tmp->pos < pol->pos)
+ pol = tmp;
+ }
+ } else {
+ pol = __xfrm_policy_bysel_ctx(chain, mark, if_id, type, dir,
+ sel, ctx);
+ }
+
+ if (pol) {
+ xfrm_pol_hold(pol);
+ if (delete) {
+ *err = security_xfrm_policy_delete(pol->security);
+ if (*err) {
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ return pol;
}
- ret = pol;
- break;
+ __xfrm_policy_unlink(pol, dir);
}
+ ret = pol;
}
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (ret && delete)
xfrm_policy_kill(ret);
+ if (bin && delete)
+ xfrm_policy_inexact_prune_bin(bin);
return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
@@ -892,36 +1742,19 @@ EXPORT_SYMBOL(xfrm_policy_byid);
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
- int dir, err = 0;
+ struct xfrm_policy *pol;
+ int err = 0;
- for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
- struct xfrm_policy *pol;
- int i;
+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead ||
+ xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX ||
+ pol->type != type)
+ continue;
- hlist_for_each_entry(pol,
- &net->xfrm.policy_inexact[dir], bydst) {
- if (pol->type != type)
- continue;
- err = security_xfrm_policy_delete(pol->security);
- if (err) {
- xfrm_audit_policy_delete(pol, 0, task_valid);
- return err;
- }
- }
- for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
- hlist_for_each_entry(pol,
- net->xfrm.policy_bydst[dir].table + i,
- bydst) {
- if (pol->type != type)
- continue;
- err = security_xfrm_policy_delete(
- pol->security);
- if (err) {
- xfrm_audit_policy_delete(pol, 0,
- task_valid);
- return err;
- }
- }
+ err = security_xfrm_policy_delete(pol->security);
+ if (err) {
+ xfrm_audit_policy_delete(pol, 0, task_valid);
+ return err;
}
}
return err;
@@ -937,6 +1770,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
int dir, err = 0, cnt = 0;
+ struct xfrm_policy *pol;
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -944,48 +1778,25 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
if (err)
goto out;
- for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
- struct xfrm_policy *pol;
- int i;
-
- again1:
- hlist_for_each_entry(pol,
- &net->xfrm.policy_inexact[dir], bydst) {
- if (pol->type != type)
- continue;
- __xfrm_policy_unlink(pol, dir);
- spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
- cnt++;
-
- xfrm_audit_policy_delete(pol, 1, task_valid);
-
- xfrm_policy_kill(pol);
-
- spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- goto again1;
- }
-
- for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
- again2:
- hlist_for_each_entry(pol,
- net->xfrm.policy_bydst[dir].table + i,
- bydst) {
- if (pol->type != type)
- continue;
- __xfrm_policy_unlink(pol, dir);
- spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
- cnt++;
-
- xfrm_audit_policy_delete(pol, 1, task_valid);
- xfrm_policy_kill(pol);
-
- spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- goto again2;
- }
- }
+again:
+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ dir = xfrm_policy_id2dir(pol->index);
+ if (pol->walk.dead ||
+ dir >= XFRM_POLICY_MAX ||
+ pol->type != type)
+ continue;
+ __xfrm_policy_unlink(pol, dir);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ cnt++;
+ xfrm_audit_policy_delete(pol, 1, task_valid);
+ xfrm_policy_kill(pol);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ goto again;
}
- if (!cnt)
+ if (cnt)
+ __xfrm_policy_inexact_flush(net);
+ else
err = -ESRCH;
out:
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
@@ -1084,21 +1895,189 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
if (match)
ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
dir);
-
return ret;
}
+static struct xfrm_pol_inexact_node *
+xfrm_policy_lookup_inexact_addr(const struct rb_root *r,
+ seqcount_t *count,
+ const xfrm_address_t *addr, u16 family)
+{
+ const struct rb_node *parent;
+ int seq;
+
+again:
+ seq = read_seqcount_begin(count);
+
+ parent = rcu_dereference_raw(r->rb_node);
+ while (parent) {
+ struct xfrm_pol_inexact_node *node;
+ int delta;
+
+ node = rb_entry(parent, struct xfrm_pol_inexact_node, node);
+
+ delta = xfrm_policy_addr_delta(addr, &node->addr,
+ node->prefixlen, family);
+ if (delta < 0) {
+ parent = rcu_dereference_raw(parent->rb_left);
+ continue;
+ } else if (delta > 0) {
+ parent = rcu_dereference_raw(parent->rb_right);
+ continue;
+ }
+
+ return node;
+ }
+
+ if (read_seqcount_retry(count, seq))
+ goto again;
+
+ return NULL;
+}
+
+static bool
+xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
+ struct xfrm_pol_inexact_bin *b,
+ const xfrm_address_t *saddr,
+ const xfrm_address_t *daddr)
+{
+ struct xfrm_pol_inexact_node *n;
+ u16 family;
+
+ if (!b)
+ return false;
+
+ family = b->k.family;
+ memset(cand, 0, sizeof(*cand));
+ cand->res[XFRM_POL_CAND_ANY] = &b->hhead;
+
+ n = xfrm_policy_lookup_inexact_addr(&b->root_d, &b->count, daddr,
+ family);
+ if (n) {
+ cand->res[XFRM_POL_CAND_DADDR] = &n->hhead;
+ n = xfrm_policy_lookup_inexact_addr(&n->root, &b->count, saddr,
+ family);
+ if (n)
+ cand->res[XFRM_POL_CAND_BOTH] = &n->hhead;
+ }
+
+ n = xfrm_policy_lookup_inexact_addr(&b->root_s, &b->count, saddr,
+ family);
+ if (n)
+ cand->res[XFRM_POL_CAND_SADDR] = &n->hhead;
+
+ return true;
+}
+
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_lookup_rcu(struct net *net, u8 type, u16 family,
+ u8 dir, u32 if_id)
+{
+ struct xfrm_pol_inexact_key k = {
+ .family = family,
+ .type = type,
+ .dir = dir,
+ .if_id = if_id,
+ };
+
+ write_pnet(&k.net, net);
+
+ return rhashtable_lookup(&xfrm_policy_inexact_table, &k,
+ xfrm_pol_inexact_params);
+}
+
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family,
+ u8 dir, u32 if_id)
+{
+ struct xfrm_pol_inexact_bin *bin;
+
+ lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+ rcu_read_lock();
+ bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
+ rcu_read_unlock();
+
+ return bin;
+}
+
+static struct xfrm_policy *
+__xfrm_policy_eval_candidates(struct hlist_head *chain,
+ struct xfrm_policy *prefer,
+ const struct flowi *fl,
+ u8 type, u16 family, int dir, u32 if_id)
+{
+ u32 priority = prefer ? prefer->priority : ~0u;
+ struct xfrm_policy *pol;
+
+ if (!chain)
+ return NULL;
+
+ hlist_for_each_entry_rcu(pol, chain, bydst) {
+ int err;
+
+ if (pol->priority > priority)
+ break;
+
+ err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
+ if (err) {
+ if (err != -ESRCH)
+ return ERR_PTR(err);
+
+ continue;
+ }
+
+ if (prefer) {
+ /* matches. Is it older than *prefer? */
+ if (pol->priority == priority &&
+ prefer->pos < pol->pos)
+ return prefer;
+ }
+
+ return pol;
+ }
+
+ return NULL;
+}
+
+static struct xfrm_policy *
+xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand,
+ struct xfrm_policy *prefer,
+ const struct flowi *fl,
+ u8 type, u16 family, int dir, u32 if_id)
+{
+ struct xfrm_policy *tmp;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cand->res); i++) {
+ tmp = __xfrm_policy_eval_candidates(cand->res[i],
+ prefer,
+ fl, type, family, dir,
+ if_id);
+ if (!tmp)
+ continue;
+
+ if (IS_ERR(tmp))
+ return tmp;
+ prefer = tmp;
+ }
+
+ return prefer;
+}
+
static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
const struct flowi *fl,
u16 family, u8 dir,
u32 if_id)
{
- int err;
- struct xfrm_policy *pol, *ret;
+ struct xfrm_pol_inexact_candidates cand;
const xfrm_address_t *daddr, *saddr;
+ struct xfrm_pol_inexact_bin *bin;
+ struct xfrm_policy *pol, *ret;
struct hlist_head *chain;
unsigned int sequence;
u32 priority;
+ int err;
daddr = xfrm_flowi_daddr(fl, family);
saddr = xfrm_flowi_saddr(fl, family);
@@ -1129,25 +2108,20 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
break;
}
}
- chain = &net->xfrm.policy_inexact[dir];
- hlist_for_each_entry_rcu(pol, chain, bydst) {
- if ((pol->priority >= priority) && ret)
- break;
+ bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
+ if (!bin || !xfrm_policy_find_inexact_candidates(&cand, bin, saddr,
+ daddr))
+ goto skip_inexact;
- err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
- if (err) {
- if (err == -ESRCH)
- continue;
- else {
- ret = ERR_PTR(err);
- goto fail;
- }
- } else {
- ret = pol;
- break;
- }
+ pol = xfrm_policy_eval_candidates(&cand, ret, fl, type,
+ family, dir, if_id);
+ if (pol) {
+ ret = pol;
+ if (IS_ERR(pol))
+ goto fail;
}
+skip_inexact:
if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
goto retry;
@@ -1239,6 +2213,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
/* Socket policies are not hashed. */
if (!hlist_unhashed(&pol->bydst)) {
hlist_del_rcu(&pol->bydst);
+ hlist_del_init(&pol->bydst_inexact_list);
hlist_del(&pol->byidx);
}
@@ -1811,7 +2786,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
pq->timeout = pq->timeout << 1;
if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
xfrm_pol_hold(pol);
- goto out;
+ goto out;
}
dst_release(dst);
@@ -2816,13 +3791,17 @@ static void xfrm_statistics_fini(struct net *net)
static int __net_init xfrm_policy_init(struct net *net)
{
unsigned int hmask, sz;
- int dir;
+ int dir, err;
- if (net_eq(net, &init_net))
+ if (net_eq(net, &init_net)) {
xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
sizeof(struct xfrm_dst),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
+ err = rhashtable_init(&xfrm_policy_inexact_table,
+ &xfrm_pol_inexact_params);
+ BUG_ON(err);
+ }
hmask = 8 - 1;
sz = (hmask+1) * sizeof(struct hlist_head);
@@ -2857,6 +3836,7 @@ static int __net_init xfrm_policy_init(struct net *net)
seqlock_init(&net->xfrm.policy_hthresh.lock);
INIT_LIST_HEAD(&net->xfrm.policy_all);
+ INIT_LIST_HEAD(&net->xfrm.inexact_bins);
INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
return 0;
@@ -2875,6 +3855,7 @@ out_byidx:
static void xfrm_policy_fini(struct net *net)
{
+ struct xfrm_pol_inexact_bin *b, *t;
unsigned int sz;
int dir;
@@ -2900,6 +3881,11 @@ static void xfrm_policy_fini(struct net *net)
sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
xfrm_hash_free(net->xfrm.policy_byidx, sz);
+
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ list_for_each_entry_safe(b, t, &net->xfrm.inexact_bins, inexact_bins)
+ __xfrm_policy_inexact_prune_bin(b, true);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
static int __net_init xfrm_net_init(struct net *net)
@@ -3065,7 +4051,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
}
}
chain = &net->xfrm.policy_inexact[dir];
- hlist_for_each_entry(pol, chain, bydst) {
+ hlist_for_each_entry(pol, chain, bydst_inexact_list) {
if ((pol->priority >= priority) && ret)
break;
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
index 00ae99fbc253..b41d6256b2d0 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -8,7 +8,8 @@
lib_dir=$(dirname $0)/../../../../net/forwarding
ALL_TESTS="single_mask_test identical_filters_test two_masks_test \
- multiple_masks_test ctcam_edge_cases_test delta_simple_test"
+ multiple_masks_test ctcam_edge_cases_test delta_simple_test \
+ bloom_simple_test bloom_complex_test bloom_delta_test"
NUM_NETIFS=2
source $lib_dir/tc_common.sh
source $lib_dir/lib.sh
@@ -404,6 +405,178 @@ delta_simple_test()
log_test "delta simple test ($tcflags)"
}
+bloom_simple_test()
+{
+ # Bloom filter requires that the eRP table is used. This test
+ # verifies that Bloom filter is not harming correctness of ACLs.
+ # First, make sure that eRP table is used and then set rule patterns
+ # which are distant enough and will result skipping a lookup after
+ # consulting the Bloom filter. Although some eRP lookups are skipped,
+ # the correct filter should be hit.
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 5 handle 104 flower \
+ $tcflags dst_ip 198.51.100.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.0.0/8 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Two filters - did not match highest priority"
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 104 1
+ check_err $? "Single filter - did not match"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Low prio filter - did not match"
+
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 198.0.0.0/8 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Two filters - did not match highest priority after add"
+
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress protocol ip pref 5 handle 104 flower
+
+ log_test "bloom simple test ($tcflags)"
+}
+
+bloom_complex_test()
+{
+ # Bloom filter index computation is affected from region ID, eRP
+ # ID and from the region key size. In order to excercise those parts
+ # of the Bloom filter code, use a series of regions, each with a
+ # different key size and send packet that should hit all of them.
+ local index
+
+ RET=0
+ NUM_CHAINS=4
+ BASE_INDEX=100
+
+ # Create chain with up to 2 key blocks (ip_proto only)
+ tc chain add dev $h2 ingress chain 1 protocol ip flower \
+ ip_proto tcp &> /dev/null
+ # Create chain with 2-4 key blocks (ip_proto, src MAC)
+ tc chain add dev $h2 ingress chain 2 protocol ip flower \
+ ip_proto tcp \
+ src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+ # Create chain with 4-8 key blocks (ip_proto, src & dst MAC, IPv4 dest)
+ tc chain add dev $h2 ingress chain 3 protocol ip flower \
+ ip_proto tcp \
+ dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \
+ src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \
+ dst_ip 0.0.0.0/32 &> /dev/null
+ # Default chain contains all fields and therefore is 8-12 key blocks
+ tc chain add dev $h2 ingress chain 4
+
+ # We need at least 2 rules in every region to have eRP table active
+ # so create a dummy rule per chain using a different pattern
+ for i in $(eval echo {0..$NUM_CHAINS}); do
+ index=$((BASE_INDEX - 1 - i))
+ tc filter add dev $h2 ingress chain $i protocol ip \
+ pref 2 handle $index flower \
+ $tcflags ip_proto tcp action drop
+ done
+
+ # Add rules to test Bloom filter, each in a different chain
+ index=$BASE_INDEX
+ tc filter add dev $h2 ingress protocol ip \
+ pref 1 handle $((++index)) flower \
+ $tcflags dst_ip 192.0.0.0/16 action goto chain 1
+ tc filter add dev $h2 ingress chain 1 protocol ip \
+ pref 1 handle $((++index)) flower \
+ $tcflags action goto chain 2
+ tc filter add dev $h2 ingress chain 2 protocol ip \
+ pref 1 handle $((++index)) flower \
+ $tcflags src_mac $h1mac action goto chain 3
+ tc filter add dev $h2 ingress chain 3 protocol ip \
+ pref 1 handle $((++index)) flower \
+ $tcflags dst_ip 192.0.0.0/8 action goto chain 4
+ tc filter add dev $h2 ingress chain 4 protocol ip \
+ pref 1 handle $((++index)) flower \
+ $tcflags src_ip 192.0.2.0/24 action drop
+
+ # Send a packet that is supposed to hit all chains
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ for i in $(eval echo {0..$NUM_CHAINS}); do
+ index=$((BASE_INDEX + i + 1))
+ tc_check_packets "dev $h2 ingress" $index 1
+ check_err $? "Did not match chain $i"
+ done
+
+ # Rules cleanup
+ for i in $(eval echo {$NUM_CHAINS..0}); do
+ index=$((BASE_INDEX - i - 1))
+ tc filter del dev $h2 ingress chain $i \
+ pref 2 handle $index flower
+ index=$((BASE_INDEX + i + 1))
+ tc filter del dev $h2 ingress chain $i \
+ pref 1 handle $index flower
+ done
+
+ # Chains cleanup
+ for i in $(eval echo {$NUM_CHAINS..1}); do
+ tc chain del dev $h2 ingress chain $i
+ done
+
+ log_test "bloom complex test ($tcflags)"
+}
+
+
+bloom_delta_test()
+{
+ # When multiple masks are used, the eRP table is activated. When
+ # masks are close enough (delta) the masks reside on the same
+ # eRP table. This test verifies that the eRP table is correctly
+ # allocated and used in delta condition and that Bloom filter is
+ # still functional with delta.
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.1.0.0/16 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.1.2.1 -B 192.1.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Single filter - did not match"
+
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.2.1.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.2.1.1 -B 192.2.1.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Delta filters - did not match second filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "bloom delta test ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 7f57b916e6b2..6f81130605d7 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -3,6 +3,7 @@ socket
psock_fanout
psock_snd
psock_tpacket
+reuseport_addr_any
reuseport_bpf
reuseport_bpf_cpu
reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index ee2e27b1cd0d..9543a4c2f9be 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -4,13 +4,14 @@
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/
-TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
+TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
+ rtnetlink.sh xfrm_policy.sh
TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
-TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh
+TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket
-TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
new file mode 100644
index 000000000000..4ac3e24b7cfc
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Test that sockets listening on a specific address are preferred
+ * over sockets listening on addr_any.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/dccp.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+static const char *IP4_ADDR = "127.0.0.1";
+static const char *IP6_ADDR = "::1";
+static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
+
+static const int PORT = 8888;
+
+static void build_rcv_fd(int family, int proto, int *rcv_fds, int count,
+ const char *addr_str)
+{
+ struct sockaddr_in addr4 = {0};
+ struct sockaddr_in6 addr6 = {0};
+ struct sockaddr *addr;
+ int opt, i, sz;
+
+ memset(&addr, 0, sizeof(addr));
+
+ switch (family) {
+ case AF_INET:
+ addr4.sin_family = family;
+ if (!addr_str)
+ addr4.sin_addr.s_addr = htonl(INADDR_ANY);
+ else if (!inet_pton(family, addr_str, &addr4.sin_addr.s_addr))
+ error(1, errno, "inet_pton failed: %s", addr_str);
+ addr4.sin_port = htons(PORT);
+ sz = sizeof(addr4);
+ addr = (struct sockaddr *)&addr4;
+ break;
+ case AF_INET6:
+ addr6.sin6_family = AF_INET6;
+ if (!addr_str)
+ addr6.sin6_addr = in6addr_any;
+ else if (!inet_pton(family, addr_str, &addr6.sin6_addr))
+ error(1, errno, "inet_pton failed: %s", addr_str);
+ addr6.sin6_port = htons(PORT);
+ sz = sizeof(addr6);
+ addr = (struct sockaddr *)&addr6;
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ for (i = 0; i < count; ++i) {
+ rcv_fds[i] = socket(family, proto, 0);
+ if (rcv_fds[i] < 0)
+ error(1, errno, "failed to create receive socket");
+
+ opt = 1;
+ if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT");
+
+ if (bind(rcv_fds[i], addr, sz))
+ error(1, errno, "failed to bind receive socket");
+
+ if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
+ error(1, errno, "tcp: failed to listen on receive port");
+ else if (proto == SOCK_DCCP) {
+ if (setsockopt(rcv_fds[i], SOL_DCCP,
+ DCCP_SOCKOPT_SERVICE,
+ &(int) {htonl(42)}, sizeof(int)))
+ error(1, errno, "failed to setsockopt");
+
+ if (listen(rcv_fds[i], 10))
+ error(1, errno, "dccp: failed to listen on receive port");
+ }
+ }
+}
+
+static int connect_and_send(int family, int proto)
+{
+ struct sockaddr_in saddr4 = {0};
+ struct sockaddr_in daddr4 = {0};
+ struct sockaddr_in6 saddr6 = {0};
+ struct sockaddr_in6 daddr6 = {0};
+ struct sockaddr *saddr, *daddr;
+ int fd, sz;
+
+ switch (family) {
+ case AF_INET:
+ saddr4.sin_family = AF_INET;
+ saddr4.sin_addr.s_addr = htonl(INADDR_ANY);
+ saddr4.sin_port = 0;
+
+ daddr4.sin_family = AF_INET;
+ if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr))
+ error(1, errno, "inet_pton failed: %s", IP4_ADDR);
+ daddr4.sin_port = htons(PORT);
+
+ sz = sizeof(saddr4);
+ saddr = (struct sockaddr *)&saddr4;
+ daddr = (struct sockaddr *)&daddr4;
+ break;
+ case AF_INET6:
+ saddr6.sin6_family = AF_INET6;
+ saddr6.sin6_addr = in6addr_any;
+
+ daddr6.sin6_family = AF_INET6;
+ if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr))
+ error(1, errno, "inet_pton failed: %s", IP6_ADDR);
+ daddr6.sin6_port = htons(PORT);
+
+ sz = sizeof(saddr6);
+ saddr = (struct sockaddr *)&saddr6;
+ daddr = (struct sockaddr *)&daddr6;
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ fd = socket(family, proto, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (proto == SOCK_DCCP &&
+ setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
+ &(int){htonl(42)}, sizeof(int)))
+ error(1, errno, "failed to setsockopt");
+
+ if (bind(fd, saddr, sz))
+ error(1, errno, "failed to bind send socket");
+
+ if (connect(fd, daddr, sz))
+ error(1, errno, "failed to connect send socket");
+
+ if (send(fd, "a", 1, 0) < 0)
+ error(1, errno, "failed to send message");
+
+ return fd;
+}
+
+static int receive_once(int epfd, int proto)
+{
+ struct epoll_event ev;
+ int i, fd;
+ char buf[8];
+
+ i = epoll_wait(epfd, &ev, 1, 3);
+ if (i < 0)
+ error(1, errno, "epoll_wait failed");
+
+ if (proto == SOCK_STREAM || proto == SOCK_DCCP) {
+ fd = accept(ev.data.fd, NULL, NULL);
+ if (fd < 0)
+ error(1, errno, "failed to accept");
+ i = recv(fd, buf, sizeof(buf), 0);
+ close(fd);
+ } else {
+ i = recv(ev.data.fd, buf, sizeof(buf), 0);
+ }
+
+ if (i < 0)
+ error(1, errno, "failed to recv");
+
+ return ev.data.fd;
+}
+
+static void test(int *rcv_fds, int count, int family, int proto, int fd)
+{
+ struct epoll_event ev;
+ int epfd, i, send_fd, recv_fd;
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+
+ ev.events = EPOLLIN;
+ for (i = 0; i < count; ++i) {
+ ev.data.fd = rcv_fds[i];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
+ error(1, errno, "failed to register sock epoll");
+ }
+
+ send_fd = connect_and_send(family, proto);
+
+ recv_fd = receive_once(epfd, proto);
+ if (recv_fd != fd)
+ error(1, 0, "received on an unexpected socket");
+
+ close(send_fd);
+ close(epfd);
+}
+
+int main(void)
+{
+ /* Below we test that a socket listening on a specific address
+ * is always selected in preference over a socket listening
+ * on addr_any. Bugs where this is not the case often result
+ * in sockets created first or last to get picked. So below
+ * we make sure that there are always addr_any sockets created
+ * before and after a specific socket is created.
+ */
+ int rcv_fds[10], i;
+
+ fprintf(stderr, "---- UDP IPv4 ----\n");
+ build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL);
+ build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 4, 1, IP4_ADDR);
+ build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL);
+ test(rcv_fds, 9, AF_INET, SOCK_DGRAM, rcv_fds[4]);
+ for (i = 0; i < 9; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- UDP IPv6 ----\n");
+ build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 4, 1, IP6_ADDR);
+ build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL);
+ test(rcv_fds, 9, AF_INET6, SOCK_DGRAM, rcv_fds[4]);
+ for (i = 0; i < 9; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- UDP IPv4 mapped to IPv6 ----\n");
+ build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 4, 1, IP4_MAPPED6);
+ build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL);
+ test(rcv_fds, 9, AF_INET, SOCK_DGRAM, rcv_fds[4]);
+ for (i = 0; i < 9; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- TCP IPv4 ----\n");
+ build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL);
+ build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 4, 1, IP4_ADDR);
+ build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL);
+ test(rcv_fds, 9, AF_INET, SOCK_STREAM, rcv_fds[4]);
+ for (i = 0; i < 9; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- TCP IPv6 ----\n");
+ build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 4, 1, IP6_ADDR);
+ build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL);
+ test(rcv_fds, 9, AF_INET6, SOCK_STREAM, rcv_fds[4]);
+ for (i = 0; i < 9; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- TCP IPv4 mapped to IPv6 ----\n");
+ build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 4, 1, IP4_MAPPED6);
+ build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL);
+ test(rcv_fds, 9, AF_INET, SOCK_STREAM, rcv_fds[4]);
+ for (i = 0; i < 9; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- DCCP IPv4 ----\n");
+ build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 2, 2, NULL);
+ build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 4, 1, IP4_ADDR);
+ build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 5, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 7, 2, NULL);
+ test(rcv_fds, 9, AF_INET, SOCK_DCCP, rcv_fds[4]);
+ for (i = 0; i < 9; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- DCCP IPv6 ----\n");
+ build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 2, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 4, 1, IP6_ADDR);
+ build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 5, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 7, 2, NULL);
+ test(rcv_fds, 9, AF_INET6, SOCK_DCCP, rcv_fds[4]);
+ for (i = 0; i < 9; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- DCCP IPv4 mapped to IPv6 ----\n");
+ build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 2, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 4, 1, IP4_MAPPED6);
+ build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 5, 2, NULL);
+ build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 7, 2, NULL);
+ test(rcv_fds, 9, AF_INET, SOCK_DCCP, rcv_fds[4]);
+ for (i = 0; i < 9; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_addr_any.sh b/tools/testing/selftests/net/reuseport_addr_any.sh
new file mode 100755
index 000000000000..104592f62ad4
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_addr_any.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+./in_netns.sh ./reuseport_addr_any
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index e101af52d1d6..bb3436f8807f 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -955,6 +955,58 @@ kci_test_ip6erspan()
ip netns del "$testns"
}
+kci_test_fdb_get()
+{
+ IP="ip -netns testns"
+ BRIDGE="bridge -netns testns"
+ brdev="test-br0"
+ vxlandev="vxlan10"
+ test_mac=de:ad:be:ef:13:37
+ localip="10.0.2.2"
+ dstip="10.0.2.3"
+ ret=0
+
+ bridge fdb help 2>&1 |grep -q 'bridge fdb get'
+ if [ $? -ne 0 ];then
+ echo "SKIP: fdb get tests: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ ip netns add testns
+ if [ $? -ne 0 ]; then
+ echo "SKIP fdb get tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ $IP link add "$vxlandev" type vxlan id 10 local $localip \
+ dstport 4789 2>/dev/null
+ check_err $?
+ $IP link add name "$brdev" type bridge &>/dev/null
+ check_err $?
+ $IP link set dev "$vxlandev" master "$brdev" &>/dev/null
+ check_err $?
+ $BRIDGE fdb add $test_mac dev "$vxlandev" master &>/dev/null
+ check_err $?
+ $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self &>/dev/null
+ check_err $?
+
+ $BRIDGE fdb get $test_mac brport "$vxlandev" 2>/dev/null | grep -q "dev $vxlandev master $brdev"
+ check_err $?
+ $BRIDGE fdb get $test_mac br "$brdev" 2>/dev/null | grep -q "dev $vxlandev master $brdev"
+ check_err $?
+ $BRIDGE fdb get $test_mac dev "$vxlandev" self 2>/dev/null | grep -q "dev $vxlandev dst $dstip"
+ check_err $?
+
+ ip netns del testns &>/dev/null
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: bridge fdb get"
+ return 1
+ fi
+
+ echo "PASS: bridge fdb get"
+}
+
kci_test_rtnl()
{
kci_add_dummy
@@ -979,6 +1031,7 @@ kci_test_rtnl()
kci_test_macsec
kci_test_ipsec
kci_test_ipsec_offload
+ kci_test_fdb_get
kci_del_dummy
}
diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh
new file mode 100755
index 000000000000..8db35b99457c
--- /dev/null
+++ b/tools/testing/selftests/net/xfrm_policy.sh
@@ -0,0 +1,302 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check xfrm policy resolution. Topology:
+#
+# 1.2 1.1 3.1 3.10 2.1 2.2
+# eth1 eth1 veth0 veth0 eth1 eth1
+# ns1 ---- ns3 ----- ns4 ---- ns2
+#
+# ns3 and ns4 are connected via ipsec tunnel.
+# pings from ns1 to ns2 (and vice versa) are supposed to work like this:
+# ns1: ping 10.0.2.2: passes via ipsec tunnel.
+# ns2: ping 10.0.1.2: passes via ipsec tunnel.
+
+# ns1: ping 10.0.1.253: passes via ipsec tunnel (direct policy)
+# ns2: ping 10.0.2.253: passes via ipsec tunnel (direct policy)
+#
+# ns1: ping 10.0.2.254: does NOT pass via ipsec tunnel (exception)
+# ns2: ping 10.0.1.254: does NOT pass via ipsec tunnel (exception)
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+policy_checks_ok=1
+
+KEY_SHA=0xdeadbeef1234567890abcdefabcdefabcdefabcd
+KEY_AES=0x0123456789abcdef0123456789012345
+SPI1=0x1
+SPI2=0x2
+
+do_esp() {
+ local ns=$1
+ local me=$2
+ local remote=$3
+ local lnet=$4
+ local rnet=$5
+ local spi_out=$6
+ local spi_in=$7
+
+ ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
+ ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
+
+ # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
+ ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow
+ # to fwd decrypted packets after esp processing:
+ ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow
+}
+
+do_esp_policy_get_check() {
+ local ns=$1
+ local lnet=$2
+ local rnet=$3
+
+ ip -net $ns xfrm policy get src $lnet dst $rnet dir out > /dev/null
+ if [ $? -ne 0 ] && [ $policy_checks_ok -eq 1 ] ;then
+ policy_checks_ok=0
+ echo "FAIL: ip -net $ns xfrm policy get src $lnet dst $rnet dir out"
+ ret=1
+ fi
+
+ ip -net $ns xfrm policy get src $rnet dst $lnet dir fwd > /dev/null
+ if [ $? -ne 0 ] && [ $policy_checks_ok -eq 1 ] ;then
+ policy_checks_ok=0
+ echo "FAIL: ip -net $ns xfrm policy get src $rnet dst $lnet dir fwd"
+ ret=1
+ fi
+}
+
+do_exception() {
+ local ns=$1
+ local me=$2
+ local remote=$3
+ local encryptip=$4
+ local plain=$5
+
+ # network $plain passes without tunnel
+ ip -net $ns xfrm policy add dst $plain dir out priority 10 action allow
+
+ # direct policy for $encryptip, use tunnel, higher prio takes precedence
+ ip -net $ns xfrm policy add dst $encryptip dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow
+}
+
+# policies that are not supposed to match any packets generated in this test.
+do_dummies4() {
+ local ns=$1
+
+ for i in $(seq 10 16);do
+ # dummy policy with wildcard src/dst.
+ echo netns exec $ns ip xfrm policy add src 0.0.0.0/0 dst 10.$i.99.0/30 dir out action block
+ echo netns exec $ns ip xfrm policy add src 10.$i.99.0/30 dst 0.0.0.0/0 dir out action block
+ for j in $(seq 32 64);do
+ echo netns exec $ns ip xfrm policy add src 10.$i.1.0/30 dst 10.$i.$j.0/30 dir out action block
+ # silly, as it encompasses the one above too, but its allowed:
+ echo netns exec $ns ip xfrm policy add src 10.$i.1.0/29 dst 10.$i.$j.0/29 dir out action block
+ # and yet again, even more broad one.
+ echo netns exec $ns ip xfrm policy add src 10.$i.1.0/24 dst 10.$i.$j.0/24 dir out action block
+ echo netns exec $ns ip xfrm policy add src 10.$i.$j.0/24 dst 10.$i.1.0/24 dir fwd action block
+ done
+ done | ip -batch /dev/stdin
+}
+
+do_dummies6() {
+ local ns=$1
+
+ for i in $(seq 10 16);do
+ for j in $(seq 32 64);do
+ echo netns exec $ns ip xfrm policy add src dead:$i::/64 dst dead:$i:$j::/64 dir out action block
+ echo netns exec $ns ip xfrm policy add src dead:$i:$j::/64 dst dead:$i::/24 dir fwd action block
+ done
+ done | ip -batch /dev/stdin
+}
+
+check_ipt_policy_count()
+{
+ ns=$1
+
+ ip netns exec $ns iptables-save -c |grep policy | ( read c rest
+ ip netns exec $ns iptables -Z
+ if [ x"$c" = x'[0:0]' ]; then
+ exit 0
+ elif [ x"$c" = x ]; then
+ echo "ERROR: No counters"
+ ret=1
+ exit 111
+ else
+ exit 1
+ fi
+ )
+}
+
+check_xfrm() {
+ # 0: iptables -m policy rule count == 0
+ # 1: iptables -m policy rule count != 0
+ rval=$1
+ ip=$2
+ lret=0
+
+ ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null
+
+ check_ipt_policy_count ns3
+ if [ $? -ne $rval ] ; then
+ lret=1
+ fi
+ check_ipt_policy_count ns4
+ if [ $? -ne $rval ] ; then
+ lret=1
+ fi
+
+ ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null
+
+ check_ipt_policy_count ns3
+ if [ $? -ne $rval ] ; then
+ lret=1
+ fi
+ check_ipt_policy_count ns4
+ if [ $? -ne $rval ] ; then
+ lret=1
+ fi
+
+ return $lret
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+ip -Version 2>/dev/null >/dev/null
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without the ip tool"
+ exit $ksft_skip
+fi
+
+# needed to check if policy lookup got valid ipsec result
+iptables --version 2>/dev/null >/dev/null
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without iptables tool"
+ exit $ksft_skip
+fi
+
+for i in 1 2 3 4; do
+ ip netns add ns$i
+ ip -net ns$i link set lo up
+done
+
+DEV=veth0
+ip link add $DEV netns ns1 type veth peer name eth1 netns ns3
+ip link add $DEV netns ns2 type veth peer name eth1 netns ns4
+
+ip link add $DEV netns ns3 type veth peer name veth0 netns ns4
+
+DEV=veth0
+for i in 1 2; do
+ ip -net ns$i link set $DEV up
+ ip -net ns$i addr add 10.0.$i.2/24 dev $DEV
+ ip -net ns$i addr add dead:$i::2/64 dev $DEV
+
+ ip -net ns$i addr add 10.0.$i.253 dev $DEV
+ ip -net ns$i addr add 10.0.$i.254 dev $DEV
+ ip -net ns$i addr add dead:$i::fd dev $DEV
+ ip -net ns$i addr add dead:$i::fe dev $DEV
+done
+
+for i in 3 4; do
+ip -net ns$i link set eth1 up
+ip -net ns$i link set veth0 up
+done
+
+ip -net ns1 route add default via 10.0.1.1
+ip -net ns2 route add default via 10.0.2.1
+
+ip -net ns3 addr add 10.0.1.1/24 dev eth1
+ip -net ns3 addr add 10.0.3.1/24 dev veth0
+ip -net ns3 addr add 2001:1::1/64 dev eth1
+ip -net ns3 addr add 2001:3::1/64 dev veth0
+
+ip -net ns3 route add default via 10.0.3.10
+
+ip -net ns4 addr add 10.0.2.1/24 dev eth1
+ip -net ns4 addr add 10.0.3.10/24 dev veth0
+ip -net ns4 addr add 2001:2::1/64 dev eth1
+ip -net ns4 addr add 2001:3::10/64 dev veth0
+ip -net ns4 route add default via 10.0.3.1
+
+for j in 4 6; do
+ for i in 3 4;do
+ ip netns exec ns$i sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null
+ ip netns exec ns$i sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null
+ done
+done
+
+# abuse iptables rule counter to check if ping matches a policy
+ip netns exec ns3 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
+ip netns exec ns4 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not insert iptables rule"
+ for i in 1 2 3 4;do ip netns del ns$i;done
+ exit $ksft_skip
+fi
+
+# localip remoteip localnet remotenet
+do_esp ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
+do_esp ns3 dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2
+do_esp ns4 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
+do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1
+
+do_dummies4 ns3
+do_dummies6 ns4
+
+do_esp_policy_get_check ns3 10.0.1.0/24 10.0.2.0/24
+do_esp_policy_get_check ns4 10.0.2.0/24 10.0.1.0/24
+do_esp_policy_get_check ns3 dead:1::/64 dead:2::/64
+do_esp_policy_get_check ns4 dead:2::/64 dead:1::/64
+
+# ping to .254 should use ipsec, exception is not installed.
+check_xfrm 1 254
+if [ $? -ne 0 ]; then
+ echo "FAIL: expected ping to .254 to use ipsec tunnel"
+ ret=1
+else
+ echo "PASS: policy before exception matches"
+fi
+
+# installs exceptions
+# localip remoteip encryptdst plaindst
+do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
+do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28
+
+do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96
+do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96
+
+# ping to .254 should now be excluded from the tunnel
+check_xfrm 0 254
+if [ $? -ne 0 ]; then
+ echo "FAIL: expected ping to .254 to fail"
+ ret=1
+else
+ echo "PASS: ping to .254 bypassed ipsec tunnel"
+fi
+
+# ping to .253 should use use ipsec due to direct policy exception.
+check_xfrm 1 253
+if [ $? -ne 0 ]; then
+ echo "FAIL: expected ping to .253 to use ipsec tunnel"
+ ret=1
+else
+ echo "PASS: direct policy matches"
+fi
+
+# ping to .2 should use ipsec.
+check_xfrm 1 2
+if [ $? -ne 0 ]; then
+ echo "FAIL: expected ping to .2 to use ipsec tunnel"
+ ret=1
+else
+ echo "PASS: policy matches"
+fi
+
+for i in 1 2 3 4;do ip netns del ns$i;done
+
+exit $ret