aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <[email protected]>2022-08-03 19:20:17 -0700
committerJakub Kicinski <[email protected]>2022-08-03 19:20:18 -0700
commit7de196a6aa3a02f6079f5bf8796df4cb6cb5e783 (patch)
tree68ce7f01a394516f08b9d36579bbc3f1fb81bf64
parentf86d1fbbe7858884d6754534a0afbb74fc30bc26 (diff)
parentcba8d8f57dfb1d01d961a0e50e7fddb82df57ad7 (diff)
Merge branch 'make-dsa-work-with-bonding-s-arp-monitor'
Vladimir Oltean says: ==================== Make DSA work with bonding's ARP monitor Since commit 2b86cb829976 ("net: dsa: declare lockless TX feature for slave ports") in v5.7, DSA breaks the ARP monitoring logic from the bonding driver, fact which was pointed out by Brian Hutchinson who uses a linux-5.10.y stable kernel. Initially I got lured by other similar hacks introduced for other NETIF_F_LLTX drivers, which, inspired by the bonding documentation, update the trans_start of their TX queues by hand. However Jakub pointed out that this simply isn't a proper solution, and after coming to think more about it, I agree, and it doesn't work properly with DSA nor is it maintainable for the future changes I plan for it (multiple DSA masters in a LAG). I've tested these changes using a DSA-based setup and a veth-based setup, using the active-backup mode and ARP monitoring, with and without arp_validate. Link to v1: https://patchwork.kernel.org/project/netdevbpf/patch/[email protected]/ Link to v2: https://patchwork.kernel.org/project/netdevbpf/patch/[email protected]/ ==================== Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
-rw-r--r--Documentation/networking/bonding.rst9
-rw-r--r--drivers/net/bonding/bond_main.c35
-rw-r--r--drivers/net/veth.c4
-rw-r--r--include/net/bonding.h13
-rw-r--r--net/sched/sch_generic.c8
5 files changed, 34 insertions, 35 deletions
diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst
index 53a18ff7cf23..7823a069a903 100644
--- a/Documentation/networking/bonding.rst
+++ b/Documentation/networking/bonding.rst
@@ -1982,15 +1982,6 @@ uses the response as an indication that the link is operating. This
gives some assurance that traffic is actually flowing to and from one
or more peers on the local network.
-The ARP monitor relies on the device driver itself to verify
-that traffic is flowing. In particular, the driver must keep up to
-date the last receive time, dev->last_rx. Drivers that use NETIF_F_LLTX
-flag must also update netdev_queue->trans_start. If they do not, then the
-ARP monitor will immediately fail any slaves using that driver, and
-those slaves will stay down. If networking monitoring (tcpdump, etc)
-shows the ARP requests and replies on the network, then it may be that
-your device driver is not updating last_rx and trans_start.
-
7.2 Configuring Multiple ARP Targets
------------------------------------
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e75acb14d066..ab7fdbbc2530 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2001,6 +2001,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
for (i = 0; i < BOND_MAX_ARP_TARGETS; i++)
new_slave->target_last_arp_rx[i] = new_slave->last_rx;
+ new_slave->last_tx = new_slave->last_rx;
+
if (bond->params.miimon && !bond->params.use_carrier) {
link_reporting = bond_check_dev_link(bond, slave_dev, 1);
@@ -2884,8 +2886,11 @@ static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip,
return;
}
- if (bond_handle_vlan(slave, tags, skb))
+ if (bond_handle_vlan(slave, tags, skb)) {
+ slave_update_last_tx(slave);
arp_xmit(skb);
+ }
+
return;
}
@@ -3074,8 +3079,7 @@ static int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
curr_active_slave->last_link_up))
bond_validate_arp(bond, slave, tip, sip);
else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
- bond_time_in_interval(bond,
- dev_trans_start(curr_arp_slave->dev), 1))
+ bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1))
bond_validate_arp(bond, slave, sip, tip);
out_unlock:
@@ -3103,8 +3107,10 @@ static void bond_ns_send(struct slave *slave, const struct in6_addr *daddr,
}
addrconf_addr_solict_mult(daddr, &mcaddr);
- if (bond_handle_vlan(slave, tags, skb))
+ if (bond_handle_vlan(slave, tags, skb)) {
+ slave_update_last_tx(slave);
ndisc_send_skb(skb, &mcaddr, saddr);
+ }
}
static void bond_ns_send_all(struct bonding *bond, struct slave *slave)
@@ -3246,8 +3252,7 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond,
curr_active_slave->last_link_up))
bond_validate_ns(bond, slave, saddr, daddr);
else if (curr_arp_slave &&
- bond_time_in_interval(bond,
- dev_trans_start(curr_arp_slave->dev), 1))
+ bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1))
bond_validate_ns(bond, slave, saddr, daddr);
out:
@@ -3335,12 +3340,12 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
* so it can wait
*/
bond_for_each_slave_rcu(bond, slave, iter) {
- unsigned long trans_start = dev_trans_start(slave->dev);
+ unsigned long last_tx = slave_last_tx(slave);
bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
if (slave->link != BOND_LINK_UP) {
- if (bond_time_in_interval(bond, trans_start, 1) &&
+ if (bond_time_in_interval(bond, last_tx, 1) &&
bond_time_in_interval(bond, slave->last_rx, 1)) {
bond_propose_link_state(slave, BOND_LINK_UP);
@@ -3365,7 +3370,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
* when the source ip is 0, so don't take the link down
* if we don't know our ip yet
*/
- if (!bond_time_in_interval(bond, trans_start, bond->params.missed_max) ||
+ if (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) ||
!bond_time_in_interval(bond, slave->last_rx, bond->params.missed_max)) {
bond_propose_link_state(slave, BOND_LINK_DOWN);
@@ -3431,7 +3436,7 @@ re_arm:
*/
static int bond_ab_arp_inspect(struct bonding *bond)
{
- unsigned long trans_start, last_rx;
+ unsigned long last_tx, last_rx;
struct list_head *iter;
struct slave *slave;
int commit = 0;
@@ -3482,9 +3487,9 @@ static int bond_ab_arp_inspect(struct bonding *bond)
* - (more than missed_max*delta since receive AND
* the bond has an IP address)
*/
- trans_start = dev_trans_start(slave->dev);
+ last_tx = slave_last_tx(slave);
if (bond_is_active_slave(slave) &&
- (!bond_time_in_interval(bond, trans_start, bond->params.missed_max) ||
+ (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) ||
!bond_time_in_interval(bond, last_rx, bond->params.missed_max))) {
bond_propose_link_state(slave, BOND_LINK_DOWN);
commit++;
@@ -3501,8 +3506,8 @@ static int bond_ab_arp_inspect(struct bonding *bond)
*/
static void bond_ab_arp_commit(struct bonding *bond)
{
- unsigned long trans_start;
struct list_head *iter;
+ unsigned long last_tx;
struct slave *slave;
bond_for_each_slave(bond, slave, iter) {
@@ -3511,10 +3516,10 @@ static void bond_ab_arp_commit(struct bonding *bond)
continue;
case BOND_LINK_UP:
- trans_start = dev_trans_start(slave->dev);
+ last_tx = slave_last_tx(slave);
if (rtnl_dereference(bond->curr_active_slave) != slave ||
(!rtnl_dereference(bond->curr_active_slave) &&
- bond_time_in_interval(bond, trans_start, 1))) {
+ bond_time_in_interval(bond, last_tx, 1))) {
struct slave *current_arp_slave;
current_arp_slave = rtnl_dereference(bond->current_arp_slave);
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 2cb833b3006a..466da01ba2e3 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -312,7 +312,6 @@ static bool veth_skb_is_eligible_for_gro(const struct net_device *dev,
static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
- struct netdev_queue *queue = NULL;
struct veth_rq *rq = NULL;
struct net_device *rcv;
int length = skb->len;
@@ -330,7 +329,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
rxq = skb_get_queue_mapping(skb);
if (rxq < rcv->real_num_rx_queues) {
rq = &rcv_priv->rq[rxq];
- queue = netdev_get_tx_queue(dev, rxq);
/* The napi pointer is available when an XDP program is
* attached or when GRO is enabled
@@ -342,8 +340,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
skb_tx_timestamp(skb);
if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
- if (queue)
- txq_trans_cond_update(queue);
if (!use_napi)
dev_lstats_add(dev, length);
} else {
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 6e78d657aa05..afd606df149a 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -161,8 +161,9 @@ struct slave {
struct net_device *dev; /* first - useful for panic debug */
struct bonding *bond; /* our master */
int delay;
- /* all three in jiffies */
+ /* all 4 in jiffies */
unsigned long last_link_up;
+ unsigned long last_tx;
unsigned long last_rx;
unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];
s8 link; /* one of BOND_LINK_XXXX */
@@ -540,6 +541,16 @@ static inline unsigned long slave_last_rx(struct bonding *bond,
return slave->last_rx;
}
+static inline void slave_update_last_tx(struct slave *slave)
+{
+ WRITE_ONCE(slave->last_tx, jiffies);
+}
+
+static inline unsigned long slave_last_tx(struct slave *slave)
+{
+ return READ_ONCE(slave->last_tx);
+}
+
#ifdef CONFIG_NET_POLL_CONTROLLER
static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave,
struct sk_buff *skb)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cc6eabee2830..d47b9689eba6 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -427,14 +427,10 @@ void __qdisc_run(struct Qdisc *q)
unsigned long dev_trans_start(struct net_device *dev)
{
- unsigned long val, res;
+ unsigned long res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start);
+ unsigned long val;
unsigned int i;
- if (is_vlan_dev(dev))
- dev = vlan_dev_real_dev(dev);
- else if (netif_is_macvlan(dev))
- dev = macvlan_dev_real_dev(dev);
- res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start);
for (i = 1; i < dev->num_tx_queues; i++) {
val = READ_ONCE(netdev_get_tx_queue(dev, i)->trans_start);
if (val && time_after(val, res))