Diffstat (limited to 'net/core/dev.c')
 -rw-r--r--   net/core/dev.c | 176
 1 file changed, 85 insertions(+), 91 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b67f2aa59dd..eb7fb6daa1ef 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	NET3	Protocol independent device support routines.
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *	Derived from the non IP parts of dev.c 1.0.19
  *		Authors:	Ross Biro
  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -131,7 +127,6 @@
 #include <trace/events/napi.h>
 #include <trace/events/net.h>
 #include <trace/events/skb.h>
-#include <linux/pci.h>
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
 #include <linux/static_key.h>
@@ -146,6 +141,7 @@
 #include <net/udp_tunnel.h>
 #include <linux/net_namespace.h>
 #include <linux/indirect_call_wrapper.h>
+#include <net/devlink.h>
 
 #include "net-sysfs.h"
 
@@ -1184,7 +1180,21 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	BUG_ON(!dev_net(dev));
 
 	net = dev_net(dev);
-	if (dev->flags & IFF_UP)
+
+	/* Some auto-enslaved devices e.g. failover slaves are
+	 * special, as userspace might rename the device after
+	 * the interface had been brought up and running since
+	 * the point kernel initiated auto-enslavement. Allow
+	 * live name change even when these slave devices are
+	 * up and running.
+	 *
+	 * Typically, users of these auto-enslaving devices
+	 * don't actually care about slave name change, as
+	 * they are supposed to operate on master interface
+	 * directly.
+	 */
+	if (dev->flags & IFF_UP &&
+	    likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
 		return -EBUSY;
 
 	write_seqcount_begin(&devnet_rename_seq);
@@ -3468,6 +3478,15 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
 		__qdisc_drop(skb, &to_free);
 		rc = NET_XMIT_DROP;
+	} else if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
+		   qdisc_run_begin(q)) {
+		qdisc_bstats_cpu_update(q, skb);
+
+		if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
+			__qdisc_run(q);
+
+		qdisc_run_end(q);
+		rc = NET_XMIT_SUCCESS;
 	} else {
 		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
 		qdisc_run(q);
@@ -3556,9 +3575,6 @@ static void skb_update_prio(struct sk_buff *skb)
 #define skb_update_prio(skb)
 #endif
 
-DEFINE_PER_CPU(int, xmit_recursion);
-EXPORT_SYMBOL(xmit_recursion);
-
 /**
  *	dev_loopback_xmit - loop back @skb
  *	@net: network namespace this loopback is happening in
@@ -3689,23 +3705,21 @@ get_cpus_map:
 }
 
 u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
-		     struct net_device *sb_dev,
-		     select_queue_fallback_t fallback)
+		     struct net_device *sb_dev)
 {
 	return 0;
 }
 EXPORT_SYMBOL(dev_pick_tx_zero);
 
 u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
-		       struct net_device *sb_dev,
-		       select_queue_fallback_t fallback)
+		       struct net_device *sb_dev)
 {
 	return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
 }
 EXPORT_SYMBOL(dev_pick_tx_cpu_id);
 
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
-			    struct net_device *sb_dev)
+u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
+		   struct net_device *sb_dev)
 {
 	struct sock *sk = skb->sk;
 	int queue_index = sk_tx_queue_get(sk);
@@ -3729,10 +3743,11 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
 
 	return queue_index;
 }
+EXPORT_SYMBOL(netdev_pick_tx);
 
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-				    struct sk_buff *skb,
-				    struct net_device *sb_dev)
+struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
+					 struct sk_buff *skb,
+					 struct net_device *sb_dev)
 {
 	int queue_index = 0;
 
@@ -3747,10 +3762,9 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 		const struct net_device_ops *ops = dev->netdev_ops;
 
 		if (ops->ndo_select_queue)
-			queue_index = ops->ndo_select_queue(dev, skb, sb_dev,
-							    __netdev_pick_tx);
+			queue_index = ops->ndo_select_queue(dev, skb, sb_dev);
 		else
-			queue_index = __netdev_pick_tx(dev, skb, sb_dev);
+			queue_index = netdev_pick_tx(dev, skb, sb_dev);
 
 		queue_index = netdev_cap_txqueue(dev, queue_index);
 	}
@@ -3824,7 +3838,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 	else
 		skb_dst_force(skb);
 
-	txq = netdev_pick_tx(dev, skb, sb_dev);
+	txq = netdev_core_pick_tx(dev, skb, sb_dev);
 	q = rcu_dereference_bh(txq->qdisc);
 
 	trace_net_dev_queue(skb);
@@ -3849,8 +3863,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 		int cpu = smp_processor_id(); /* ok because BHs are off */
 
 		if (txq->xmit_lock_owner != cpu) {
-			if (unlikely(__this_cpu_read(xmit_recursion) >
-				     XMIT_RECURSION_LIMIT))
+			if (dev_xmit_recursion())
 				goto recursion_alert;
 
 			skb = validate_xmit_skb(skb, dev, &again);
@@ -3860,9 +3873,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 			HARD_TX_LOCK(dev, txq, cpu);
 
 			if (!netif_xmit_stopped(txq)) {
-				__this_cpu_inc(xmit_recursion);
+				dev_xmit_recursion_inc();
 				skb = dev_hard_start_xmit(skb, dev, txq, &rc);
-				__this_cpu_dec(xmit_recursion);
+				dev_xmit_recursion_dec();
 				if (dev_xmit_complete(rc)) {
 					HARD_TX_UNLOCK(dev, txq);
 					goto out;
@@ -3975,9 +3988,9 @@ EXPORT_SYMBOL(rps_sock_flow_table);
 u32 rps_cpu_mask __read_mostly;
 EXPORT_SYMBOL(rps_cpu_mask);
 
-struct static_key rps_needed __read_mostly;
+struct static_key_false rps_needed __read_mostly;
 EXPORT_SYMBOL(rps_needed);
-struct static_key rfs_needed __read_mostly;
+struct static_key_false rfs_needed __read_mostly;
 EXPORT_SYMBOL(rfs_needed);
 
 static struct rps_dev_flow *
@@ -4429,7 +4442,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
 	bool free_skb = true;
 	int cpu, rc;
 
-	txq = netdev_pick_tx(dev, skb, NULL);
+	txq = netdev_core_pick_tx(dev, skb, NULL);
 	cpu = smp_processor_id();
 	HARD_TX_LOCK(dev, txq, cpu);
 	if (!netif_xmit_stopped(txq)) {
@@ -4485,25 +4498,8 @@ static int netif_rx_internal(struct sk_buff *skb)
 
 	trace_netif_rx(skb);
 
-	if (static_branch_unlikely(&generic_xdp_needed_key)) {
-		int ret;
-
-		preempt_disable();
-		rcu_read_lock();
-		ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
-		rcu_read_unlock();
-		preempt_enable();
-
-		/* Consider XDP consuming the packet a success from
-		 * the netdev point of view we do not want to count
-		 * this as an error.
-		 */
-		if (ret != XDP_PASS)
-			return NET_RX_SUCCESS;
-	}
-
 #ifdef CONFIG_RPS
-	if (static_key_false(&rps_needed)) {
+	if (static_branch_unlikely(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
@@ -4841,6 +4837,18 @@ another_round:
 
 	__this_cpu_inc(softnet_data.processed);
 
+	if (static_branch_unlikely(&generic_xdp_needed_key)) {
+		int ret2;
+
+		preempt_disable();
+		ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+		preempt_enable();
+
+		if (ret2 != XDP_PASS)
+			return NET_RX_DROP;
+		skb_reset_mac_len(skb);
+	}
+
 	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
 	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
 		skb = skb_vlan_untag(skb);
@@ -4970,7 +4978,8 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
 
 	ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
 	if (pt_prev)
-		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+		ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
+					 skb->dev, pt_prev, orig_dev);
 	return ret;
 }
 
@@ -5012,10 +5021,13 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head,
 	if (list_empty(head))
 		return;
 	if (pt_prev->list_func != NULL)
-		pt_prev->list_func(head, pt_prev, orig_dev);
+		INDIRECT_CALL_INET(pt_prev->list_func, ipv6_list_rcv,
+				   ip_list_rcv, head, pt_prev, orig_dev);
 	else
-		list_for_each_entry_safe(skb, next, head, list)
+		list_for_each_entry_safe(skb, next, head, list) {
+			skb_list_del_init(skb);
 			pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+		}
 }
 
 static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
@@ -5157,22 +5169,9 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 	if (skb_defer_rx_timestamp(skb))
 		return NET_RX_SUCCESS;
 
-	if (static_branch_unlikely(&generic_xdp_needed_key)) {
-		int ret;
-
-		preempt_disable();
-		rcu_read_lock();
-		ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
-		rcu_read_unlock();
-		preempt_enable();
-
-		if (ret != XDP_PASS)
-			return NET_RX_DROP;
-	}
-
 	rcu_read_lock();
 #ifdef CONFIG_RPS
-	if (static_key_false(&rps_needed)) {
+	if (static_branch_unlikely(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
@@ -5190,7 +5189,6 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 
 static void netif_receive_skb_list_internal(struct list_head *head)
 {
-	struct bpf_prog *xdp_prog = NULL;
 	struct sk_buff *skb, *next;
 	struct list_head sublist;
 
@@ -5203,24 +5201,9 @@ static void netif_receive_skb_list_internal(struct list_head *head)
 	}
 	list_splice_init(&sublist, head);
 
-	if (static_branch_unlikely(&generic_xdp_needed_key)) {
-		preempt_disable();
-		rcu_read_lock();
-		list_for_each_entry_safe(skb, next, head, list) {
-			xdp_prog = rcu_dereference(skb->dev->xdp_prog);
-			skb_list_del_init(skb);
-			if (do_xdp_generic(xdp_prog, skb) == XDP_PASS)
-				list_add_tail(&skb->list, &sublist);
-		}
-		rcu_read_unlock();
-		preempt_enable();
-		/* Put passed packets back on main list */
-		list_splice_init(&sublist, head);
-	}
-
 	rcu_read_lock();
 #ifdef CONFIG_RPS
-	if (static_key_false(&rps_needed)) {
+	if (static_branch_unlikely(&rps_needed)) {
 		list_for_each_entry_safe(skb, next, head, list) {
 			struct rps_dev_flow voidflow, *rflow = &voidflow;
 			int cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -5788,7 +5771,6 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 	skb_reset_mac_header(skb);
 	skb_gro_reset_offset(skb);
 
-	eth = skb_gro_header_fast(skb, 0);
 	if (unlikely(skb_gro_header_hard(skb, hlen))) {
 		eth = skb_gro_header_slow(skb, hlen, 0);
 		if (unlikely(!eth)) {
@@ -5798,6 +5780,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 			return NULL;
 		}
 	} else {
+		eth = (const struct ethhdr *)skb->data;
 		gro_pull_from_frag0(skb, hlen);
 		NAPI_GRO_CB(skb)->frag0 += hlen;
 		NAPI_GRO_CB(skb)->frag0_len -= hlen;
@@ -7870,10 +7853,14 @@ int dev_get_phys_port_name(struct net_device *dev,
 			   char *name, size_t len)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
+	int err;
 
-	if (!ops->ndo_get_phys_port_name)
-		return -EOPNOTSUPP;
-	return ops->ndo_get_phys_port_name(dev, name, len);
+	if (ops->ndo_get_phys_port_name) {
+		err = ops->ndo_get_phys_port_name(dev, name, len);
+		if (err != -EOPNOTSUPP)
+			return err;
+	}
+	return devlink_compat_phys_port_name_get(dev, name, len);
 }
 EXPORT_SYMBOL(dev_get_phys_port_name);
 
@@ -7893,14 +7880,21 @@ int dev_get_port_parent_id(struct net_device *dev,
 	struct netdev_phys_item_id first = { };
 	struct net_device *lower_dev;
 	struct list_head *iter;
-	int err = -EOPNOTSUPP;
+	int err;
 
-	if (ops->ndo_get_port_parent_id)
-		return ops->ndo_get_port_parent_id(dev, ppid);
+	if (ops->ndo_get_port_parent_id) {
+		err = ops->ndo_get_port_parent_id(dev, ppid);
+		if (err != -EOPNOTSUPP)
+			return err;
+	}
 
-	if (!recurse)
+	err = devlink_compat_switch_id_get(dev, ppid);
+	if (!err || err != -EOPNOTSUPP)
 		return err;
 
+	if (!recurse)
+		return -EOPNOTSUPP;
+
 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
 		err = dev_get_port_parent_id(lower_dev, ppid, recurse);
 		if (err)
@@ -8895,7 +8889,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
 		refcnt = netdev_refcnt_read(dev);
 
-		if (time_after(jiffies, warning_time + 10 * HZ)) {
+		if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) {
 			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
 				 dev->name, refcnt);
 			warning_time = jiffies;
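
Note on the xmit recursion change above: the bare per-CPU xmit_recursion counter is removed from dev.c, and __dev_queue_xmit() now goes through dev_xmit_recursion(), dev_xmit_recursion_inc() and dev_xmit_recursion_dec(), which are defined outside this file. A minimal sketch of how such helpers are presumed to look, assuming the counter is kept in per-CPU softnet_data (the softnet_data.xmit.recursion field name is an assumption, not shown in this diff):

/* Sketch only -- presumed shape of the helpers used by __dev_queue_xmit();
 * the softnet_data.xmit.recursion field is an assumption here.
 */
static inline int dev_xmit_recursion(void)
{
	return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
			XMIT_RECURSION_LIMIT);
}

static inline void dev_xmit_recursion_inc(void)
{
	__this_cpu_inc(softnet_data.xmit.recursion);
}

static inline void dev_xmit_recursion_dec(void)
{
	__this_cpu_dec(softnet_data.xmit.recursion);
}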
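
Note on the queue selection change above: with the select_queue_fallback_t argument dropped from ndo_select_queue() and netdev_pick_tx() now exported, a driver that only wants a special mapping for some traffic can defer to the core helper directly. A hedged sketch; example_select_queue() and the ETH_P_PAUSE check are purely illustrative, not taken from this diff:

/* Illustrative only: example_select_queue() is a hypothetical driver hook. */
static u16 example_select_queue(struct net_device *dev, struct sk_buff *skb,
				struct net_device *sb_dev)
{
	/* Hypothetical policy: pin pause frames to queue 0. */
	if (skb->protocol == htons(ETH_P_PAUSE))
		return 0;

	/* Otherwise defer to the core's default queue selection. */
	return netdev_pick_tx(dev, skb, sb_dev);
}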