Diffstat (limited to 'include/linux/netdevice.h')
-rw-r--r--  include/linux/netdevice.h  315
1 file changed, 134 insertions, 181 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e16a2a980ea8..f40f0ab3847a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -192,6 +192,7 @@ struct net_device_stats {
#ifdef CONFIG_RPS
#include <linux/static_key.h>
extern struct static_key rps_needed;
+extern struct static_key rfs_needed;
#endif
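
The new rfs_needed static key lets accelerated RFS be gated separately from plain RPS. A minimal sketch of the expected fast-path test, assuming it mirrors how rps_needed is already used:

    if (static_key_false(&rfs_needed)) {
            /* consult the per-flow steering table only when aRFS
             * has actually been configured
             */
    }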
struct neighbour;
@@ -316,7 +317,6 @@ struct napi_struct {
unsigned int gro_count;
int (*poll)(struct napi_struct *, int);
#ifdef CONFIG_NETPOLL
- spinlock_t poll_lock;
int poll_owner;
#endif
struct net_device *dev;
@@ -334,6 +334,16 @@ enum {
NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
+ NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+};
+
+enum {
+ NAPIF_STATE_SCHED = (1UL << NAPI_STATE_SCHED),
+ NAPIF_STATE_DISABLE = (1UL << NAPI_STATE_DISABLE),
+ NAPIF_STATE_NPSVC = (1UL << NAPI_STATE_NPSVC),
+ NAPIF_STATE_HASHED = (1UL << NAPI_STATE_HASHED),
+ NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL),
+ NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL),
};
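
The NAPIF_* masks mirror the NAPI_STATE_* bit numbers so that several flags can be tested against a single read of the state word instead of one test_bit() per flag. A sketch of the pattern (helper name hypothetical):

    static bool foo_napi_claimable(struct napi_struct *napi)
    {
            unsigned long val = READ_ONCE(napi->state);

            /* refuse a NAPI that is disabled or already busy-polled */
            return !(val & (NAPIF_STATE_DISABLE | NAPIF_STATE_IN_BUSY_POLL));
    }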
enum gro_result {
@@ -342,6 +352,7 @@ enum gro_result {
GRO_HELD,
GRO_NORMAL,
GRO_DROP,
+ GRO_CONSUMED,
};
typedef enum gro_result gro_result_t;
@@ -453,32 +464,21 @@ static inline bool napi_reschedule(struct napi_struct *napi)
return false;
}
-void __napi_complete(struct napi_struct *n);
-void napi_complete_done(struct napi_struct *n, int work_done);
+bool napi_complete_done(struct napi_struct *n, int work_done);
/**
* napi_complete - NAPI processing complete
* @n: NAPI context
*
* Mark NAPI processing as complete.
* Consider using napi_complete_done() instead.
+ * Return false if device should avoid rearming interrupts.
*/
-static inline void napi_complete(struct napi_struct *n)
+static inline bool napi_complete(struct napi_struct *n)
{
return napi_complete_done(n, 0);
}
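
With the bool return, a driver's poll routine can key interrupt rearming directly off napi_complete_done(); when the core keeps the NAPI scheduled (e.g. for busy polling), the IRQ stays masked. A sketch for a hypothetical driver (all foo_* names are illustrative):

    static int foo_poll(struct napi_struct *napi, int budget)
    {
            struct foo_ring *ring = container_of(napi, struct foo_ring, napi);
            int work_done = foo_clean_rx(ring, budget);

            if (work_done < budget && napi_complete_done(napi, work_done))
                    foo_enable_rx_irq(ring);        /* safe to rearm */

            return work_done;
    }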
/**
- * napi_hash_add - add a NAPI to global hashtable
- * @napi: NAPI context
- *
- * Generate a new napi_id and store a @napi under it in napi_hash.
- * Used for busy polling (CONFIG_NET_RX_BUSY_POLL).
- * Note: This is normally automatically done from netif_napi_add(),
- * so might disappear in a future Linux version.
- */
-void napi_hash_add(struct napi_struct *napi);
-
-/**
* napi_hash_del - remove a NAPI from global table
* @napi: NAPI context
*
@@ -732,8 +732,8 @@ struct xps_dev_maps {
struct rcu_head rcu;
struct xps_map __rcu *cpu_map[0];
};
-#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \
- (nr_cpu_ids * sizeof(struct xps_map *)))
+#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \
+ (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
#endif /* CONFIG_XPS */
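
XPS_DEV_MAPS_SIZE() now takes the traffic-class count because cpu_map[] holds one xps_map pointer per (CPU, TC) pair rather than per CPU. An allocation sketch, assuming num_tc == 0 means a single implicit class:

    int tcs = dev->num_tc ? dev->num_tc : 1;
    struct xps_dev_maps *new_maps;

    new_maps = kzalloc(XPS_DEV_MAPS_SIZE(tcs), GFP_KERNEL);
    if (!new_maps)
            return -ENOMEM;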
#define TC_MAX_QUEUE 16
@@ -803,6 +803,7 @@ struct tc_to_netdev {
struct tc_cls_matchall_offload *cls_mall;
struct tc_cls_bpf_offload *cls_bpf;
};
+ bool egress_dev;
};
/* These structures hold the attributes of xdp state that are being passed
@@ -865,11 +866,15 @@ struct netdev_xdp {
* of useless work if you return NETDEV_TX_BUSY.
* Required; cannot be NULL.
*
- * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
- * netdev_features_t features);
- * Adjusts the requested feature flags according to device-specific
- * constraints, and returns the resulting flags. Must not modify
- * the device state.
+ * netdev_features_t (*ndo_features_check)(struct sk_buff *skb,
+ * struct net_device *dev,
+ * netdev_features_t features);
+ * Called by core transmit path to determine if device is capable of
+ * performing offload operations on a given packet. This is to give
+ * the device an opportunity to implement any restrictions that cannot
+ * be otherwise expressed by feature flags. The check is called with
+ * the set of features that the stack has calculated and it returns
+ * those the driver believes to be appropriate.
*
* u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
* void *accel_priv, select_queue_fallback_t fallback);
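
For reference, a minimal ndo_features_check implementation for a hypothetical device that cannot checksum encapsulated frames (driver name and restriction are illustrative):

    static netdev_features_t foo_features_check(struct sk_buff *skb,
                                                struct net_device *dev,
                                                netdev_features_t features)
    {
            /* a hardware limit that feature flags alone cannot express */
            if (skb->encapsulation)
                    features &= ~NETIF_F_CSUM_MASK;

            return features;
    }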
@@ -912,8 +917,8 @@ struct netdev_xdp {
* Callback used when the transmitter has not made any progress
* for dev->watchdog ticks.
*
- * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
- * struct rtnl_link_stats64 *storage);
+ * void (*ndo_get_stats64)(struct net_device *dev,
+ * struct rtnl_link_stats64 *storage);
* struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
* Called when a user wants to get the network device usage
* statistics. Drivers must do one of the following:
@@ -926,7 +931,7 @@ struct netdev_xdp {
* 3. Update dev->stats asynchronously and atomically, and define
* neither operation.
*
- * bool (*ndo_has_offload_stats)(int attr_id)
+ * bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id)
* Return true if this device supports offload stats of this attr_id.
*
* int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev,
@@ -963,11 +968,12 @@ struct netdev_xdp {
* with PF and querying it may introduce a theoretical security risk.
* int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting);
* int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
- * int (*ndo_setup_tc)(struct net_device *dev, u8 tc)
- * Called to setup 'tc' number of traffic classes in the net device. This
- * is always called from the stack with the rtnl lock held and netif tx
- * queues stopped. This allows the netdevice to perform queue management
- * safely.
+ * int (*ndo_setup_tc)(struct net_device *dev, u32 handle,
+ * __be16 protocol, struct tc_to_netdev *tc);
+ * Called to setup any 'tc' scheduler, classifier or action on @dev.
+ * This is always called from the stack with the rtnl lock held and netif
+ * tx queues stopped. This allows the netdevice to perform queue
+ * management safely.
*
* Fiber Channel over Ethernet (FCoE) offload functions.
* int (*ndo_fcoe_enable)(struct net_device *dev);
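
A sketch of the widened ndo_setup_tc hook documented above, for a hypothetical driver that only handles mqprio, assuming the tc_to_netdev union still carries the plain class count:

    static int foo_setup_tc(struct net_device *dev, u32 handle,
                            __be16 proto, struct tc_to_netdev *ntc)
    {
            if (ntc->type != TC_SETUP_MQPRIO)
                    return -EOPNOTSUPP;

            return netdev_set_num_tc(dev, ntc->tc);
    }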
@@ -1027,6 +1033,12 @@ struct netdev_xdp {
* Called to release previously enslaved netdev.
*
* Feature/offload setting functions.
+ * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
+ * netdev_features_t features);
+ * Adjusts the requested feature flags according to device-specific
+ * constraints, and returns the resulting flags. Must not modify
+ * the device state.
+ *
* int (*ndo_set_features)(struct net_device *dev, netdev_features_t features);
* Called to update device configuration to new features. Passed
* feature set might be less than what was returned by ndo_fix_features().
@@ -1099,15 +1111,6 @@ struct netdev_xdp {
* Callback to use for xmit over the accelerated station. This
* is used in place of ndo_start_xmit on accelerated net
* devices.
- * netdev_features_t (*ndo_features_check)(struct sk_buff *skb,
- * struct net_device *dev
- * netdev_features_t features);
- * Called by core transmit path to determine if device is capable of
- * performing offload operations on a given packet. This is to give
- * the device an opportunity to implement any restrictions that cannot
- * be otherwise expressed by feature flags. The check is called with
- * the set of features that the stack has calculated and it returns
- * those the driver believes to be appropriate.
* int (*ndo_set_tx_maxrate)(struct net_device *dev,
* int queue_index, u32 maxrate);
* Called when a user wants to set a max-rate limitation of specific
@@ -1164,9 +1167,9 @@ struct net_device_ops {
struct neigh_parms *);
void (*ndo_tx_timeout) (struct net_device *dev);
- struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
- struct rtnl_link_stats64 *storage);
- bool (*ndo_has_offload_stats)(int attr_id);
+ void (*ndo_get_stats64)(struct net_device *dev,
+ struct rtnl_link_stats64 *storage);
+ bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id);
int (*ndo_get_offload_stats)(int attr_id,
const struct net_device *dev,
void *attr_data);
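
Since ndo_get_stats64 now returns void, implementations simply fill @storage in place; there is no longer a pointer result for callers to propagate. A sketch for a hypothetical driver:

    static void foo_get_stats64(struct net_device *dev,
                                struct rtnl_link_stats64 *storage)
    {
            netdev_stats_to_stats64(storage, &dev->stats);
    }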
@@ -1182,9 +1185,6 @@ struct net_device_ops {
struct netpoll_info *info);
void (*ndo_netpoll_cleanup)(struct net_device *dev);
#endif
-#ifdef CONFIG_NET_RX_BUSY_POLL
- int (*ndo_busy_poll)(struct napi_struct *dev);
-#endif
int (*ndo_set_vf_mac)(struct net_device *dev,
int queue, u8 *mac);
int (*ndo_set_vf_vlan)(struct net_device *dev,
@@ -1456,7 +1456,6 @@ enum netdev_priv_flags {
* @ptype_specific: Device-specific, protocol-specific packet handlers
*
* @adj_list: Directly linked devices, like slaves for bonding
- * @all_adj_list: All linked devices, *including* neighbours
* @features: Currently active device features
* @hw_features: User-changeable features
*
@@ -1506,8 +1505,11 @@ enum netdev_priv_flags {
* @if_port: Selectable AUI, TP, ...
* @dma: DMA channel
* @mtu: Interface MTU value
+ * @min_mtu: Interface Minimum MTU value
+ * @max_mtu: Interface Maximum MTU value
* @type: Interface hardware type
* @hard_header_len: Maximum hardware header length.
+ * @min_header_len: Minimum hardware header length
*
* @needed_headroom: Extra headroom the hardware may need, but not in all
* cases can this be guaranteed
@@ -1549,7 +1551,6 @@ enum netdev_priv_flags {
* @ax25_ptr: AX.25 specific data
* @ieee80211_ptr: IEEE 802.11 specific data, assign before registering
*
- * @last_rx: Time of last Rx
* @dev_addr: Hw address (before bcast,
* because most packets are unicast)
*
@@ -1673,11 +1674,6 @@ struct net_device {
struct list_head lower;
} adj_list;
- struct {
- struct list_head upper;
- struct list_head lower;
- } all_adj_list;
-
netdev_features_t features;
netdev_features_t hw_features;
netdev_features_t wanted_features;
@@ -1726,8 +1722,11 @@ struct net_device {
unsigned char dma;
unsigned int mtu;
+ unsigned int min_mtu;
+ unsigned int max_mtu;
unsigned short type;
unsigned short hard_header_len;
+ unsigned short min_header_len;
unsigned short needed_headroom;
unsigned short needed_tailroom;
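
With min_mtu/max_mtu the core can range-check dev_set_mtu() itself, so a driver only declares its bounds at probe time instead of policing them in ndo_change_mtu. A probe-time sketch (FOO_MAX_FRAME_SIZE is hypothetical; ETH_MIN_MTU assumed available):

    dev->min_mtu = ETH_MIN_MTU;
    dev->max_mtu = FOO_MAX_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);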
@@ -1778,8 +1777,6 @@ struct net_device {
/*
* Cache lines mostly used on receive path (including eth_type_trans())
*/
- unsigned long last_rx;
-
/* Interface address info used in eth_type_trans() */
unsigned char *dev_addr;
@@ -1869,8 +1866,12 @@ struct net_device {
struct pcpu_vstats __percpu *vstats;
};
+#if IS_ENABLED(CONFIG_GARP)
struct garp_port __rcu *garp_port;
+#endif
+#if IS_ENABLED(CONFIG_MRP)
struct mrp_port __rcu *mrp_port;
+#endif
struct device dev;
const struct attribute_group *sysfs_groups[4];
@@ -1922,34 +1923,10 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
return 0;
}
-static inline
-void netdev_reset_tc(struct net_device *dev)
-{
- dev->num_tc = 0;
- memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
- memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
-}
-
-static inline
-int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
-{
- if (tc >= dev->num_tc)
- return -EINVAL;
-
- dev->tc_to_txq[tc].count = count;
- dev->tc_to_txq[tc].offset = offset;
- return 0;
-}
-
-static inline
-int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
-{
- if (num_tc > TC_MAX_QUEUE)
- return -EINVAL;
-
- dev->num_tc = num_tc;
- return 0;
-}
+int netdev_txq_to_tc(struct net_device *dev, unsigned int txq);
+void netdev_reset_tc(struct net_device *dev);
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset);
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc);
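
The tc helpers move out of line, presumably so they can keep the new per-TC XPS maps in sync, but the calling convention is unchanged. Typical driver usage, e.g. two classes of four queues each:

    netdev_set_num_tc(dev, 2);
    netdev_set_tc_queue(dev, 0, 4, 0);      /* TC0: queues 0-3 */
    netdev_set_tc_queue(dev, 1, 4, 4);      /* TC1: queues 4-7 */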
static inline
int netdev_get_num_tc(struct net_device *dev)
@@ -2502,14 +2479,19 @@ static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen)
return NAPI_GRO_CB(skb)->frag0_len < hlen;
}
+static inline void skb_gro_frag0_invalidate(struct sk_buff *skb)
+{
+ NAPI_GRO_CB(skb)->frag0 = NULL;
+ NAPI_GRO_CB(skb)->frag0_len = 0;
+}
+
static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
unsigned int offset)
{
if (!pskb_may_pull(skb, hlen))
return NULL;
- NAPI_GRO_CB(skb)->frag0 = NULL;
- NAPI_GRO_CB(skb)->frag0_len = 0;
+ skb_gro_frag0_invalidate(skb);
return skb->data + offset;
}
@@ -2686,70 +2668,18 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
remcsum_unadjust((__sum16 *)ptr, grc->delta);
}
-struct skb_csum_offl_spec {
- __u16 ipv4_okay:1,
- ipv6_okay:1,
- encap_okay:1,
- ip_options_okay:1,
- ext_hdrs_okay:1,
- tcp_okay:1,
- udp_okay:1,
- sctp_okay:1,
- vlan_okay:1,
- no_encapped_ipv6:1,
- no_not_encapped:1;
-};
-
-bool __skb_csum_offload_chk(struct sk_buff *skb,
- const struct skb_csum_offl_spec *spec,
- bool *csum_encapped,
- bool csum_help);
-
-static inline bool skb_csum_offload_chk(struct sk_buff *skb,
- const struct skb_csum_offl_spec *spec,
- bool *csum_encapped,
- bool csum_help)
-{
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- return false;
-
- return __skb_csum_offload_chk(skb, spec, csum_encapped, csum_help);
-}
-
-static inline bool skb_csum_offload_chk_help(struct sk_buff *skb,
- const struct skb_csum_offl_spec *spec)
+#ifdef CONFIG_XFRM_OFFLOAD
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
{
- bool csum_encapped;
-
- return skb_csum_offload_chk(skb, spec, &csum_encapped, true);
+ if (PTR_ERR(pp) != -EINPROGRESS)
+ NAPI_GRO_CB(skb)->flush |= flush;
}
-
-static inline bool skb_csum_off_chk_help_cmn(struct sk_buff *skb)
-{
- static const struct skb_csum_offl_spec csum_offl_spec = {
- .ipv4_okay = 1,
- .ip_options_okay = 1,
- .ipv6_okay = 1,
- .vlan_okay = 1,
- .tcp_okay = 1,
- .udp_okay = 1,
- };
-
- return skb_csum_offload_chk_help(skb, &csum_offl_spec);
-}
-
-static inline bool skb_csum_off_chk_help_cmn_v4_only(struct sk_buff *skb)
+#else
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
{
- static const struct skb_csum_offl_spec csum_offl_spec = {
- .ipv4_okay = 1,
- .ip_options_okay = 1,
- .tcp_okay = 1,
- .udp_okay = 1,
- .vlan_okay = 1,
- };
-
- return skb_csum_offload_chk_help(skb, &csum_offl_spec);
+ NAPI_GRO_CB(skb)->flush |= flush;
}
+#endif
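
skb_gro_flush_final() centralizes the end-of-handler flush update: when an xfrm layer steals the skb for asynchronous resumption it returns ERR_PTR(-EINPROGRESS), and the flush bits must be left untouched. A sketch of the intended call-site shape (protocol logic elided, names hypothetical):

    static struct sk_buff **foo_gro_receive(struct sk_buff **head,
                                            struct sk_buff *skb)
    {
            struct sk_buff **pp = NULL;
            int flush = 1;

            /* ... match skb against held packets, setting pp and flush ... */

            skb_gro_flush_final(skb, pp, flush);
            return pp;
    }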
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
@@ -2778,6 +2708,8 @@ static inline bool dev_validate_header(const struct net_device *dev,
{
if (likely(len >= dev->hard_header_len))
return true;
+ if (len < dev->min_header_len)
+ return false;
if (capable(CAP_SYS_RAWIO)) {
memset(ll_header + len, 0, dev->hard_header_len - len);
@@ -3191,7 +3123,19 @@ static inline bool netif_subqueue_stopped(const struct net_device *dev,
return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb));
}
-void netif_wake_subqueue(struct net_device *dev, u16 queue_index);
+/**
+ * netif_wake_subqueue - allow sending packets on subqueue
+ * @dev: network device
+ * @queue_index: sub queue index
+ *
+ * Resume individual transmit queue of a device with multiple transmit queues.
+ */
+static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
+{
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
+
+ netif_tx_wake_queue(txq);
+}
#ifdef CONFIG_XPS
int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
@@ -3345,7 +3289,7 @@ int dev_get_phys_port_id(struct net_device *dev,
int dev_get_phys_port_name(struct net_device *dev,
char *name, size_t len);
int dev_change_proto_down(struct net_device *dev, bool proto_down);
-int dev_change_xdp_fd(struct net_device *dev, int fd);
+int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags);
struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, int *ret);
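
dev_change_xdp_fd() grows a flags word so userspace intent reaches the driver. A plausible use, assuming the XDP_FLAGS_UPDATE_IF_NOEXIST flag from the same series:

    /* fail rather than silently replace a program someone else attached */
    err = dev_change_xdp_fd(dev, prog_fd, XDP_FLAGS_UPDATE_IF_NOEXIST);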
@@ -3554,6 +3498,17 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
txq->xmit_lock_owner = cpu;
}
+static inline bool __netif_tx_acquire(struct netdev_queue *txq)
+{
+ __acquire(&txq->_xmit_lock);
+ return true;
+}
+
+static inline void __netif_tx_release(struct netdev_queue *txq)
+{
+ __release(&txq->_xmit_lock);
+}
+
static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
{
spin_lock_bh(&txq->_xmit_lock);
@@ -3655,17 +3610,21 @@ static inline void netif_tx_unlock_bh(struct net_device *dev)
#define HARD_TX_LOCK(dev, txq, cpu) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
__netif_tx_lock(txq, cpu); \
+ } else { \
+ __netif_tx_acquire(txq); \
} \
}
#define HARD_TX_TRYLOCK(dev, txq) \
(((dev->features & NETIF_F_LLTX) == 0) ? \
__netif_tx_trylock(txq) : \
- true )
+ __netif_tx_acquire(txq))
#define HARD_TX_UNLOCK(dev, txq) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
__netif_tx_unlock(txq); \
+ } else { \
+ __netif_tx_release(txq); \
} \
}
@@ -3870,6 +3829,10 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
extern int netdev_max_backlog;
extern int netdev_tstamp_prequeue;
extern int weight_p;
+extern int dev_weight_rx_bias;
+extern int dev_weight_tx_bias;
+extern int dev_rx_weight;
+extern int dev_tx_weight;
bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
@@ -3884,12 +3847,13 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
updev; \
updev = netdev_upper_get_next_dev_rcu(dev, &(iter)))
-/* iterate through upper list, must be called under RCU read lock */
-#define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \
- for (iter = &(dev)->all_adj_list.upper, \
- updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \
- updev; \
- updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)))
+int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
+ int (*fn)(struct net_device *upper_dev,
+ void *data),
+ void *data);
+
+bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
+ struct net_device *upper_dev);
void *netdev_lower_get_next_private(struct net_device *dev,
struct list_head **iter);
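
The open-coded all_adj_list iterators give way to callback walkers. A sketch of the new pattern for a hypothetical predicate; a non-zero return from the callback stops the walk and is propagated:

    static int foo_upper_is_bridge(struct net_device *upper, void *data)
    {
            return netif_is_bridge_master(upper);
    }

    static int foo_has_bridge_above(struct net_device *dev)
    {
            int ret;

            rcu_read_lock();
            ret = netdev_walk_all_upper_dev_rcu(dev, foo_upper_is_bridge, NULL);
            rcu_read_unlock();

            return ret;
    }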
@@ -3922,17 +3886,14 @@ struct net_device *netdev_all_lower_get_next(struct net_device *dev,
struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
struct list_head **iter);
-#define netdev_for_each_all_lower_dev(dev, ldev, iter) \
- for (iter = (dev)->all_adj_list.lower.next, \
- ldev = netdev_all_lower_get_next(dev, &(iter)); \
- ldev; \
- ldev = netdev_all_lower_get_next(dev, &(iter)))
-
-#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \
- for (iter = &(dev)->all_adj_list.lower, \
- ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \
- ldev; \
- ldev = netdev_all_lower_get_next_rcu(dev, &(iter)))
+int netdev_walk_all_lower_dev(struct net_device *dev,
+ int (*fn)(struct net_device *lower_dev,
+ void *data),
+ void *data);
+int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
+ int (*fn)(struct net_device *lower_dev,
+ void *data),
+ void *data);
void *netdev_adjacent_get_private(struct list_head *adj_list);
void *netdev_lower_get_first_private_rcu(struct net_device *dev);
@@ -3949,10 +3910,6 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
struct net_device *lower_dev);
void netdev_lower_state_changed(struct net_device *lower_dev,
void *lower_state_info);
-int netdev_default_l2upper_neigh_construct(struct net_device *dev,
- struct neighbour *n);
-void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
- struct neighbour *n);
/* RSS keys are 40 or 52 bytes long */
#define NETDEV_RSS_KEY_LEN 52
@@ -4009,19 +3966,6 @@ static inline bool can_checksum_protocol(netdev_features_t features,
}
}
-/* Map an ethertype into IP protocol if possible */
-static inline int eproto_to_ipproto(int eproto)
-{
- switch (eproto) {
- case htons(ETH_P_IP):
- return IPPROTO_IP;
- case htons(ETH_P_IPV6):
- return IPPROTO_IPV6;
- default:
- return -1;
- }
-}
-
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev);
#else
@@ -4418,6 +4362,15 @@ do { \
})
#endif
+/* if @cond then downgrade to debug, else print at @level */
+#define netif_cond_dbg(priv, type, netdev, cond, level, fmt, args...) \
+ do { \
+ if (cond) \
+ netif_dbg(priv, type, netdev, fmt, ##args); \
+ else \
+ netif_ ## level(priv, type, netdev, fmt, ##args); \
+ } while (0)
+
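
Usage sketch for netif_cond_dbg: keep an expected failure quiet without losing the message for real errors (the -EOPNOTSUPP condition is illustrative):

    netif_cond_dbg(priv, drv, netdev, ret == -EOPNOTSUPP, err,
                   "offload setup failed: %d\n", ret);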
#if defined(VERBOSE_DEBUG)
#define netif_vdbg netif_dbg
#else