From e227867f12302633737bd2a48a10a9a72c0630cb Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 18 Feb 2014 22:54:36 +0900 Subject: treewide: Fix typo in Documentation/DocBook This patch fix spelling typo in Documentation/DocBook. It is because .html and .xml files are generated by make htmldocs, I have to fix a typo within the source files. Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d2b87dbbbb1a..70d2da3bfb0d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3424,7 +3424,7 @@ out: * @rx_handler: receive handler to register * @rx_handler_data: data pointer that is used by rx handler * - * Register a receive hander for a device. This handler will then be + * Register a receive handler for a device. This handler will then be * called from __netif_receive_skb. A negative errno code is returned * on a failure. * -- cgit From fce8ad1568c57e7f334018dec4fa1744c926c135 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:40:01 +0100 Subject: smp: Remove wait argument from __smp_call_function_single() The main point of calling __smp_call_function_single() is to send an IPI in a pure asynchronous way. By embedding a csd in an object, a caller can send the IPI without waiting for a previous one to complete as is required by smp_call_function_single() for example. As such, sending this kind of IPI can be safe even when irqs are disabled. This flexibility comes at the expense of the caller who then needs to synchronize the csd lifecycle by himself and make sure that IPIs on a single csd are serialized. This is how __smp_call_function_single() works when wait = 0 and this usecase is relevant. Now there don't seem to be any usecase with wait = 1 that can't be covered by smp_call_function_single() instead, which is safer. Lets look at the two possible scenario: 1) The user calls __smp_call_function_single(wait = 1) on a csd embedded in an object. It looks like a nice and convenient pattern at the first sight because we can then retrieve the object from the IPI handler easily. But actually it is a waste of memory space in the object since the csd can be allocated from the stack by smp_call_function_single(wait = 1) and the object can be passed an the IPI argument. Besides that, embedding the csd in an object is more error prone because the caller must take care of the serialization of the IPIs for this csd. 2) The user calls __smp_call_function_single(wait = 1) on a csd that is allocated on the stack. It's ok but smp_call_function_single() can do it as well and it already takes care of the allocation on the stack. Again it's more simple and less error prone. Therefore, using the underscore prepend API version with wait = 1 is a bad pattern and a sign that the caller can do safer and more simple. There was a single user of that which has just been converted. So lets remove this option to discourage further users. Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- block/blk-softirq.c | 2 +- drivers/cpuidle/coupled.c | 2 +- include/linux/smp.h | 2 +- kernel/sched/core.c | 2 +- kernel/smp.c | 19 ++++--------------- kernel/up.c | 3 +-- net/core/dev.c | 2 +- 8 files changed, 11 insertions(+), 23 deletions(-) (limited to 'net/core/dev.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index 1fa9dd153fde..62154edf1489 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -353,7 +353,7 @@ void __blk_mq_complete_request(struct request *rq) rq->csd.func = __blk_mq_complete_request_remote; rq->csd.info = rq; rq->csd.flags = 0; - __smp_call_function_single(ctx->cpu, &rq->csd, 0); + __smp_call_function_single(ctx->cpu, &rq->csd); } else { rq->q->softirq_done_fn(rq); } diff --git a/block/blk-softirq.c b/block/blk-softirq.c index b5c37d96cf0e..6345b7ebd0df 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -70,7 +70,7 @@ static int raise_blk_irq(int cpu, struct request *rq) data->info = rq; data->flags = 0; - __smp_call_function_single(cpu, data, 0); + __smp_call_function_single(cpu, data); return 0; } diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index e952936418d0..04115947accc 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -323,7 +323,7 @@ static void cpuidle_coupled_poke(int cpu) struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) - __smp_call_function_single(cpu, csd, 0); + __smp_call_function_single(cpu, csd); } /** diff --git a/include/linux/smp.h b/include/linux/smp.h index c39074c794c5..b410a1f23281 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,7 +50,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); -int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait); +int __smp_call_function_single(int cpu, struct call_single_data *csd); #ifdef CONFIG_SMP diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b46131ef6aab..eba3d84765f3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -432,7 +432,7 @@ void hrtick_start(struct rq *rq, u64 delay) if (rq == this_rq()) { __hrtick_restart(rq); } else if (!rq->hrtick_csd_pending) { - __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0); + __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd); rq->hrtick_csd_pending = 1; } } diff --git a/kernel/smp.c b/kernel/smp.c index fa04ab938e52..b76763189752 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -241,29 +241,18 @@ EXPORT_SYMBOL(smp_call_function_single); * __smp_call_function_single(): Run a function on a specific CPU * @cpu: The CPU to run on. * @csd: Pre-allocated and setup data structure - * @wait: If true, wait until function has completed on specified CPU. * * Like smp_call_function_single(), but allow caller to pass in a * pre-allocated data structure. Useful for embedding @data inside * other structures, for instance. */ -int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait) +int __smp_call_function_single(int cpu, struct call_single_data *csd) { int err = 0; - int this_cpu; - this_cpu = get_cpu(); - /* - * Can deadlock when called with interrupts disabled. - * We allow cpu's that are not yet online though, as no one else can - * send smp call function interrupt to this cpu and as such deadlocks - * can't happen. - */ - WARN_ON_ONCE(cpu_online(this_cpu) && wait && irqs_disabled() - && !oops_in_progress); - - err = generic_exec_single(cpu, csd, csd->func, csd->info, wait); - put_cpu(); + preempt_disable(); + err = generic_exec_single(cpu, csd, csd->func, csd->info, 0); + preempt_enable(); return err; } diff --git a/kernel/up.c b/kernel/up.c index cdf03d16840e..4e199d4cef8e 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -22,8 +22,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); -int __smp_call_function_single(int cpu, struct call_single_data *csd, - int wait) +int __smp_call_function_single(int cpu, struct call_single_data *csd) { unsigned long flags; diff --git a/net/core/dev.c b/net/core/dev.c index 4ad1b78c9c77..d1298128bff4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4129,7 +4129,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) if (cpu_online(remsd->cpu)) __smp_call_function_single(remsd->cpu, - &remsd->csd, 0); + &remsd->csd); remsd = next; } } else -- cgit From c46fff2a3b29794b35d717b5680a27f31a6a6bc0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:40:02 +0100 Subject: smp: Rename __smp_call_function_single() to smp_call_function_single_async() The name __smp_call_function_single() doesn't tell much about the properties of this function, especially when compared to smp_call_function_single(). The comments above the implementation are also misleading. The main point of this function is actually not to be able to embed the csd in an object. This is actually a requirement that result from the purpose of this function which is to raise an IPI asynchronously. As such it can be called with interrupts disabled. And this feature comes at the cost of the caller who then needs to serialize the IPIs on this csd. Lets rename the function and enhance the comments so that they reflect these properties. Suggested-by: Christoph Hellwig Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- block/blk-softirq.c | 2 +- drivers/cpuidle/coupled.c | 2 +- include/linux/smp.h | 2 +- kernel/sched/core.c | 2 +- kernel/smp.c | 19 +++++++++++++------ kernel/up.c | 4 ++-- net/core/dev.c | 2 +- 8 files changed, 21 insertions(+), 14 deletions(-) (limited to 'net/core/dev.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index 62154edf1489..6468a715a0e4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -353,7 +353,7 @@ void __blk_mq_complete_request(struct request *rq) rq->csd.func = __blk_mq_complete_request_remote; rq->csd.info = rq; rq->csd.flags = 0; - __smp_call_function_single(ctx->cpu, &rq->csd); + smp_call_function_single_async(ctx->cpu, &rq->csd); } else { rq->q->softirq_done_fn(rq); } diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 6345b7ebd0df..ebd6b6f1bdeb 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -70,7 +70,7 @@ static int raise_blk_irq(int cpu, struct request *rq) data->info = rq; data->flags = 0; - __smp_call_function_single(cpu, data); + smp_call_function_single_async(cpu, data); return 0; } diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 04115947accc..cb6654bfad77 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -323,7 +323,7 @@ static void cpuidle_coupled_poke(int cpu) struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) - __smp_call_function_single(cpu, csd); + smp_call_function_single_async(cpu, csd); } /** diff --git a/include/linux/smp.h b/include/linux/smp.h index b410a1f23281..633f5edd7470 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,7 +50,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); -int __smp_call_function_single(int cpu, struct call_single_data *csd); +int smp_call_function_single_async(int cpu, struct call_single_data *csd); #ifdef CONFIG_SMP diff --git a/kernel/sched/core.c b/kernel/sched/core.c index eba3d84765f3..0cca04a53de0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -432,7 +432,7 @@ void hrtick_start(struct rq *rq, u64 delay) if (rq == this_rq()) { __hrtick_restart(rq); } else if (!rq->hrtick_csd_pending) { - __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd); + smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); rq->hrtick_csd_pending = 1; } } diff --git a/kernel/smp.c b/kernel/smp.c index b76763189752..06d574e42c72 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -238,15 +238,22 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, EXPORT_SYMBOL(smp_call_function_single); /** - * __smp_call_function_single(): Run a function on a specific CPU + * smp_call_function_single_async(): Run an asynchronous function on a + * specific CPU. * @cpu: The CPU to run on. * @csd: Pre-allocated and setup data structure * - * Like smp_call_function_single(), but allow caller to pass in a - * pre-allocated data structure. Useful for embedding @data inside - * other structures, for instance. + * Like smp_call_function_single(), but the call is asynchonous and + * can thus be done from contexts with disabled interrupts. + * + * The caller passes his own pre-allocated data structure + * (ie: embedded in an object) and is responsible for synchronizing it + * such that the IPIs performed on the @csd are strictly serialized. + * + * NOTE: Be careful, there is unfortunately no current debugging facility to + * validate the correctness of this serialization. */ -int __smp_call_function_single(int cpu, struct call_single_data *csd) +int smp_call_function_single_async(int cpu, struct call_single_data *csd) { int err = 0; @@ -256,7 +263,7 @@ int __smp_call_function_single(int cpu, struct call_single_data *csd) return err; } -EXPORT_SYMBOL_GPL(__smp_call_function_single); +EXPORT_SYMBOL_GPL(smp_call_function_single_async); /* * smp_call_function_any - Run a function on any of the given cpus diff --git a/kernel/up.c b/kernel/up.c index 4e199d4cef8e..1760bf3d1463 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -22,7 +22,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); -int __smp_call_function_single(int cpu, struct call_single_data *csd) +int smp_call_function_single_async(int cpu, struct call_single_data *csd) { unsigned long flags; @@ -31,7 +31,7 @@ int __smp_call_function_single(int cpu, struct call_single_data *csd) local_irq_restore(flags); return 0; } -EXPORT_SYMBOL(__smp_call_function_single); +EXPORT_SYMBOL(smp_call_function_single_async); int on_each_cpu(smp_call_func_t func, void *info, int wait) { diff --git a/net/core/dev.c b/net/core/dev.c index d1298128bff4..ac7a2abb7f1a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4128,7 +4128,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) struct softnet_data *next = remsd->rps_ipi_next; if (cpu_online(remsd->cpu)) - __smp_call_function_single(remsd->cpu, + smp_call_function_single_async(remsd->cpu, &remsd->csd); remsd = next; } -- cgit From 2b8837aeaaa0bb6b4b3be1b3afd1cc088f68a362 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Mar 2014 10:04:17 -0700 Subject: net: Convert uses of __constant_ to The use of __constant_ has been unnecessary for quite awhile now. Make these uses consistent with the rest of the kernel. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/core/dev.c | 10 +++++----- net/core/flow_dissector.c | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index b1b0c8d4d7df..587f9fb85d73 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3495,11 +3495,11 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); static bool skb_pfmemalloc_protocol(struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_ARP): - case __constant_htons(ETH_P_IP): - case __constant_htons(ETH_P_IPV6): - case __constant_htons(ETH_P_8021Q): - case __constant_htons(ETH_P_8021AD): + case htons(ETH_P_ARP): + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + case htons(ETH_P_8021Q): + case htons(ETH_P_8021AD): return true; default: return false; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index e29e810663d7..80201bf69d59 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -61,7 +61,7 @@ bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) again: switch (proto) { - case __constant_htons(ETH_P_IP): { + case htons(ETH_P_IP): { const struct iphdr *iph; struct iphdr _iph; ip: @@ -77,7 +77,7 @@ ip: iph_to_flow_copy_addrs(flow, iph); break; } - case __constant_htons(ETH_P_IPV6): { + case htons(ETH_P_IPV6): { const struct ipv6hdr *iph; struct ipv6hdr _iph; ipv6: @@ -91,8 +91,8 @@ ipv6: nhoff += sizeof(struct ipv6hdr); break; } - case __constant_htons(ETH_P_8021AD): - case __constant_htons(ETH_P_8021Q): { + case htons(ETH_P_8021AD): + case htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; struct vlan_hdr _vlan; @@ -104,7 +104,7 @@ ipv6: nhoff += sizeof(*vlan); goto again; } - case __constant_htons(ETH_P_PPP_SES): { + case htons(ETH_P_PPP_SES): { struct { struct pppoe_hdr hdr; __be16 proto; @@ -115,9 +115,9 @@ ipv6: proto = hdr->proto; nhoff += PPPOE_SES_HLEN; switch (proto) { - case __constant_htons(PPP_IP): + case htons(PPP_IP): goto ip; - case __constant_htons(PPP_IPV6): + case htons(PPP_IPV6): goto ipv6; default: return false; -- cgit From 9c62a68d13119a1ca9718381d97b0cb415ff4e9d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 14 Mar 2014 20:51:52 -0700 Subject: netpoll: Remove dead packet receive code (CONFIG_NETPOLL_TRAP) The netpoll packet receive code only becomes active if the netpoll rx_skb_hook is implemented, and there is not a single implementation of the netpoll rx_skb_hook in the kernel. All of the out of tree implementations I have found all call netpoll_poll which was removed from the kernel in 2011, so this change should not add any additional breakage. There are problems with the netpoll packet receive code. __netpoll_rx does not call dev_kfree_skb_irq or dev_kfree_skb_any in hard irq context. netpoll_neigh_reply leaks every skb it receives. Reception of packets does not work successfully on stacked devices (aka bonding, team, bridge, and vlans). Given that the netpoll packet receive code is buggy, there are no out of tree users that will be merged soon, and the code has not been used for in tree for a decade let's just remove it. Reverting this commit can server as a starting point for anyone who wants to resurrect netpoll packet reception support. Acked-by: Eric Dumazet Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- drivers/net/Kconfig | 5 - include/linux/netdevice.h | 17 -- include/linux/netpoll.h | 84 -------- net/core/dev.c | 11 +- net/core/netpoll.c | 520 +--------------------------------------------- 5 files changed, 2 insertions(+), 635 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 494b888a6568..89402c3b64f8 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -177,11 +177,6 @@ config NETCONSOLE_DYNAMIC config NETPOLL def_bool NETCONSOLE -config NETPOLL_TRAP - bool "Netpoll traffic trapping" - default n - depends on NETPOLL - config NET_POLL_CONTROLLER def_bool NETPOLL diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b8d8c805fd75..4b6d12c7b803 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1979,9 +1979,6 @@ struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); int netdev_get_name(struct net *net, char *name, int ifindex); int dev_restart(struct net_device *dev); -#ifdef CONFIG_NETPOLL_TRAP -int netpoll_trap(void); -#endif int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb); static inline unsigned int skb_gro_offset(const struct sk_buff *skb) @@ -2186,12 +2183,6 @@ static inline void netif_tx_start_all_queues(struct net_device *dev) static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) { -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) { - netif_tx_start_queue(dev_queue); - return; - } -#endif if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) __netif_schedule(dev_queue->qdisc); } @@ -2435,10 +2426,6 @@ static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) - return; -#endif netif_tx_stop_queue(txq); } @@ -2473,10 +2460,6 @@ static inline bool netif_subqueue_stopped(const struct net_device *dev, static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) - return; -#endif if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) __netif_schedule(txq->qdisc); } diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index a0632af88d8b..1b475a5a7239 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -31,12 +31,6 @@ struct netpoll { u8 remote_mac[ETH_ALEN]; struct work_struct cleanup_work; - -#ifdef CONFIG_NETPOLL_TRAP - void (*rx_skb_hook)(struct netpoll *np, int source, struct sk_buff *skb, - int offset, int len); - struct list_head rx; /* rx_np list element */ -#endif }; struct netpoll_info { @@ -50,12 +44,6 @@ struct netpoll_info { struct netpoll *netpoll; struct rcu_head rcu; - -#ifdef CONFIG_NETPOLL_TRAP - spinlock_t rx_lock; - struct list_head rx_np; /* netpolls that registered an rx_skb_hook */ - struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */ -#endif }; #ifdef CONFIG_NETPOLL @@ -84,78 +72,6 @@ static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) local_irq_restore(flags); } -#ifdef CONFIG_NETPOLL_TRAP -int netpoll_trap(void); -void netpoll_set_trap(int trap); -int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo); -static inline bool netpoll_rx_processing(struct netpoll_info *npinfo) -{ - return !list_empty(&npinfo->rx_np); -} - -static inline bool netpoll_rx_on(struct sk_buff *skb) -{ - struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo); - - return npinfo && netpoll_rx_processing(npinfo); -} - -static inline bool netpoll_rx(struct sk_buff *skb) -{ - struct netpoll_info *npinfo; - unsigned long flags; - bool ret = false; - - local_irq_save(flags); - - if (!netpoll_rx_on(skb)) - goto out; - - npinfo = rcu_dereference_bh(skb->dev->npinfo); - spin_lock(&npinfo->rx_lock); - /* check rx_processing again with the lock held */ - if (netpoll_rx_processing(npinfo) && __netpoll_rx(skb, npinfo)) - ret = true; - spin_unlock(&npinfo->rx_lock); - -out: - local_irq_restore(flags); - return ret; -} - -static inline int netpoll_receive_skb(struct sk_buff *skb) -{ - if (!list_empty(&skb->dev->napi_list)) - return netpoll_rx(skb); - return 0; -} - -#else -static inline int netpoll_trap(void) -{ - return 0; -} -static inline void netpoll_set_trap(int trap) -{ -} -static inline bool netpoll_rx_processing(struct netpoll_info *npinfo) -{ - return false; -} -static inline bool netpoll_rx(struct sk_buff *skb) -{ - return false; -} -static inline bool netpoll_rx_on(struct sk_buff *skb) -{ - return false; -} -static inline int netpoll_receive_skb(struct sk_buff *skb) -{ - return 0; -} -#endif - #ifdef CONFIG_NETPOLL static inline void *netpoll_poll_lock(struct napi_struct *napi) { diff --git a/net/core/dev.c b/net/core/dev.c index 587f9fb85d73..55f8e64c03a2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3231,10 +3231,6 @@ static int netif_rx_internal(struct sk_buff *skb) { int ret; - /* if netpoll wants it, pretend we never saw it */ - if (netpoll_rx(skb)) - return NET_RX_DROP; - net_timestamp_check(netdev_tstamp_prequeue, skb); trace_netif_rx(skb); @@ -3520,10 +3516,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) trace_netif_receive_skb(skb); - /* if we've gotten here through NAPI, check netpoll */ - if (netpoll_receive_skb(skb)) - goto out; - orig_dev = skb->dev; skb_reset_network_header(skb); @@ -3650,7 +3642,6 @@ drop: unlock: rcu_read_unlock(); -out: return ret; } @@ -3875,7 +3866,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff int same_flow; enum gro_result ret; - if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) + if (!(skb->dev->features & NETIF_F_GRO)) goto normal; if (skb_is_gso(skb) || skb_has_frag_list(skb)) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index eed8b1d2d302..7291dde93469 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -46,11 +46,6 @@ static struct sk_buff_head skb_pool; -#ifdef CONFIG_NETPOLL_TRAP -static atomic_t trapped; -static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); -#endif - DEFINE_STATIC_SRCU(netpoll_srcu); #define USEC_PER_POLL 50 @@ -109,27 +104,6 @@ static void queue_process(struct work_struct *work) } } -#ifdef CONFIG_NETPOLL_TRAP -static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, - unsigned short ulen, __be32 saddr, __be32 daddr) -{ - __wsum psum; - - if (uh->check == 0 || skb_csum_unnecessary(skb)) - return 0; - - psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); - - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_fold(csum_add(psum, skb->csum))) - return 0; - - skb->csum = psum; - - return __skb_checksum_complete(skb); -} -#endif /* CONFIG_NETPOLL_TRAP */ - /* * Check whether delayed processing was scheduled for our NIC. If so, * we attempt to grab the poll lock and use ->poll() to pump the card. @@ -140,11 +114,6 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, * trylock here and interrupts are already disabled in the softirq * case. Further, we test the poll_owner to avoid recursion on UP * systems where the lock doesn't exist. - * - * In cases where there is bi-directional communications, reading only - * one message at a time can lead to packets being dropped by the - * network adapter, forcing superfluous retries and possibly timeouts. - * Thus, we set our budget to greater than 1. */ static int poll_one_napi(struct napi_struct *napi, int budget) { @@ -181,38 +150,11 @@ static void poll_napi(struct net_device *dev, int budget) } } -#ifdef CONFIG_NETPOLL_TRAP -static void service_neigh_queue(struct net_device *dev, - struct netpoll_info *npi) -{ - struct sk_buff *skb; - if (dev->flags & IFF_SLAVE) { - struct net_device *bond_dev; - struct netpoll_info *bond_ni; - - bond_dev = netdev_master_upper_dev_get_rcu(dev); - bond_ni = rcu_dereference_bh(bond_dev->npinfo); - while ((skb = skb_dequeue(&npi->neigh_tx))) { - skb->dev = bond_dev; - skb_queue_tail(&bond_ni->neigh_tx, skb); - } - } - while ((skb = skb_dequeue(&npi->neigh_tx))) - netpoll_neigh_reply(skb, npi); -} -#else /* !CONFIG_NETPOLL_TRAP */ -static inline void service_neigh_queue(struct net_device *dev, - struct netpoll_info *npi) -{ -} -#endif /* CONFIG_NETPOLL_TRAP */ - static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); - bool rx_processing = netpoll_rx_processing(ni); - int budget = rx_processing? 16 : 0; + int budget = 0; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity @@ -226,9 +168,6 @@ static void netpoll_poll_dev(struct net_device *dev) return; } - if (rx_processing) - netpoll_set_trap(1); - ops = dev->netdev_ops; if (!ops->ndo_poll_controller) { up(&ni->dev_lock); @@ -240,13 +179,8 @@ static void netpoll_poll_dev(struct net_device *dev) poll_napi(dev, budget); - if (rx_processing) - netpoll_set_trap(0); - up(&ni->dev_lock); - service_neigh_queue(dev, ni); - zap_completion_queue(); } @@ -531,434 +465,6 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -#ifdef CONFIG_NETPOLL_TRAP -static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo) -{ - int size, type = ARPOP_REPLY; - __be32 sip, tip; - unsigned char *sha; - struct sk_buff *send_skb; - struct netpoll *np, *tmp; - unsigned long flags; - int hlen, tlen; - int hits = 0, proto; - - if (!netpoll_rx_processing(npinfo)) - return; - - /* Before checking the packet, we do some early - inspection whether this is interesting at all */ - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->dev == skb->dev) - hits++; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - - /* No netpoll struct is using this dev */ - if (!hits) - return; - - proto = ntohs(eth_hdr(skb)->h_proto); - if (proto == ETH_P_ARP) { - struct arphdr *arp; - unsigned char *arp_ptr; - /* No arp on this interface */ - if (skb->dev->flags & IFF_NOARP) - return; - - if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) - return; - - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - arp = arp_hdr(skb); - - if ((arp->ar_hrd != htons(ARPHRD_ETHER) && - arp->ar_hrd != htons(ARPHRD_IEEE802)) || - arp->ar_pro != htons(ETH_P_IP) || - arp->ar_op != htons(ARPOP_REQUEST)) - return; - - arp_ptr = (unsigned char *)(arp+1); - /* save the location of the src hw addr */ - sha = arp_ptr; - arp_ptr += skb->dev->addr_len; - memcpy(&sip, arp_ptr, 4); - arp_ptr += 4; - /* If we actually cared about dst hw addr, - it would get copied here */ - arp_ptr += skb->dev->addr_len; - memcpy(&tip, arp_ptr, 4); - - /* Should we ignore arp? */ - if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) - return; - - size = arp_hdr_len(skb->dev); - - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (tip != np->local_ip.ip) - continue; - - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; - - skb_reset_network_header(send_skb); - arp = (struct arphdr *) skb_put(send_skb, size); - send_skb->dev = skb->dev; - send_skb->protocol = htons(ETH_P_ARP); - - /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP, - sha, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; - } - - /* - * Fill out the arp protocol part. - * - * we only support ethernet device type, - * which (according to RFC 1390) should - * always equal 1 (Ethernet). - */ - - arp->ar_hrd = htons(np->dev->type); - arp->ar_pro = htons(ETH_P_IP); - arp->ar_hln = np->dev->addr_len; - arp->ar_pln = 4; - arp->ar_op = htons(type); - - arp_ptr = (unsigned char *)(arp + 1); - memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &tip, 4); - arp_ptr += 4; - memcpy(arp_ptr, sha, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &sip, 4); - - netpoll_send_skb(np, send_skb); - - /* If there are several rx_skb_hooks for the same - * address we're fine by sending a single reply - */ - break; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } else if( proto == ETH_P_IPV6) { -#if IS_ENABLED(CONFIG_IPV6) - struct nd_msg *msg; - u8 *lladdr = NULL; - struct ipv6hdr *hdr; - struct icmp6hdr *icmp6h; - const struct in6_addr *saddr; - const struct in6_addr *daddr; - struct inet6_dev *in6_dev = NULL; - struct in6_addr *target; - - in6_dev = in6_dev_get(skb->dev); - if (!in6_dev || !in6_dev->cnf.accept_ra) - return; - - if (!pskb_may_pull(skb, skb->len)) - return; - - msg = (struct nd_msg *)skb_transport_header(skb); - - __skb_push(skb, skb->data - skb_transport_header(skb)); - - if (ipv6_hdr(skb)->hop_limit != 255) - return; - if (msg->icmph.icmp6_code != 0) - return; - if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) - return; - - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; - - size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); - - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) - continue; - - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; - - send_skb->protocol = htons(ETH_P_IPV6); - send_skb->dev = skb->dev; - - skb_reset_network_header(send_skb); - hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr)); - *(__be32*)hdr = htonl(0x60000000); - hdr->payload_len = htons(size); - hdr->nexthdr = IPPROTO_ICMPV6; - hdr->hop_limit = 255; - hdr->saddr = *saddr; - hdr->daddr = *daddr; - - icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr)); - icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; - icmp6h->icmp6_router = 0; - icmp6h->icmp6_solicited = 1; - - target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr)); - *target = msg->target; - icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, - IPPROTO_ICMPV6, - csum_partial(icmp6h, - size, 0)); - - if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6, - lladdr, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; - } - - netpoll_send_skb(np, send_skb); - - /* If there are several rx_skb_hooks for the same - * address, we're fine by sending a single reply - */ - break; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); -#endif - } -} - -static bool pkt_is_ns(struct sk_buff *skb) -{ - struct nd_msg *msg; - struct ipv6hdr *hdr; - - if (skb->protocol != htons(ETH_P_ARP)) - return false; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) - return false; - - msg = (struct nd_msg *)skb_transport_header(skb); - __skb_push(skb, skb->data - skb_transport_header(skb)); - hdr = ipv6_hdr(skb); - - if (hdr->nexthdr != IPPROTO_ICMPV6) - return false; - if (hdr->hop_limit != 255) - return false; - if (msg->icmph.icmp6_code != 0) - return false; - if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) - return false; - - return true; -} - -int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) -{ - int proto, len, ulen, data_len; - int hits = 0, offset; - const struct iphdr *iph; - struct udphdr *uh; - struct netpoll *np, *tmp; - uint16_t source; - - if (!netpoll_rx_processing(npinfo)) - goto out; - - if (skb->dev->type != ARPHRD_ETHER) - goto out; - - /* check if netpoll clients need ARP */ - if (skb->protocol == htons(ETH_P_ARP) && netpoll_trap()) { - skb_queue_tail(&npinfo->neigh_tx, skb); - return 1; - } else if (pkt_is_ns(skb) && netpoll_trap()) { - skb_queue_tail(&npinfo->neigh_tx, skb); - return 1; - } - - if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { - skb = vlan_untag(skb); - if (unlikely(!skb)) - goto out; - } - - proto = ntohs(eth_hdr(skb)->h_proto); - if (proto != ETH_P_IP && proto != ETH_P_IPV6) - goto out; - if (skb->pkt_type == PACKET_OTHERHOST) - goto out; - if (skb_shared(skb)) - goto out; - - if (proto == ETH_P_IP) { - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - iph = (struct iphdr *)skb->data; - if (iph->ihl < 5 || iph->version != 4) - goto out; - if (!pskb_may_pull(skb, iph->ihl*4)) - goto out; - iph = (struct iphdr *)skb->data; - if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) - goto out; - - len = ntohs(iph->tot_len); - if (skb->len < len || len < iph->ihl*4) - goto out; - - /* - * Our transport medium may have padded the buffer out. - * Now We trim to the true length of the frame. - */ - if (pskb_trim_rcsum(skb, len)) - goto out; - - iph = (struct iphdr *)skb->data; - if (iph->protocol != IPPROTO_UDP) - goto out; - - len -= iph->ihl*4; - uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); - offset = (unsigned char *)(uh + 1) - skb->data; - ulen = ntohs(uh->len); - data_len = skb->len - offset; - source = ntohs(uh->source); - - if (ulen != len) - goto out; - if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) - goto out; - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->local_ip.ip && np->local_ip.ip != iph->daddr) - continue; - if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; - - np->rx_skb_hook(np, source, skb, offset, data_len); - hits++; - } - } else { -#if IS_ENABLED(CONFIG_IPV6) - const struct ipv6hdr *ip6h; - - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto out; - ip6h = (struct ipv6hdr *)skb->data; - if (ip6h->version != 6) - goto out; - len = ntohs(ip6h->payload_len); - if (!len) - goto out; - if (len + sizeof(struct ipv6hdr) > skb->len) - goto out; - if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr))) - goto out; - ip6h = ipv6_hdr(skb); - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto out; - uh = udp_hdr(skb); - offset = (unsigned char *)(uh + 1) - skb->data; - ulen = ntohs(uh->len); - data_len = skb->len - offset; - source = ntohs(uh->source); - if (ulen != skb->len) - goto out; - if (udp6_csum_init(skb, uh, IPPROTO_UDP)) - goto out; - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr)) - continue; - if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr)) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; - - np->rx_skb_hook(np, source, skb, offset, data_len); - hits++; - } -#endif - } - - if (!hits) - goto out; - - kfree_skb(skb); - return 1; - -out: - if (netpoll_trap()) { - kfree_skb(skb); - return 1; - } - - return 0; -} - -static void netpoll_trap_setup_info(struct netpoll_info *npinfo) -{ - INIT_LIST_HEAD(&npinfo->rx_np); - spin_lock_init(&npinfo->rx_lock); - skb_queue_head_init(&npinfo->neigh_tx); -} - -static void netpoll_trap_cleanup_info(struct netpoll_info *npinfo) -{ - skb_queue_purge(&npinfo->neigh_tx); -} - -static void netpoll_trap_setup(struct netpoll *np, struct netpoll_info *npinfo) -{ - unsigned long flags; - if (np->rx_skb_hook) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_add_tail(&np->rx, &npinfo->rx_np); - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } -} - -static void netpoll_trap_cleanup(struct netpoll *np, struct netpoll_info *npinfo) -{ - unsigned long flags; - if (!list_empty(&npinfo->rx_np)) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_del(&np->rx); - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } -} - -#else /* !CONFIG_NETPOLL_TRAP */ -static inline void netpoll_trap_setup_info(struct netpoll_info *npinfo) -{ -} -static inline void netpoll_trap_cleanup_info(struct netpoll_info *npinfo) -{ -} -static inline -void netpoll_trap_setup(struct netpoll *np, struct netpoll_info *npinfo) -{ -} -static inline -void netpoll_trap_cleanup(struct netpoll *np, struct netpoll_info *npinfo) -{ -} -#endif /* CONFIG_NETPOLL_TRAP */ - void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); @@ -1103,8 +609,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) goto out; } - netpoll_trap_setup_info(npinfo); - sema_init(&npinfo->dev_lock, 1); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -1124,8 +628,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) npinfo->netpoll = np; - netpoll_trap_setup(np, npinfo); - /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); @@ -1274,7 +776,6 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); - netpoll_trap_cleanup_info(npinfo); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ @@ -1299,8 +800,6 @@ void __netpoll_cleanup(struct netpoll *np) if (!npinfo) return; - netpoll_trap_cleanup(np, npinfo); - synchronize_srcu(&netpoll_srcu); if (atomic_dec_and_test(&npinfo->refcnt)) { @@ -1344,20 +843,3 @@ out: rtnl_unlock(); } EXPORT_SYMBOL(netpoll_cleanup); - -#ifdef CONFIG_NETPOLL_TRAP -int netpoll_trap(void) -{ - return atomic_read(&trapped); -} -EXPORT_SYMBOL(netpoll_trap); - -void netpoll_set_trap(int trap) -{ - if (trap) - atomic_inc(&trapped); - else - atomic_dec(&trapped); -} -EXPORT_SYMBOL(netpoll_set_trap); -#endif /* CONFIG_NETPOLL_TRAP */ -- cgit From 61b905da33ae25edb6b9d2a5de21e34c3a77efe3 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 24 Mar 2014 15:34:47 -0700 Subject: net: Rename skb->rxhash to skb->hash The packet hash can be considered a property of the packet, not just on RX path. This patch changes name of rxhash and l4_rxhash skbuff fields to be hash and l4_hash respectively. This includes changing uses of the field in the code which don't call the access functions. Signed-off-by: Tom Herbert Signed-off-by: Eric Dumazet Cc: Mahesh Bandewar Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 4 ++-- arch/powerpc/net/bpf_jit_comp.c | 4 ++-- arch/s390/net/bpf_jit_comp.c | 8 ++++---- arch/sparc/net/bpf_jit_comp.c | 2 +- arch/x86/net/bpf_jit_comp.c | 8 ++++---- include/linux/skbuff.h | 28 ++++++++++++++-------------- include/net/sock.h | 4 ++-- include/trace/events/net.h | 12 ++++++------ net/core/dev.c | 13 +++++++------ net/core/filter.c | 2 +- net/core/flow_dissector.c | 10 +++++----- net/packet/af_packet.c | 3 +-- 12 files changed, 49 insertions(+), 49 deletions(-) (limited to 'net/core/dev.c') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 271b5e971568..7ddb9c83cdfc 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -825,8 +825,8 @@ b_epilogue: break; case BPF_S_ANC_RXHASH: ctx->seen |= SEEN_SKB; - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); - off = offsetof(struct sk_buff, rxhash); + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + off = offsetof(struct sk_buff, hash); emit(ARM_LDR_I(r_A, r_skb, off), ctx); break; case BPF_S_ANC_VLAN_TAG: diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 555034f8505e..4afad6c17d50 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -390,9 +390,9 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, mark)); break; case BPF_S_ANC_RXHASH: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - rxhash)); + hash)); break; case BPF_S_ANC_VLAN_TAG: case BPF_S_ANC_VLAN_TAG_PRESENT: diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 708d60e40066..153f8f2cfd56 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -737,10 +737,10 @@ call_fn: /* lg %r1,(%r13) */ /* icm %r5,3,(%r1) */ EMIT4_DISP(0xbf531000, offsetof(struct net_device, type)); break; - case BPF_S_ANC_RXHASH: /* A = skb->rxhash */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); - /* l %r5,(%r2) */ - EMIT4_DISP(0x58502000, offsetof(struct sk_buff, rxhash)); + case BPF_S_ANC_RXHASH: /* A = skb->hash */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + /* l %r5,(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash)); break; case BPF_S_ANC_VLAN_TAG: case BPF_S_ANC_VLAN_TAG_PRESENT: diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 01fe9946d388..d96d2a7c78ee 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -618,7 +618,7 @@ void bpf_jit_compile(struct sk_filter *fp) emit_load16(r_A, struct net_device, type, r_A); break; case BPF_S_ANC_RXHASH: - emit_skb_load32(rxhash, r_A); + emit_skb_load32(hash, r_A); break; case BPF_S_ANC_VLAN_TAG: case BPF_S_ANC_VLAN_TAG_PRESENT: diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4ed75dd81d05..293c57b74edc 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -553,13 +553,13 @@ void bpf_jit_compile(struct sk_filter *fp) } break; case BPF_S_ANC_RXHASH: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); - if (is_imm8(offsetof(struct sk_buff, rxhash))) { + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + if (is_imm8(offsetof(struct sk_buff, hash))) { /* mov off8(%rdi),%eax */ - EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash)); + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash)); } else { EMIT2(0x8b, 0x87); - EMIT(offsetof(struct sk_buff, rxhash), 4); + EMIT(offsetof(struct sk_buff, hash), 4); } break; case BPF_S_ANC_QUEUE: diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 03db95ab8a8c..aa2c22cb8158 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -444,11 +444,11 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index * @tc_verd: traffic control verdict - * @rxhash: the packet hash computed on receive + * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed - * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport + * @l4_hash: indicate hash is a canonical 4-tuple hash over transport * ports. * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not @@ -537,7 +537,7 @@ struct sk_buff { int skb_iif; - __u32 rxhash; + __u32 hash; __be16 vlan_proto; __u16 vlan_tci; @@ -556,7 +556,7 @@ struct sk_buff { #endif __u8 pfmemalloc:1; __u8 ooo_okay:1; - __u8 l4_rxhash:1; + __u8 l4_hash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; __u8 no_fcs:1; @@ -815,40 +815,40 @@ enum pkt_hash_types { static inline void skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) { - skb->l4_rxhash = (type == PKT_HASH_TYPE_L4); - skb->rxhash = hash; + skb->l4_hash = (type == PKT_HASH_TYPE_L4); + skb->hash = hash; } void __skb_get_hash(struct sk_buff *skb); static inline __u32 skb_get_hash(struct sk_buff *skb) { - if (!skb->l4_rxhash) + if (!skb->l4_hash) __skb_get_hash(skb); - return skb->rxhash; + return skb->hash; } static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { - return skb->rxhash; + return skb->hash; } static inline void skb_clear_hash(struct sk_buff *skb) { - skb->rxhash = 0; - skb->l4_rxhash = 0; + skb->hash = 0; + skb->l4_hash = 0; } static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) { - if (!skb->l4_rxhash) + if (!skb->l4_hash) skb_clear_hash(skb); } static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) { - to->rxhash = from->rxhash; - to->l4_rxhash = from->l4_rxhash; + to->hash = from->hash; + to->l4_hash = from->l4_hash; }; #ifdef NET_SKBUFF_DATA_USES_OFFSET diff --git a/include/net/sock.h b/include/net/sock.h index 625e65b12366..8d7c431a0660 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -862,9 +862,9 @@ static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_RPS - if (unlikely(sk->sk_rxhash != skb->rxhash)) { + if (unlikely(sk->sk_rxhash != skb->hash)) { sock_rps_reset_flow(sk); - sk->sk_rxhash = skb->rxhash; + sk->sk_rxhash = skb->hash; } #endif } diff --git a/include/trace/events/net.h b/include/trace/events/net.h index a34f27b2e394..1de256b35807 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -153,8 +153,8 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __field( u16, vlan_tci ) __field( u16, protocol ) __field( u8, ip_summed ) - __field( u32, rxhash ) - __field( bool, l4_rxhash ) + __field( u32, hash ) + __field( bool, l4_hash ) __field( unsigned int, len ) __field( unsigned int, data_len ) __field( unsigned int, truesize ) @@ -179,8 +179,8 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __entry->vlan_tci = vlan_tx_tag_get(skb); __entry->protocol = ntohs(skb->protocol); __entry->ip_summed = skb->ip_summed; - __entry->rxhash = skb->rxhash; - __entry->l4_rxhash = skb->l4_rxhash; + __entry->hash = skb->hash; + __entry->l4_hash = skb->l4_hash; __entry->len = skb->len; __entry->data_len = skb->data_len; __entry->truesize = skb->truesize; @@ -191,11 +191,11 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __entry->gso_type = skb_shinfo(skb)->gso_type; ), - TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d rxhash=0x%08x l4_rxhash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", + TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", __get_str(name), __entry->napi_id, __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, - __entry->rxhash, __entry->l4_rxhash, __entry->len, + __entry->hash, __entry->l4_hash, __entry->len, __entry->data_len, __entry->truesize, __entry->mac_header_valid, __entry->mac_header, __entry->nr_frags, __entry->gso_size, __entry->gso_type) diff --git a/net/core/dev.c b/net/core/dev.c index 55f8e64c03a2..48dd323d5918 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2952,7 +2952,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, flow_table = rcu_dereference(rxqueue->rps_flow_table); if (!flow_table) goto out; - flow_id = skb->rxhash & flow_table->mask; + flow_id = skb_get_hash(skb) & flow_table->mask; rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id); if (rc < 0) @@ -2986,6 +2986,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_sock_flow_table *sock_flow_table; int cpu = -1; u16 tcpu; + u32 hash; if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); @@ -3014,7 +3015,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } skb_reset_network_header(skb); - if (!skb_get_hash(skb)) + hash = skb_get_hash(skb); + if (!hash) goto done; flow_table = rcu_dereference(rxqueue->rps_flow_table); @@ -3023,11 +3025,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, u16 next_cpu; struct rps_dev_flow *rflow; - rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; + rflow = &flow_table->flows[hash & flow_table->mask]; tcpu = rflow->cpu; - next_cpu = sock_flow_table->ents[skb->rxhash & - sock_flow_table->mask]; + next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask]; /* * If the desired CPU (where last recvmsg was done) is @@ -3056,7 +3057,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } if (map) { - tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; + tcpu = map->cpus[((u64) hash * map->len) >> 32]; if (cpu_online(tcpu)) { cpu = tcpu; diff --git a/net/core/filter.c b/net/core/filter.c index ad30d626a5bd..65b75966e206 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -336,7 +336,7 @@ load_b: A = skb->dev->type; continue; case BPF_S_ANC_RXHASH: - A = skb->rxhash; + A = skb->hash; continue; case BPF_S_ANC_CPU: A = raw_smp_processor_id(); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 80201bf69d59..107ed12a5323 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -203,8 +203,8 @@ static __always_inline u32 __flow_hash_1word(u32 a) /* * __skb_get_hash: calculate a flow hash based on src/dst addresses - * and src/dst port numbers. Sets rxhash in skb to non-zero hash value - * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * and src/dst port numbers. Sets hash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_hash in skb * if hash is a canonical 4-tuple hash over transport ports. */ void __skb_get_hash(struct sk_buff *skb) @@ -216,7 +216,7 @@ void __skb_get_hash(struct sk_buff *skb) return; if (keys.ports) - skb->l4_rxhash = 1; + skb->l4_hash = 1; /* get a consistent hash (same value on both flow directions) */ if (((__force u32)keys.dst < (__force u32)keys.src) || @@ -232,7 +232,7 @@ void __skb_get_hash(struct sk_buff *skb) if (!hash) hash = 1; - skb->rxhash = hash; + skb->hash = hash; } EXPORT_SYMBOL(__skb_get_hash); @@ -344,7 +344,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) hash = skb->sk->sk_hash; else hash = (__force u16) skb->protocol ^ - skb->rxhash; + skb->hash; hash = __flow_hash_1word(hash); queue_index = map->queues[ ((u64)hash * map->len) >> 32]; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 292304404fda..097a354ec8cd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1277,7 +1277,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_scale(skb->rxhash, num); + return reciprocal_scale(skb_get_hash(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, @@ -1362,7 +1362,6 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; } - skb_get_hash(skb); idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: -- cgit From 015f0688f57ca4d499047d335b8052a733e17a4d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Mar 2014 08:45:56 -0700 Subject: net: net: add a core netdev->tx_dropped counter Dropping packets in __dev_queue_xmit() when transmit queue is stopped (NIC TX ring buffer full or BQL limit reached) currently outputs a syslog message. It would be better to get a precise count of such events available in netdevice stats so that monitoring tools can have a clue. This extends the work done in caf586e5f23ce ("net: add a core netdev->rx_dropped counter") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 ++++--- net/core/dev.c | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4b6d12c7b803..159c7e7945f8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1311,9 +1311,10 @@ struct net_device { int iflink; struct net_device_stats stats; - atomic_long_t rx_dropped; /* dropped packets by core network - * Do not use this in drivers. - */ + + /* dropped packets by core network, Do not use this in drivers */ + atomic_long_t rx_dropped; + atomic_long_t tx_dropped; #ifdef CONFIG_WIRELESS_EXT /* List of functions to handle Wireless Extensions (instead of ioctl). diff --git a/net/core/dev.c b/net/core/dev.c index 48dd323d5918..98ba581b89f0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2880,6 +2880,7 @@ recursion_alert: rc = -ENETDOWN; rcu_read_unlock_bh(); + atomic_long_inc(&dev->tx_dropped); kfree_skb(skb); return rc; out: @@ -6238,6 +6239,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, netdev_stats_to_stats64(storage, &dev->stats); } storage->rx_dropped += atomic_long_read(&dev->rx_dropped); + storage->tx_dropped += atomic_long_read(&dev->tx_dropped); return storage; } EXPORT_SYMBOL(dev_get_stats); -- cgit From 53d6471cef17262d3ad1c7ce8982a234244f68ec Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 17:26:18 -0400 Subject: net: Account for all vlan headers in skb_mac_gso_segment skb_network_protocol() already accounts for multiple vlan headers that may be present in the skb. However, skb_mac_gso_segment() doesn't know anything about it and assumes that skb->mac_len is set correctly to skip all mac headers. That may not always be the case. If we are simply forwarding the packet (via bridge or macvtap), all vlan headers may not be accounted for. A simple solution is to allow skb_network_protocol to return the vlan depth it has calculated. This way skb_mac_gso_segment will correctly skip all mac headers. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/dev.c | 13 +++++++++---- net/core/skbuff.c | 3 ++- 3 files changed, 12 insertions(+), 6 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e8eeebd49a98..daafd9561cbc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3014,7 +3014,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { return __skb_gso_segment(skb, features, true); } -__be16 skb_network_protocol(struct sk_buff *skb); +__be16 skb_network_protocol(struct sk_buff *skb, int *depth); static inline bool can_checksum_protocol(netdev_features_t features, __be16 protocol) diff --git a/net/core/dev.c b/net/core/dev.c index b1b0c8d4d7df..45fa2f11f84d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2286,7 +2286,7 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -__be16 skb_network_protocol(struct sk_buff *skb) +__be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; @@ -2313,6 +2313,8 @@ __be16 skb_network_protocol(struct sk_buff *skb) vlan_depth += VLAN_HLEN; } + *depth = vlan_depth; + return type; } @@ -2326,12 +2328,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; - __be16 type = skb_network_protocol(skb); + int vlan_depth = skb->mac_len; + __be16 type = skb_network_protocol(skb, &vlan_depth); if (unlikely(!type)) return ERR_PTR(-EINVAL); - __skb_pull(skb, skb->mac_len); + __skb_pull(skb, vlan_depth); rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { @@ -2498,8 +2501,10 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, const struct net_device *dev, netdev_features_t features) { + int tmp; + if (skb->ip_summed != CHECKSUM_NONE && - !can_checksum_protocol(features, skb_network_protocol(skb))) { + !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { features &= ~NETIF_F_ALL_CSUM; } else if (illegal_highdma(dev, skb)) { features &= ~NETIF_F_SG; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 97e5a2c3d947..90b96a11b974 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2879,8 +2879,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int err = -ENOMEM; int i = 0; int pos; + int dummy; - proto = skb_network_protocol(head_skb); + proto = skb_network_protocol(head_skb, &dummy); if (unlikely(!proto)) return ERR_PTR(-EINVAL); -- cgit From 3f4df2066b4e02cb609fa33b2eae8403b5821f4f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Mar 2014 15:38:17 -0700 Subject: netpoll: Move rx enable/disable into __dev_close_many Today netpoll_rx_enable and netpoll_rx_disable are called from dev_close and and __dev_close, and not from dev_close_many. Move the calls into __dev_close_many so that we have a single call site to maintain, and so that dev_close_many gains this protection as well. Which importantly makes batched network device deletes safe. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/dev.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 98ba581b89f0..8d55fe780e3f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1313,6 +1313,9 @@ static int __dev_close_many(struct list_head *head) might_sleep(); list_for_each_entry(dev, head, close_list) { + /* Temporarily disable netpoll until the interface is down */ + netpoll_rx_disable(dev); + call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); clear_bit(__LINK_STATE_START, &dev->state); @@ -1343,6 +1346,7 @@ static int __dev_close_many(struct list_head *head) dev->flags &= ~IFF_UP; net_dmaengine_put(); + netpoll_rx_enable(dev); } return 0; @@ -1353,14 +1357,10 @@ static int __dev_close(struct net_device *dev) int retval; LIST_HEAD(single); - /* Temporarily disable netpoll until the interface is down */ - netpoll_rx_disable(dev); - list_add(&dev->close_list, &single); retval = __dev_close_many(&single); list_del(&single); - netpoll_rx_enable(dev); return retval; } @@ -1398,14 +1398,9 @@ int dev_close(struct net_device *dev) if (dev->flags & IFF_UP) { LIST_HEAD(single); - /* Block netpoll rx while the interface is going down */ - netpoll_rx_disable(dev); - list_add(&dev->close_list, &single); dev_close_many(&single); list_del(&single); - - netpoll_rx_enable(dev); } return 0; } -- cgit From 66b5552fc2dfbaa6445b1bdadd10c9305ce261bd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Mar 2014 15:39:03 -0700 Subject: netpoll: Rename netpoll_rx_enable/disable to netpoll_poll_disable/enable The netpoll_rx_enable and netpoll_rx_disable functions have always controlled polling the network drivers transmit and receive queues. Rename them to netpoll_poll_enable and netpoll_poll_disable to make their functionality clear. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netpoll.h | 8 ++++---- net/core/dev.c | 8 ++++---- net/core/netpoll.c | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 893b9e66060e..b25ee9ffdbe6 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -47,11 +47,11 @@ struct netpoll_info { }; #ifdef CONFIG_NETPOLL -extern void netpoll_rx_disable(struct net_device *dev); -extern void netpoll_rx_enable(struct net_device *dev); +extern void netpoll_poll_disable(struct net_device *dev); +extern void netpoll_poll_enable(struct net_device *dev); #else -static inline void netpoll_rx_disable(struct net_device *dev) { return; } -static inline void netpoll_rx_enable(struct net_device *dev) { return; } +static inline void netpoll_poll_disable(struct net_device *dev) { return; } +static inline void netpoll_poll_enable(struct net_device *dev) { return; } #endif void netpoll_send_udp(struct netpoll *np, const char *msg, int len); diff --git a/net/core/dev.c b/net/core/dev.c index 8d55fe780e3f..778b2036a9e7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1245,7 +1245,7 @@ static int __dev_open(struct net_device *dev) * If we don't do this there is a chance ndo_poll_controller * or ndo_poll may be running while we open the device */ - netpoll_rx_disable(dev); + netpoll_poll_disable(dev); ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); @@ -1260,7 +1260,7 @@ static int __dev_open(struct net_device *dev) if (!ret && ops->ndo_open) ret = ops->ndo_open(dev); - netpoll_rx_enable(dev); + netpoll_poll_enable(dev); if (ret) clear_bit(__LINK_STATE_START, &dev->state); @@ -1314,7 +1314,7 @@ static int __dev_close_many(struct list_head *head) list_for_each_entry(dev, head, close_list) { /* Temporarily disable netpoll until the interface is down */ - netpoll_rx_disable(dev); + netpoll_poll_disable(dev); call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); @@ -1346,7 +1346,7 @@ static int __dev_close_many(struct list_head *head) dev->flags &= ~IFF_UP; net_dmaengine_put(); - netpoll_rx_enable(dev); + netpoll_poll_enable(dev); } return 0; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e2492d176ae7..9fd88875faff 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -214,7 +214,7 @@ static void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } -void netpoll_rx_disable(struct net_device *dev) +void netpoll_poll_disable(struct net_device *dev) { struct netpoll_info *ni; int idx; @@ -225,9 +225,9 @@ void netpoll_rx_disable(struct net_device *dev) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); } -EXPORT_SYMBOL(netpoll_rx_disable); +EXPORT_SYMBOL(netpoll_poll_disable); -void netpoll_rx_enable(struct net_device *dev) +void netpoll_poll_enable(struct net_device *dev) { struct netpoll_info *ni; rcu_read_lock(); @@ -236,7 +236,7 @@ void netpoll_rx_enable(struct net_device *dev) up(&ni->dev_lock); rcu_read_unlock(); } -EXPORT_SYMBOL(netpoll_rx_enable); +EXPORT_SYMBOL(netpoll_poll_enable); static void refill_skbs(void) { -- cgit From 1ee481fb4cf8044632fa869bafc41345afa5957b Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 17:32:29 -0400 Subject: net: Allow modules to use is_skb_forwardable Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 06287c110241..29b579fb5196 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2629,6 +2629,7 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); +bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); extern int netdev_budget; diff --git a/net/core/dev.c b/net/core/dev.c index cf92139b229c..a923eed976ae 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1640,8 +1640,7 @@ static inline void net_timestamp_set(struct sk_buff *skb) __net_timestamp(SKB); \ } \ -static inline bool is_skb_forwardable(struct net_device *dev, - struct sk_buff *skb) +bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) { unsigned int len; @@ -1660,6 +1659,7 @@ static inline bool is_skb_forwardable(struct net_device *dev, return false; } +EXPORT_SYMBOL_GPL(is_skb_forwardable); /** * dev_forward_skb - loopback an skb to another netif -- cgit From a50e233c50dbc881abaa0e4070789064e8d12d70 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 29 Mar 2014 21:28:21 -0700 Subject: net-gro: restore frag0 optimization Main difference between napi_frags_skb() and napi_gro_receive() is that the later is called while ethernet header was already pulled by the NIC driver (eth_type_trans() was called before napi_gro_receive()) Jerry Chu in commit 299603e8370a ("net-gro: Prepare GRO stack for the upcoming tunneling support") tried to remove this difference by calling eth_type_trans() from napi_frags_skb() instead of doing this later from napi_frags_finish() Goal was that napi_gro_complete() could call ptype->callbacks.gro_complete(skb, 0) (offset of first network header = 0) Also, xxx_gro_receive() handlers all use off = skb_gro_offset(skb) to point to their own header, for the current skb and ones held in gro_list Problem is this cleanup work defeated the frag0 optimization: It turns out the consecutive pskb_may_pull() calls are too expensive. This patch brings back the frag0 stuff in napi_frags_skb(). As all skb have their mac header in skb head, we no longer need skb_gro_mac_header() Reported-by: Michal Schmidt Fixes: 299603e8370a ("net-gro: Prepare GRO stack for the upcoming tunneling support") Signed-off-by: Eric Dumazet Cc: Jerry Chu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 --- net/core/dev.c | 96 +++++++++++++++++++++++++++++++---------------- 2 files changed, 64 insertions(+), 37 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45537ed7a5b3..775cc956ff78 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2014,11 +2014,6 @@ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, return skb->data + offset; } -static inline void *skb_gro_mac_header(struct sk_buff *skb) -{ - return NAPI_GRO_CB(skb)->frag0 ?: skb_mac_header(skb); -} - static inline void *skb_gro_network_header(struct sk_buff *skb) { return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + diff --git a/net/core/dev.c b/net/core/dev.c index a923eed976ae..48d81e4a256e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3833,10 +3833,10 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) diffs |= p->vlan_tci ^ skb->vlan_tci; if (maclen == ETH_HLEN) diffs |= compare_ether_header(skb_mac_header(p), - skb_gro_mac_header(skb)); + skb_mac_header(skb)); else if (!diffs) diffs = memcmp(skb_mac_header(p), - skb_gro_mac_header(skb), + skb_mac_header(skb), maclen); NAPI_GRO_CB(p)->same_flow = !diffs; } @@ -3859,6 +3859,27 @@ static void skb_gro_reset_offset(struct sk_buff *skb) } } +static void gro_pull_from_frag0(struct sk_buff *skb, int grow) +{ + struct skb_shared_info *pinfo = skb_shinfo(skb); + + BUG_ON(skb->end - skb->tail < grow); + + memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); + + skb->data_len -= grow; + skb->tail += grow; + + pinfo->frags[0].page_offset += grow; + skb_frag_size_sub(&pinfo->frags[0], grow); + + if (unlikely(!skb_frag_size(&pinfo->frags[0]))) { + skb_frag_unref(skb, 0); + memmove(pinfo->frags, pinfo->frags + 1, + --pinfo->nr_frags * sizeof(pinfo->frags[0])); + } +} + static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; @@ -3867,6 +3888,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff struct list_head *head = &offload_base; int same_flow; enum gro_result ret; + int grow; if (!(skb->dev->features & NETIF_F_GRO)) goto normal; @@ -3874,7 +3896,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; - skb_gro_reset_offset(skb); gro_list_prepare(napi, skb); NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ @@ -3938,27 +3959,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff ret = GRO_HELD; pull: - if (skb_headlen(skb) < skb_gro_offset(skb)) { - int grow = skb_gro_offset(skb) - skb_headlen(skb); - - BUG_ON(skb->end - skb->tail < grow); - - memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); - - skb->tail += grow; - skb->data_len -= grow; - - skb_shinfo(skb)->frags[0].page_offset += grow; - skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow); - - if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) { - skb_frag_unref(skb, 0); - memmove(skb_shinfo(skb)->frags, - skb_shinfo(skb)->frags + 1, - --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); - } - } - + grow = skb_gro_offset(skb) - skb_headlen(skb); + if (grow > 0) + gro_pull_from_frag0(skb, grow); ok: return ret; @@ -4026,6 +4029,8 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { trace_napi_gro_receive_entry(skb); + skb_gro_reset_offset(skb); + return napi_skb_finish(dev_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -4054,12 +4059,16 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_get_frags); -static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, - gro_result_t ret) +static gro_result_t napi_frags_finish(struct napi_struct *napi, + struct sk_buff *skb, + gro_result_t ret) { switch (ret) { case GRO_NORMAL: - if (netif_receive_skb_internal(skb)) + case GRO_HELD: + __skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + if (ret == GRO_NORMAL && netif_receive_skb_internal(skb)) ret = GRO_DROP; break; @@ -4068,7 +4077,6 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * napi_reuse_skb(napi, skb); break; - case GRO_HELD: case GRO_MERGED: break; } @@ -4076,17 +4084,41 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * return ret; } +/* Upper GRO stack assumes network header starts at gro_offset=0 + * Drivers could call both napi_gro_frags() and napi_gro_receive() + * We copy ethernet header into skb->data to have a common layout. + */ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) { struct sk_buff *skb = napi->skb; + const struct ethhdr *eth; + unsigned int hlen = sizeof(*eth); napi->skb = NULL; - if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) { - napi_reuse_skb(napi, skb); - return NULL; + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb); + + eth = skb_gro_header_fast(skb, 0); + if (unlikely(skb_gro_header_hard(skb, hlen))) { + eth = skb_gro_header_slow(skb, hlen, 0); + if (unlikely(!eth)) { + napi_reuse_skb(napi, skb); + return NULL; + } + } else { + gro_pull_from_frag0(skb, hlen); + NAPI_GRO_CB(skb)->frag0 += hlen; + NAPI_GRO_CB(skb)->frag0_len -= hlen; } - skb->protocol = eth_type_trans(skb, skb->dev); + __skb_pull(skb, hlen); + + /* + * This works because the only protocols we care about don't require + * special handling. + * We'll fix it up properly in napi_frags_finish() + */ + skb->protocol = eth->h_proto; return skb; } -- cgit From d0290214de712150b118a532ded378a29255893b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 2 Apr 2014 23:09:31 +0200 Subject: net: add busy_poll device feature Currently there is no way how to find out if a device supports busy polling. So add a feature and make it dependent on ndo_busy_poll existence. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 ++ net/core/dev.c | 7 +++++++ net/core/ethtool.c | 1 + 3 files changed, 10 insertions(+) (limited to 'net/core/dev.c') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 5a09a48f2658..c26d0ec2ef3a 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -63,6 +63,7 @@ enum { NETIF_F_HW_VLAN_STAG_RX_BIT, /* Receive VLAN STAG HW acceleration */ NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ + NETIF_F_BUSY_POLL_BIT, /* Busy poll */ /* * Add your fresh new feature above and remember to update @@ -118,6 +119,7 @@ enum { #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) +#define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/net/core/dev.c b/net/core/dev.c index 757063420ce0..75e88e0372cb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5696,6 +5696,13 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } +#ifdef CONFIG_NET_RX_BUSY_POLL + if (dev->netdev_ops->ndo_busy_poll) + features |= NETIF_F_BUSY_POLL; + else +#endif + features &= ~NETIF_F_BUSY_POLL; + return features; } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 30071dec287a..640ba0e5831c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -97,6 +97,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXFCS_BIT] = "rx-fcs", [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", + [NETIF_F_BUSY_POLL_BIT] = "busy-poll", }; static int ethtool_get_features(struct net_device *dev, void __user *useraddr) -- cgit From e33d0ba8047b049c9262fdb1fcafb93cb52ceceb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Apr 2014 09:28:10 -0700 Subject: net-gro: reset skb->truesize in napi_reuse_skb() Recycling skb always had been very tough... This time it appears GRO layer can accumulate skb->truesize adjustments made by drivers when they attach a fragment to skb. skb_gro_receive() can only subtract from skb->truesize the used part of a fragment. I spotted this problem seeing TcpExtPruneCalled and TcpExtTCPRcvCollapsed that were unexpected with a recent kernel, where TCP receive window should be sized properly to accept traffic coming from a driver not overshooting skb->truesize. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 75e88e0372cb..577701839f52 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4043,6 +4043,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; + skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); napi->skb = skb; } -- cgit From 6859e7df6d9045a461412777e63bd8cef12f9705 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Mon, 7 Apr 2014 11:25:12 +0200 Subject: netdev: remove potentially harmful checks Currently we're checking a variable for != NULL after actually dereferencing it, in netdev_lower_get_next_private*(). It's counter-intuitive at best, and can lead to faulty usage (as it implies that the variable can be NULL), so fix it by removing the useless checks. Reported-by: Daniel Borkmann CC: "David S. Miller" CC: Eric Dumazet CC: Nicolas Dichtel CC: Jiri Pirko CC: stephen hemminger CC: Jerry Chu Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 577701839f52..14dac0654f28 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4589,8 +4589,7 @@ void *netdev_lower_get_next_private(struct net_device *dev, if (&lower->list == &dev->adj_list.lower) return NULL; - if (iter) - *iter = lower->list.next; + *iter = lower->list.next; return lower->private; } @@ -4618,8 +4617,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, if (&lower->list == &dev->adj_list.lower) return NULL; - if (iter) - *iter = &lower->list; + *iter = &lower->list; return lower->private; } -- cgit