diff options
Diffstat (limited to 'net/core')
| -rw-r--r-- | net/core/Makefile | 1 | ||||
| -rw-r--r-- | net/core/datagram.c | 14 | ||||
| -rw-r--r-- | net/core/dev.c | 1321 | ||||
| -rw-r--r-- | net/core/drop_monitor.c | 16 | ||||
| -rw-r--r-- | net/core/ethtool.c | 113 | ||||
| -rw-r--r-- | net/core/fib_rules.c | 107 | ||||
| -rw-r--r-- | net/core/filter.c | 6 | ||||
| -rw-r--r-- | net/core/gen_stats.c | 8 | ||||
| -rw-r--r-- | net/core/link_watch.c | 94 | ||||
| -rw-r--r-- | net/core/neighbour.c | 144 | ||||
| -rw-r--r-- | net/core/net-sysfs.c | 77 | ||||
| -rw-r--r-- | net/core/net_namespace.c | 305 | ||||
| -rw-r--r-- | net/core/netpoll.c | 6 | ||||
| -rw-r--r-- | net/core/pktgen.c | 835 | ||||
| -rw-r--r-- | net/core/rtnetlink.c | 204 | ||||
| -rw-r--r-- | net/core/skb_dma_map.c | 65 | ||||
| -rw-r--r-- | net/core/skbuff.c | 11 | ||||
| -rw-r--r-- | net/core/sock.c | 107 | ||||
| -rw-r--r-- | net/core/sysctl_net_core.c | 25 | ||||
| -rw-r--r-- | net/core/utils.c | 2 |
20 files changed, 1946 insertions, 1515 deletions
diff --git a/net/core/Makefile b/net/core/Makefile index 796f46eece5f..08791ac3e05a 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -6,7 +6,6 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ gen_stats.o gen_estimator.o net_namespace.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-$(CONFIG_HAS_DMA) += skb_dma_map.o obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o diff --git a/net/core/datagram.c b/net/core/datagram.c index b0fe69211eef..95c2e0840d0d 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -55,6 +55,7 @@ #include <net/checksum.h> #include <net/sock.h> #include <net/tcp_states.h> +#include <trace/events/skb.h> /* * Is a socket 'connection oriented' ? @@ -223,6 +224,15 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) consume_skb(skb); sk_mem_reclaim_partial(sk); } +EXPORT_SYMBOL(skb_free_datagram); + +void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) +{ + lock_sock(sk); + skb_free_datagram(sk, skb); + release_sock(sk); +} +EXPORT_SYMBOL(skb_free_datagram_locked); /** * skb_kill_datagram - Free a datagram skbuff forcibly @@ -261,6 +271,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) } kfree_skb(skb); + atomic_inc(&sk->sk_drops); sk_mem_reclaim_partial(sk); return err; @@ -284,6 +295,8 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, int i, copy = start - offset; struct sk_buff *frag_iter; + trace_skb_copy_datagram_iovec(skb, len); + /* Copy header. */ if (copy > 0) { if (copy > len) @@ -749,5 +762,4 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, EXPORT_SYMBOL(datagram_poll); EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec); EXPORT_SYMBOL(skb_copy_datagram_iovec); -EXPORT_SYMBOL(skb_free_datagram); EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/core/dev.c b/net/core/dev.c index 6a94475aee85..c36a17aafcf3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -79,6 +79,7 @@ #include <linux/cpu.h> #include <linux/types.h> #include <linux/kernel.h> +#include <linux/hash.h> #include <linux/sched.h> #include <linux/mutex.h> #include <linux/string.h> @@ -104,6 +105,7 @@ #include <net/dst.h> #include <net/pkt_sched.h> #include <net/checksum.h> +#include <net/xfrm.h> #include <linux/highmem.h> #include <linux/init.h> #include <linux/kmod.h> @@ -175,7 +177,7 @@ static struct list_head ptype_all __read_mostly; /* Taps */ * The @dev_base_head list is protected by @dev_base_lock and the rtnl * semaphore. * - * Pure readers hold dev_base_lock for reading. + * Pure readers hold dev_base_lock for reading, or rcu_read_lock() * * Writers must hold the rtnl semaphore while they loop through the * dev_base_head list, and hold dev_base_lock for writing when they do the @@ -191,21 +193,17 @@ static struct list_head ptype_all __read_mostly; /* Taps */ * semaphore held. */ DEFINE_RWLOCK(dev_base_lock); - EXPORT_SYMBOL(dev_base_lock); -#define NETDEV_HASHBITS 8 -#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) - static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) { unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); - return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; + return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)]; } static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) { - return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; + return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; } /* Device list insertion */ @@ -216,23 +214,26 @@ static int list_netdevice(struct net_device *dev) ASSERT_RTNL(); write_lock_bh(&dev_base_lock); - list_add_tail(&dev->dev_list, &net->dev_base_head); - hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); - hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); + list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); + hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); + hlist_add_head_rcu(&dev->index_hlist, + dev_index_hash(net, dev->ifindex)); write_unlock_bh(&dev_base_lock); return 0; } -/* Device list removal */ +/* Device list removal + * caller must respect a RCU grace period before freeing/reusing dev + */ static void unlist_netdevice(struct net_device *dev) { ASSERT_RTNL(); /* Unlink dev from the device chain */ write_lock_bh(&dev_base_lock); - list_del(&dev->dev_list); - hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); + list_del_rcu(&dev->dev_list); + hlist_del_rcu(&dev->name_hlist); + hlist_del_rcu(&dev->index_hlist); write_unlock_bh(&dev_base_lock); } @@ -248,6 +249,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain); */ DEFINE_PER_CPU(struct softnet_data, softnet_data); +EXPORT_PER_CPU_SYMBOL(softnet_data); #ifdef CONFIG_LOCKDEP /* @@ -269,10 +271,10 @@ static const unsigned short netdev_lock_type[] = ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, - ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY, + ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; -static const char *netdev_lock_name[] = +static const char *const netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", @@ -287,7 +289,7 @@ static const char *netdev_lock_name[] = "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", - "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_IEEE802154_PHY", + "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; @@ -381,6 +383,7 @@ void dev_add_pack(struct packet_type *pt) } spin_unlock_bh(&ptype_lock); } +EXPORT_SYMBOL(dev_add_pack); /** * __dev_remove_pack - remove packet handler @@ -418,6 +421,8 @@ void __dev_remove_pack(struct packet_type *pt) out: spin_unlock_bh(&ptype_lock); } +EXPORT_SYMBOL(__dev_remove_pack); + /** * dev_remove_pack - remove packet handler * @pt: packet type declaration @@ -436,6 +441,7 @@ void dev_remove_pack(struct packet_type *pt) synchronize_net(); } +EXPORT_SYMBOL(dev_remove_pack); /****************************************************************************** @@ -499,6 +505,7 @@ int netdev_boot_setup_check(struct net_device *dev) } return 0; } +EXPORT_SYMBOL(netdev_boot_setup_check); /** @@ -582,15 +589,42 @@ __setup("netdev=", netdev_boot_setup); struct net_device *__dev_get_by_name(struct net *net, const char *name) { struct hlist_node *p; + struct net_device *dev; + struct hlist_head *head = dev_name_hash(net, name); - hlist_for_each(p, dev_name_hash(net, name)) { - struct net_device *dev - = hlist_entry(p, struct net_device, name_hlist); + hlist_for_each_entry(dev, p, head, name_hlist) if (!strncmp(dev->name, name, IFNAMSIZ)) return dev; - } + + return NULL; +} +EXPORT_SYMBOL(__dev_get_by_name); + +/** + * dev_get_by_name_rcu - find a device by its name + * @net: the applicable net namespace + * @name: name to find + * + * Find an interface by name. + * If the name is found a pointer to the device is returned. + * If the name is not found then %NULL is returned. + * The reference counters are not incremented so the caller must be + * careful with locks. The caller must hold RCU lock. + */ + +struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) +{ + struct hlist_node *p; + struct net_device *dev; + struct hlist_head *head = dev_name_hash(net, name); + + hlist_for_each_entry_rcu(dev, p, head, name_hlist) + if (!strncmp(dev->name, name, IFNAMSIZ)) + return dev; + return NULL; } +EXPORT_SYMBOL(dev_get_by_name_rcu); /** * dev_get_by_name - find a device by its name @@ -608,13 +642,14 @@ struct net_device *dev_get_by_name(struct net *net, const char *name) { struct net_device *dev; - read_lock(&dev_base_lock); - dev = __dev_get_by_name(net, name); + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, name); if (dev) dev_hold(dev); - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev; } +EXPORT_SYMBOL(dev_get_by_name); /** * __dev_get_by_index - find a device by its ifindex @@ -631,15 +666,41 @@ struct net_device *dev_get_by_name(struct net *net, const char *name) struct net_device *__dev_get_by_index(struct net *net, int ifindex) { struct hlist_node *p; + struct net_device *dev; + struct hlist_head *head = dev_index_hash(net, ifindex); - hlist_for_each(p, dev_index_hash(net, ifindex)) { - struct net_device *dev - = hlist_entry(p, struct net_device, index_hlist); + hlist_for_each_entry(dev, p, head, index_hlist) if (dev->ifindex == ifindex) return dev; - } + + return NULL; +} +EXPORT_SYMBOL(__dev_get_by_index); + +/** + * dev_get_by_index_rcu - find a device by its ifindex + * @net: the applicable net namespace + * @ifindex: index of device + * + * Search for an interface by index. Returns %NULL if the device + * is not found or a pointer to the device. The device has not + * had its reference counter increased so the caller must be careful + * about locking. The caller must hold RCU lock. + */ + +struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) +{ + struct hlist_node *p; + struct net_device *dev; + struct hlist_head *head = dev_index_hash(net, ifindex); + + hlist_for_each_entry_rcu(dev, p, head, index_hlist) + if (dev->ifindex == ifindex) + return dev; + return NULL; } +EXPORT_SYMBOL(dev_get_by_index_rcu); /** @@ -657,13 +718,14 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) { struct net_device *dev; - read_lock(&dev_base_lock); - dev = __dev_get_by_index(net, ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifindex); if (dev) dev_hold(dev); - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev; } +EXPORT_SYMBOL(dev_get_by_index); /** * dev_getbyhwaddr - find a device by its hardware address @@ -693,7 +755,6 @@ struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *h return NULL; } - EXPORT_SYMBOL(dev_getbyhwaddr); struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) @@ -707,7 +768,6 @@ struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) return NULL; } - EXPORT_SYMBOL(__dev_getfirstbyhwtype); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) @@ -721,7 +781,6 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) rtnl_unlock(); return dev; } - EXPORT_SYMBOL(dev_getfirstbyhwtype); /** @@ -736,22 +795,24 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype); * dev_put to indicate they have finished with it. */ -struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask) +struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, + unsigned short mask) { struct net_device *dev, *ret; ret = NULL; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { dev_hold(dev); ret = dev; break; } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return ret; } +EXPORT_SYMBOL(dev_get_by_flags); /** * dev_valid_name - check if name is okay for network device @@ -777,6 +838,7 @@ int dev_valid_name(const char *name) } return 1; } +EXPORT_SYMBOL(dev_valid_name); /** * __dev_alloc_name - allocate a name for a device @@ -832,7 +894,8 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) free_page((unsigned long) inuse); } - snprintf(buf, IFNAMSIZ, name, i); + if (buf != name) + snprintf(buf, IFNAMSIZ, name, i); if (!__dev_get_by_name(net, buf)) return i; @@ -870,7 +933,23 @@ int dev_alloc_name(struct net_device *dev, const char *name) strlcpy(dev->name, buf, IFNAMSIZ); return ret; } +EXPORT_SYMBOL(dev_alloc_name); + +static int dev_get_valid_name(struct net *net, const char *name, char *buf, + bool fmt) +{ + if (!dev_valid_name(name)) + return -EINVAL; + + if (fmt && strchr(name, '%')) + return __dev_alloc_name(net, name, buf); + else if (__dev_get_by_name(net, name)) + return -EEXIST; + else if (buf != name) + strlcpy(buf, name, IFNAMSIZ); + return 0; +} /** * dev_change_name - change name of a device @@ -894,29 +973,20 @@ int dev_change_name(struct net_device *dev, const char *newname) if (dev->flags & IFF_UP) return -EBUSY; - if (!dev_valid_name(newname)) - return -EINVAL; - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) return 0; memcpy(oldname, dev->name, IFNAMSIZ); - if (strchr(newname, '%')) { - err = dev_alloc_name(dev, newname); - if (err < 0) - return err; - } - else if (__dev_get_by_name(net, newname)) - return -EEXIST; - else - strlcpy(dev->name, newname, IFNAMSIZ); + err = dev_get_valid_name(net, newname, dev->name, 1); + if (err < 0) + return err; rollback: /* For now only devices in the initial network namespace * are in sysfs. */ - if (net == &init_net) { + if (net_eq(net, &init_net)) { ret = device_rename(&dev->dev, dev->name); if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); @@ -926,21 +996,27 @@ rollback: write_lock_bh(&dev_base_lock); hlist_del(&dev->name_hlist); - hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); + write_unlock_bh(&dev_base_lock); + + synchronize_rcu(); + + write_lock_bh(&dev_base_lock); + hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); write_unlock_bh(&dev_base_lock); ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); ret = notifier_to_errno(ret); if (ret) { - if (err) { - printk(KERN_ERR - "%s: name change rollback failed: %d.\n", - dev->name, ret); - } else { + /* err >= 0 after dev_alloc_name() or stores the first errno */ + if (err >= 0) { err = ret; memcpy(dev->name, oldname, IFNAMSIZ); goto rollback; + } else { + printk(KERN_ERR + "%s: name change rollback failed: %d.\n", + dev->name, ret); } } @@ -970,7 +1046,7 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return 0; } - dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL); + dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); if (!dev->ifalias) return -ENOMEM; @@ -1006,10 +1082,11 @@ void netdev_state_change(struct net_device *dev) rtmsg_ifinfo(RTM_NEWLINK, dev, 0); } } +EXPORT_SYMBOL(netdev_state_change); -void netdev_bonding_change(struct net_device *dev) +void netdev_bonding_change(struct net_device *dev, unsigned long event) { - call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev); + call_netdevice_notifiers(event, dev); } EXPORT_SYMBOL(netdev_bonding_change); @@ -1027,13 +1104,14 @@ void dev_load(struct net *net, const char *name) { struct net_device *dev; - read_lock(&dev_base_lock); - dev = __dev_get_by_name(net, name); - read_unlock(&dev_base_lock); + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, name); + rcu_read_unlock(); - if (!dev && capable(CAP_SYS_MODULE)) + if (!dev && capable(CAP_NET_ADMIN)) request_module("%s", name); } +EXPORT_SYMBOL(dev_load); /** * dev_open - prepare an interface for use. @@ -1118,6 +1196,7 @@ int dev_open(struct net_device *dev) return ret; } +EXPORT_SYMBOL(dev_open); /** * dev_close - shutdown an interface. @@ -1184,6 +1263,7 @@ int dev_close(struct net_device *dev) return 0; } +EXPORT_SYMBOL(dev_close); /** @@ -1273,12 +1353,14 @@ rollback: nb->notifier_call(nb, NETDEV_DOWN, dev); } nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); } } raw_notifier_chain_unregister(&netdev_chain, nb); goto unlock; } +EXPORT_SYMBOL(register_netdevice_notifier); /** * unregister_netdevice_notifier - unregister a network notifier block @@ -1299,6 +1381,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb) rtnl_unlock(); return err; } +EXPORT_SYMBOL(unregister_netdevice_notifier); /** * call_netdevice_notifiers - call all network notifier blocks @@ -1321,11 +1404,13 @@ void net_enable_timestamp(void) { atomic_inc(&netstamp_needed); } +EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { atomic_dec(&netstamp_needed); } +EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp(struct sk_buff *skb) { @@ -1335,6 +1420,45 @@ static inline void net_timestamp(struct sk_buff *skb) skb->tstamp.tv64 = 0; } +/** + * dev_forward_skb - loopback an skb to another netif + * + * @dev: destination network device + * @skb: buffer to forward + * + * return values: + * NET_RX_SUCCESS (no congestion) + * NET_RX_DROP (packet was dropped) + * + * dev_forward_skb can be used for injecting an skb from the + * start_xmit function of one device into the receive queue + * of another device. + * + * The receiving device may be in another namespace, so + * we have to clear all information in the skb that could + * impact namespace isolation. + */ +int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) +{ + skb_orphan(skb); + + if (!(dev->flags & IFF_UP)) + return NET_RX_DROP; + + if (skb->len > (dev->mtu + dev->hard_header_len)) + return NET_RX_DROP; + + skb_dst_drop(skb); + skb->tstamp.tv64 = 0; + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, dev); + skb->mark = 0; + secpath_reset(skb); + nf_reset(skb); + return netif_rx(skb); +} +EXPORT_SYMBOL_GPL(dev_forward_skb); + /* * Support routine. Sends outgoing frames to any network * taps currently in use. @@ -1359,7 +1483,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) if ((ptype->dev == dev || !ptype->dev) && (ptype->af_packet_priv == NULL || (struct sock *)ptype->af_packet_priv != skb->sk)) { - struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC); + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) break; @@ -1527,6 +1651,7 @@ out_set_summed: out: return ret; } +EXPORT_SYMBOL(skb_checksum_help); /** * skb_gso_segment - Perform segmentation on skb. @@ -1589,7 +1714,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) return segs; } - EXPORT_SYMBOL(skb_gso_segment); /* Take action when hardware reception checksum errors are detected. */ @@ -1683,7 +1807,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { const struct net_device_ops *ops = dev->netdev_ops; - int rc; + int rc = NETDEV_TX_OK; if (likely(!skb->next)) { if (!list_empty(&ptype_all)) @@ -1704,7 +1828,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb_dst_drop(skb); rc = ops->ndo_start_xmit(skb, dev); - if (rc == 0) + if (rc == NETDEV_TX_OK) txq_trans_update(txq); /* * TODO: if skb_orphan() was called by @@ -1730,7 +1854,9 @@ gso: skb->next = nskb->next; nskb->next = NULL; rc = ops->ndo_start_xmit(nskb, dev); - if (unlikely(rc)) { + if (unlikely(rc != NETDEV_TX_OK)) { + if (rc & ~NETDEV_TX_MASK) + goto out_kfree_gso_skb; nskb->next = skb->next; skb->next = nskb; return rc; @@ -1740,11 +1866,12 @@ gso: return NETDEV_TX_BUSY; } while (skb->next); - skb->destructor = DEV_GSO_CB(skb)->destructor; - +out_kfree_gso_skb: + if (likely(skb->next == NULL)) + skb->destructor = DEV_GSO_CB(skb)->destructor; out_kfree_skb: kfree_skb(skb); - return 0; + return rc; } static u32 skb_tx_hashrnd; @@ -1755,7 +1882,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) if (skb_rx_queue_recorded(skb)) { hash = skb_get_rx_queue(skb); - while (unlikely (hash >= dev->real_num_tx_queues)) + while (unlikely(hash >= dev->real_num_tx_queues)) hash -= dev->real_num_tx_queues; return hash; } @@ -1771,21 +1898,82 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) } EXPORT_SYMBOL(skb_tx_hash); +static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) +{ + if (unlikely(queue_index >= dev->real_num_tx_queues)) { + if (net_ratelimit()) { + WARN(1, "%s selects TX queue %d, but " + "real number of TX queues is %d\n", + dev->name, queue_index, + dev->real_num_tx_queues); + } + return 0; + } + return queue_index; +} + static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { - const struct net_device_ops *ops = dev->netdev_ops; - u16 queue_index = 0; + u16 queue_index; + struct sock *sk = skb->sk; - if (ops->ndo_select_queue) - queue_index = ops->ndo_select_queue(dev, skb); - else if (dev->real_num_tx_queues > 1) - queue_index = skb_tx_hash(dev, skb); + if (sk_tx_queue_recorded(sk)) { + queue_index = sk_tx_queue_get(sk); + } else { + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_select_queue) { + queue_index = ops->ndo_select_queue(dev, skb); + queue_index = dev_cap_txqueue(dev, queue_index); + } else { + queue_index = 0; + if (dev->real_num_tx_queues > 1) + queue_index = skb_tx_hash(dev, skb); + + if (sk && sk->sk_dst_cache) + sk_tx_queue_set(sk, queue_index); + } + } skb_set_queue_mapping(skb, queue_index); return netdev_get_tx_queue(dev, queue_index); } +static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, + struct net_device *dev, + struct netdev_queue *txq) +{ + spinlock_t *root_lock = qdisc_lock(q); + int rc; + + spin_lock(root_lock); + if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { + kfree_skb(skb); + rc = NET_XMIT_DROP; + } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && + !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { + /* + * This is a work-conserving queue; there are no old skbs + * waiting to be sent out; and the qdisc is not running - + * xmit the skb directly. + */ + __qdisc_update_bstats(q, skb->len); + if (sch_direct_xmit(skb, q, dev, txq, root_lock)) + __qdisc_run(q); + else + clear_bit(__QDISC_STATE_RUNNING, &q->state); + + rc = NET_XMIT_SUCCESS; + } else { + rc = qdisc_enqueue_root(skb, q); + qdisc_run(q); + } + spin_unlock(root_lock); + + return rc; +} + /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -1856,22 +2044,10 @@ gso: q = rcu_dereference(txq->qdisc); #ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); + skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); #endif if (q->enqueue) { - spinlock_t *root_lock = qdisc_lock(q); - - spin_lock(root_lock); - - if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { - kfree_skb(skb); - rc = NET_XMIT_DROP; - } else { - rc = qdisc_enqueue_root(skb, q); - qdisc_run(q); - } - spin_unlock(root_lock); - + rc = __dev_xmit_skb(skb, q, dev, txq); goto out; } @@ -1895,8 +2071,8 @@ gso: HARD_TX_LOCK(dev, txq, cpu); if (!netif_tx_queue_stopped(txq)) { - rc = 0; - if (!dev_hard_start_xmit(skb, dev, txq)) { + rc = dev_hard_start_xmit(skb, dev, txq); + if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); goto out; } @@ -1924,6 +2100,7 @@ out: rcu_read_unlock_bh(); return rc; } +EXPORT_SYMBOL(dev_queue_xmit); /*======================================================================= @@ -1990,6 +2167,7 @@ enqueue: kfree_skb(skb); return NET_RX_DROP; } +EXPORT_SYMBOL(netif_rx); int netif_rx_ni(struct sk_buff *skb) { @@ -2003,7 +2181,6 @@ int netif_rx_ni(struct sk_buff *skb) return err; } - EXPORT_SYMBOL(netif_rx_ni); static void net_tx_action(struct softirq_action *h) @@ -2076,7 +2253,7 @@ static inline int deliver_skb(struct sk_buff *skb, /* This hook is defined here for ATM LANE */ int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) __read_mostly; -EXPORT_SYMBOL(br_fdb_test_addr_hook); +EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); #endif /* @@ -2085,7 +2262,7 @@ EXPORT_SYMBOL(br_fdb_test_addr_hook); */ struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff *skb) __read_mostly; -EXPORT_SYMBOL(br_handle_frame_hook); +EXPORT_SYMBOL_GPL(br_handle_frame_hook); static inline struct sk_buff *handle_bridge(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, @@ -2150,7 +2327,7 @@ static int ing_filter(struct sk_buff *skb) if (MAX_RED_LOOP < ttl++) { printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n", - skb->iif, dev->ifindex); + skb->skb_iif, dev->ifindex); return TC_ACT_SHOT; } @@ -2248,18 +2425,18 @@ int netif_receive_skb(struct sk_buff *skb) int ret = NET_RX_DROP; __be16 type; - if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) + if (!skb->tstamp.tv64) + net_timestamp(skb); + + if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) return NET_RX_SUCCESS; /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) return NET_RX_DROP; - if (!skb->tstamp.tv64) - net_timestamp(skb); - - if (!skb->iif) - skb->iif = skb->dev->ifindex; + if (!skb->skb_iif) + skb->skb_iif = skb->dev->ifindex; null_or_orig = NULL; orig_dev = skb->dev; @@ -2336,6 +2513,7 @@ out: rcu_read_unlock(); return ret; } +EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending */ static void flush_backlog(void *arg) @@ -2398,7 +2576,7 @@ void napi_gro_flush(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_flush); -int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; struct packet_type *ptype; @@ -2406,7 +2584,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; int same_flow; int mac_len; - int ret; + enum gro_result ret; if (!(skb->dev->features & NETIF_F_GRO)) goto normal; @@ -2490,7 +2668,8 @@ normal: } EXPORT_SYMBOL(dev_gro_receive); -static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +static gro_result_t +__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; @@ -2498,33 +2677,35 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) return GRO_NORMAL; for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev) - && !compare_ether_header(skb_mac_header(p), - skb_gro_mac_header(skb)); + NAPI_GRO_CB(p)->same_flow = + (p->dev == skb->dev) && + !compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); NAPI_GRO_CB(p)->flush = 0; } return dev_gro_receive(napi, skb); } -int napi_skb_finish(int ret, struct sk_buff *skb) +gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { - int err = NET_RX_SUCCESS; - switch (ret) { case GRO_NORMAL: - return netif_receive_skb(skb); + if (netif_receive_skb(skb)) + ret = GRO_DROP; + break; case GRO_DROP: - err = NET_RX_DROP; - /* fall through */ - case GRO_MERGED_FREE: kfree_skb(skb); break; + + case GRO_HELD: + case GRO_MERGED: + break; } - return err; + return ret; } EXPORT_SYMBOL(napi_skb_finish); @@ -2544,7 +2725,7 @@ void skb_gro_reset_offset(struct sk_buff *skb) } EXPORT_SYMBOL(skb_gro_reset_offset); -int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { skb_gro_reset_offset(skb); @@ -2563,49 +2744,41 @@ EXPORT_SYMBOL(napi_reuse_skb); struct sk_buff *napi_get_frags(struct napi_struct *napi) { - struct net_device *dev = napi->dev; struct sk_buff *skb = napi->skb; if (!skb) { - skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN); - if (!skb) - goto out; - - skb_reserve(skb, NET_IP_ALIGN); - - napi->skb = skb; + skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); + if (skb) + napi->skb = skb; } - -out: return skb; } EXPORT_SYMBOL(napi_get_frags); -int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) +gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, + gro_result_t ret) { - int err = NET_RX_SUCCESS; - switch (ret) { case GRO_NORMAL: case GRO_HELD: skb->protocol = eth_type_trans(skb, napi->dev); - if (ret == GRO_NORMAL) - return netif_receive_skb(skb); - - skb_gro_pull(skb, -ETH_HLEN); + if (ret == GRO_HELD) + skb_gro_pull(skb, -ETH_HLEN); + else if (netif_receive_skb(skb)) + ret = GRO_DROP; break; case GRO_DROP: - err = NET_RX_DROP; - /* fall through */ - case GRO_MERGED_FREE: napi_reuse_skb(napi, skb); break; + + case GRO_MERGED: + break; } - return err; + return ret; } EXPORT_SYMBOL(napi_frags_finish); @@ -2646,12 +2819,12 @@ out: } EXPORT_SYMBOL(napi_frags_skb); -int napi_gro_frags(struct napi_struct *napi) +gro_result_t napi_gro_frags(struct napi_struct *napi) { struct sk_buff *skb = napi_frags_skb(napi); if (!skb) - return NET_RX_DROP; + return GRO_DROP; return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); } @@ -2852,7 +3025,7 @@ softnet_break: goto out; } -static gifconf_func_t * gifconf_list [NPROTO]; +static gifconf_func_t *gifconf_list[NPROTO]; /** * register_gifconf - register a SIOCGIF handler @@ -2863,13 +3036,14 @@ static gifconf_func_t * gifconf_list [NPROTO]; * that is passed must not be freed or reused until it has been replaced * by another handler. */ -int register_gifconf(unsigned int family, gifconf_func_t * gifconf) +int register_gifconf(unsigned int family, gifconf_func_t *gifconf) { if (family >= NPROTO) return -EINVAL; gifconf_list[family] = gifconf; return 0; } +EXPORT_SYMBOL(register_gifconf); /* @@ -2895,15 +3069,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; - read_lock(&dev_base_lock); - dev = __dev_get_by_index(net, ifr.ifr_ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); if (!dev) { - read_unlock(&dev_base_lock); + rcu_read_unlock(); return -ENODEV; } strcpy(ifr.ifr_name, dev->name); - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) return -EFAULT; @@ -2973,18 +3147,18 @@ static int dev_ifconf(struct net *net, char __user *arg) * in detail. */ void *dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(RCU) { struct net *net = seq_file_net(seq); loff_t off; struct net_device *dev; - read_lock(&dev_base_lock); + rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; off = 1; - for_each_netdev(net, dev) + for_each_netdev_rcu(net, dev) if (off++ == *pos) return dev; @@ -2993,16 +3167,18 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net *net = seq_file_net(seq); + struct net_device *dev = (v == SEQ_START_TOKEN) ? + first_net_device(seq_file_net(seq)) : + next_net_device((struct net_device *)v); + ++*pos; - return v == SEQ_START_TOKEN ? - first_net_device(net) : next_net_device((struct net_device *)v); + return rcu_dereference(dev); } void dev_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(RCU) { - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) @@ -3080,7 +3256,7 @@ static int softnet_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", s->total, s->dropped, s->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ - s->cpu_collision ); + s->cpu_collision); return 0; } @@ -3316,6 +3492,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); return 0; } +EXPORT_SYMBOL(netdev_set_master); static void dev_change_rx_flags(struct net_device *dev, int flags) { @@ -3394,6 +3571,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc) dev_set_rx_mode(dev); return err; } +EXPORT_SYMBOL(dev_set_promiscuity); /** * dev_set_allmulti - update allmulti count on a device @@ -3437,6 +3615,7 @@ int dev_set_allmulti(struct net_device *dev, int inc) } return 0; } +EXPORT_SYMBOL(dev_set_allmulti); /* * Upload unicast and multicast address lists to device and @@ -3927,6 +4106,7 @@ int __dev_addr_sync(struct dev_addr_list **to, int *to_count, } return err; } +EXPORT_SYMBOL_GPL(__dev_addr_sync); void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count) @@ -3946,6 +4126,7 @@ void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, da = next; } } +EXPORT_SYMBOL_GPL(__dev_addr_unsync); /** * dev_unicast_sync - Synchronize device's unicast list to another device @@ -4064,6 +4245,7 @@ unsigned dev_get_flags(const struct net_device *dev) return flags; } +EXPORT_SYMBOL(dev_get_flags); /** * dev_change_flags - change device settings @@ -4114,12 +4296,13 @@ int dev_change_flags(struct net_device *dev, unsigned flags) } if (dev->flags & IFF_UP && - ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | + ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) call_netdevice_notifiers(NETDEV_CHANGE, dev); if ((flags ^ dev->gflags) & IFF_PROMISC) { - int inc = (flags & IFF_PROMISC) ? +1 : -1; + int inc = (flags & IFF_PROMISC) ? 1 : -1; + dev->gflags ^= IFF_PROMISC; dev_set_promiscuity(dev, inc); } @@ -4129,7 +4312,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags) IFF_ALLMULTI is requested not asking us and not reporting. */ if ((flags ^ dev->gflags) & IFF_ALLMULTI) { - int inc = (flags & IFF_ALLMULTI) ? +1 : -1; + int inc = (flags & IFF_ALLMULTI) ? 1 : -1; + dev->gflags ^= IFF_ALLMULTI; dev_set_allmulti(dev, inc); } @@ -4141,6 +4325,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) return ret; } +EXPORT_SYMBOL(dev_change_flags); /** * dev_set_mtu - Change maximum transfer unit @@ -4174,6 +4359,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); return err; } +EXPORT_SYMBOL(dev_set_mtu); /** * dev_set_mac_address - Change Media Access Control Address @@ -4198,69 +4384,70 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; } +EXPORT_SYMBOL(dev_set_mac_address); /* - * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock) + * Perform the SIOCxIFxxx calls, inside rcu_read_lock() */ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; - struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); if (!dev) return -ENODEV; switch (cmd) { - case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = (short) dev_get_flags(dev); - return 0; + case SIOCGIFFLAGS: /* Get interface flags */ + ifr->ifr_flags = (short) dev_get_flags(dev); + return 0; - case SIOCGIFMETRIC: /* Get the metric on the interface - (currently unused) */ - ifr->ifr_metric = 0; - return 0; + case SIOCGIFMETRIC: /* Get the metric on the interface + (currently unused) */ + ifr->ifr_metric = 0; + return 0; - case SIOCGIFMTU: /* Get the MTU of a device */ - ifr->ifr_mtu = dev->mtu; - return 0; + case SIOCGIFMTU: /* Get the MTU of a device */ + ifr->ifr_mtu = dev->mtu; + return 0; - case SIOCGIFHWADDR: - if (!dev->addr_len) - memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); - else - memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - ifr->ifr_hwaddr.sa_family = dev->type; - return 0; + case SIOCGIFHWADDR: + if (!dev->addr_len) + memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); + else + memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + ifr->ifr_hwaddr.sa_family = dev->type; + return 0; - case SIOCGIFSLAVE: - err = -EINVAL; - break; + case SIOCGIFSLAVE: + err = -EINVAL; + break; - case SIOCGIFMAP: - ifr->ifr_map.mem_start = dev->mem_start; - ifr->ifr_map.mem_end = dev->mem_end; - ifr->ifr_map.base_addr = dev->base_addr; - ifr->ifr_map.irq = dev->irq; - ifr->ifr_map.dma = dev->dma; - ifr->ifr_map.port = dev->if_port; - return 0; + case SIOCGIFMAP: + ifr->ifr_map.mem_start = dev->mem_start; + ifr->ifr_map.mem_end = dev->mem_end; + ifr->ifr_map.base_addr = dev->base_addr; + ifr->ifr_map.irq = dev->irq; + ifr->ifr_map.dma = dev->dma; + ifr->ifr_map.port = dev->if_port; + return 0; - case SIOCGIFINDEX: - ifr->ifr_ifindex = dev->ifindex; - return 0; + case SIOCGIFINDEX: + ifr->ifr_ifindex = dev->ifindex; + return 0; - case SIOCGIFTXQLEN: - ifr->ifr_qlen = dev->tx_queue_len; - return 0; + case SIOCGIFTXQLEN: + ifr->ifr_qlen = dev->tx_queue_len; + return 0; - default: - /* dev_ioctl() should ensure this case - * is never reached - */ - WARN_ON(1); - err = -EINVAL; - break; + default: + /* dev_ioctl() should ensure this case + * is never reached + */ + WARN_ON(1); + err = -EINVAL; + break; } return err; @@ -4281,92 +4468,91 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) ops = dev->netdev_ops; switch (cmd) { - case SIOCSIFFLAGS: /* Set interface flags */ - return dev_change_flags(dev, ifr->ifr_flags); - - case SIOCSIFMETRIC: /* Set the metric on the interface - (currently unused) */ - return -EOPNOTSUPP; + case SIOCSIFFLAGS: /* Set interface flags */ + return dev_change_flags(dev, ifr->ifr_flags); - case SIOCSIFMTU: /* Set the MTU of a device */ - return dev_set_mtu(dev, ifr->ifr_mtu); + case SIOCSIFMETRIC: /* Set the metric on the interface + (currently unused) */ + return -EOPNOTSUPP; - case SIOCSIFHWADDR: - return dev_set_mac_address(dev, &ifr->ifr_hwaddr); + case SIOCSIFMTU: /* Set the MTU of a device */ + return dev_set_mtu(dev, ifr->ifr_mtu); - case SIOCSIFHWBROADCAST: - if (ifr->ifr_hwaddr.sa_family != dev->type) - return -EINVAL; - memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return 0; + case SIOCSIFHWADDR: + return dev_set_mac_address(dev, &ifr->ifr_hwaddr); - case SIOCSIFMAP: - if (ops->ndo_set_config) { - if (!netif_device_present(dev)) - return -ENODEV; - return ops->ndo_set_config(dev, &ifr->ifr_map); - } - return -EOPNOTSUPP; - - case SIOCADDMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || - ifr->ifr_hwaddr.sa_family != AF_UNSPEC) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, - dev->addr_len, 1); + case SIOCSIFHWBROADCAST: + if (ifr->ifr_hwaddr.sa_family != dev->type) + return -EINVAL; + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return 0; - case SIOCDELMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || - ifr->ifr_hwaddr.sa_family != AF_UNSPEC) - return -EINVAL; + case SIOCSIFMAP: + if (ops->ndo_set_config) { if (!netif_device_present(dev)) return -ENODEV; - return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, - dev->addr_len, 1); + return ops->ndo_set_config(dev, &ifr->ifr_map); + } + return -EOPNOTSUPP; - case SIOCSIFTXQLEN: - if (ifr->ifr_qlen < 0) - return -EINVAL; - dev->tx_queue_len = ifr->ifr_qlen; - return 0; + case SIOCADDMULTI: + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || + ifr->ifr_hwaddr.sa_family != AF_UNSPEC) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, + dev->addr_len, 1); + + case SIOCDELMULTI: + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || + ifr->ifr_hwaddr.sa_family != AF_UNSPEC) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, + dev->addr_len, 1); - case SIOCSIFNAME: - ifr->ifr_newname[IFNAMSIZ-1] = '\0'; - return dev_change_name(dev, ifr->ifr_newname); + case SIOCSIFTXQLEN: + if (ifr->ifr_qlen < 0) + return -EINVAL; + dev->tx_queue_len = ifr->ifr_qlen; + return 0; - /* - * Unknown or private ioctl - */ + case SIOCSIFNAME: + ifr->ifr_newname[IFNAMSIZ-1] = '\0'; + return dev_change_name(dev, ifr->ifr_newname); - default: - if ((cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15) || - cmd == SIOCBONDENSLAVE || - cmd == SIOCBONDRELEASE || - cmd == SIOCBONDSETHWADDR || - cmd == SIOCBONDSLAVEINFOQUERY || - cmd == SIOCBONDINFOQUERY || - cmd == SIOCBONDCHANGEACTIVE || - cmd == SIOCGMIIPHY || - cmd == SIOCGMIIREG || - cmd == SIOCSMIIREG || - cmd == SIOCBRADDIF || - cmd == SIOCBRDELIF || - cmd == SIOCSHWTSTAMP || - cmd == SIOCWANDEV) { - err = -EOPNOTSUPP; - if (ops->ndo_do_ioctl) { - if (netif_device_present(dev)) - err = ops->ndo_do_ioctl(dev, ifr, cmd); - else - err = -ENODEV; - } - } else - err = -EINVAL; + /* + * Unknown or private ioctl + */ + default: + if ((cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15) || + cmd == SIOCBONDENSLAVE || + cmd == SIOCBONDRELEASE || + cmd == SIOCBONDSETHWADDR || + cmd == SIOCBONDSLAVEINFOQUERY || + cmd == SIOCBONDINFOQUERY || + cmd == SIOCBONDCHANGEACTIVE || + cmd == SIOCGMIIPHY || + cmd == SIOCGMIIREG || + cmd == SIOCSMIIREG || + cmd == SIOCBRADDIF || + cmd == SIOCBRDELIF || + cmd == SIOCSHWTSTAMP || + cmd == SIOCWANDEV) { + err = -EOPNOTSUPP; + if (ops->ndo_do_ioctl) { + if (netif_device_present(dev)) + err = ops->ndo_do_ioctl(dev, ifr, cmd); + else + err = -ENODEV; + } + } else + err = -EINVAL; } return err; @@ -4423,135 +4609,135 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) */ switch (cmd) { - /* - * These ioctl calls: - * - can be done by all. - * - atomic and do not require locking. - * - return a value - */ - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFHWADDR: - case SIOCGIFSLAVE: - case SIOCGIFMAP: - case SIOCGIFINDEX: - case SIOCGIFTXQLEN: - dev_load(net, ifr.ifr_name); - read_lock(&dev_base_lock); - ret = dev_ifsioc_locked(net, &ifr, cmd); - read_unlock(&dev_base_lock); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; + /* + * These ioctl calls: + * - can be done by all. + * - atomic and do not require locking. + * - return a value + */ + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFHWADDR: + case SIOCGIFSLAVE: + case SIOCGIFMAP: + case SIOCGIFINDEX: + case SIOCGIFTXQLEN: + dev_load(net, ifr.ifr_name); + rcu_read_lock(); + ret = dev_ifsioc_locked(net, &ifr, cmd); + rcu_read_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + } + return ret; - case SIOCETHTOOL: - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ethtool(net, &ifr); - rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; + case SIOCETHTOOL: + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ethtool(net, &ifr); + rtnl_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + } + return ret; - /* - * These ioctl calls: - * - require superuser power. - * - require strict serialization. - * - return a value - */ - case SIOCGMIIPHY: - case SIOCGMIIREG: - case SIOCSIFNAME: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; + /* + * These ioctl calls: + * - require superuser power. + * - require strict serialization. + * - return a value + */ + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSIFNAME: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + } + return ret; - /* - * These ioctl calls: - * - require superuser power. - * - require strict serialization. - * - do not return a value - */ - case SIOCSIFFLAGS: - case SIOCSIFMETRIC: - case SIOCSIFMTU: - case SIOCSIFMAP: - case SIOCSIFHWADDR: - case SIOCSIFSLAVE: - case SIOCADDMULTI: - case SIOCDELMULTI: - case SIOCSIFHWBROADCAST: - case SIOCSIFTXQLEN: - case SIOCSMIIREG: - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - case SIOCBRADDIF: - case SIOCBRDELIF: - case SIOCSHWTSTAMP: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - /* fall through */ - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: + /* + * These ioctl calls: + * - require superuser power. + * - require strict serialization. + * - do not return a value + */ + case SIOCSIFFLAGS: + case SIOCSIFMETRIC: + case SIOCSIFMTU: + case SIOCSIFMAP: + case SIOCSIFHWADDR: + case SIOCSIFSLAVE: + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCSIFHWBROADCAST: + case SIOCSIFTXQLEN: + case SIOCSMIIREG: + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDCHANGEACTIVE: + case SIOCBRADDIF: + case SIOCBRDELIF: + case SIOCSHWTSTAMP: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + /* fall through */ + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + return ret; + + case SIOCGIFMEM: + /* Get the per device memory space. We can add this but + * currently do not support it */ + case SIOCSIFMEM: + /* Set the per device memory buffer space. + * Not applicable in our case */ + case SIOCSIFLINK: + return -EINVAL; + + /* + * Unknown or private ioctl. + */ + default: + if (cmd == SIOCWANDEV || + (cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15)) { dev_load(net, ifr.ifr_name); rtnl_lock(); ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); + if (!ret && copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; return ret; - - case SIOCGIFMEM: - /* Get the per device memory space. We can add this but - * currently do not support it */ - case SIOCSIFMEM: - /* Set the per device memory buffer space. - * Not applicable in our case */ - case SIOCSIFLINK: - return -EINVAL; - - /* - * Unknown or private ioctl. - */ - default: - if (cmd == SIOCWANDEV || - (cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - if (!ret && copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - return ret; - } - /* Take care of Wireless Extensions */ - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) - return wext_handle_ioctl(net, &ifr, cmd, arg); - return -EINVAL; + } + /* Take care of Wireless Extensions */ + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) + return wext_handle_ioctl(net, &ifr, cmd, arg); + return -EINVAL; } } @@ -4583,59 +4769,80 @@ static void net_set_todo(struct net_device *dev) list_add_tail(&dev->todo_list, &net_todo_list); } -static void rollback_registered(struct net_device *dev) +static void rollback_registered_many(struct list_head *head) { + struct net_device *dev; + BUG_ON(dev_boot_phase); ASSERT_RTNL(); - /* Some devices call without registering for initialization unwind. */ - if (dev->reg_state == NETREG_UNINITIALIZED) { - printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " - "was registered\n", dev->name, dev); + list_for_each_entry(dev, head, unreg_list) { + /* Some devices call without registering + * for initialization unwind. + */ + if (dev->reg_state == NETREG_UNINITIALIZED) { + pr_debug("unregister_netdevice: device %s/%p never " + "was registered\n", dev->name, dev); - WARN_ON(1); - return; - } + WARN_ON(1); + return; + } - BUG_ON(dev->reg_state != NETREG_REGISTERED); + BUG_ON(dev->reg_state != NETREG_REGISTERED); - /* If device is running, close it first. */ - dev_close(dev); + /* If device is running, close it first. */ + dev_close(dev); - /* And unlink it from device chain. */ - unlist_netdevice(dev); + /* And unlink it from device chain. */ + unlist_netdevice(dev); - dev->reg_state = NETREG_UNREGISTERING; + dev->reg_state = NETREG_UNREGISTERING; + } synchronize_net(); - /* Shutdown queueing discipline. */ - dev_shutdown(dev); + list_for_each_entry(dev, head, unreg_list) { + /* Shutdown queueing discipline. */ + dev_shutdown(dev); - /* Notify protocols, that we are about to destroy - this device. They should clean all the things. - */ - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - /* - * Flush the unicast and multicast chains - */ - dev_unicast_flush(dev); - dev_addr_discard(dev); + /* + * Flush the unicast and multicast chains + */ + dev_unicast_flush(dev); + dev_addr_discard(dev); - if (dev->netdev_ops->ndo_uninit) - dev->netdev_ops->ndo_uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); - /* Notifier chain MUST detach us from master device. */ - WARN_ON(dev->master); + /* Notifier chain MUST detach us from master device. */ + WARN_ON(dev->master); - /* Remove entries from kobject tree */ - netdev_unregister_kobject(dev); + /* Remove entries from kobject tree */ + netdev_unregister_kobject(dev); + } + + /* Process any work delayed until the end of the batch */ + dev = list_entry(head->next, struct net_device, unreg_list); + call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); synchronize_net(); - dev_put(dev); + list_for_each_entry(dev, head, unreg_list) + dev_put(dev); +} + +static void rollback_registered(struct net_device *dev) +{ + LIST_HEAD(single); + + list_add(&dev->unreg_list, &single); + rollback_registered_many(&single); } static void __netdev_init_queue_locks_one(struct net_device *dev, @@ -4694,6 +4901,33 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) EXPORT_SYMBOL(netdev_fix_features); /** + * netif_stacked_transfer_operstate - transfer operstate + * @rootdev: the root or lower level device to transfer state from + * @dev: the device to transfer operstate to + * + * Transfer operational state from root to device. This is normally + * called when a stacking relationship exists between the root + * device and the device(a leaf device). + */ +void netif_stacked_transfer_operstate(const struct net_device *rootdev, + struct net_device *dev) +{ + if (rootdev->operstate == IF_OPER_DORMANT) + netif_dormant_on(dev); + else + netif_dormant_off(dev); + + if (netif_carrier_ok(rootdev)) { + if (!netif_carrier_ok(dev)) + netif_carrier_on(dev); + } else { + if (netif_carrier_ok(dev)) + netif_carrier_off(dev); + } +} +EXPORT_SYMBOL(netif_stacked_transfer_operstate); + +/** * register_netdevice - register a network device * @dev: device to register * @@ -4712,8 +4946,6 @@ EXPORT_SYMBOL(netdev_fix_features); int register_netdevice(struct net_device *dev) { - struct hlist_head *head; - struct hlist_node *p; int ret; struct net *net = dev_net(dev); @@ -4742,26 +4974,14 @@ int register_netdevice(struct net_device *dev) } } - if (!dev_valid_name(dev->name)) { - ret = -EINVAL; + ret = dev_get_valid_name(net, dev->name, dev->name, 0); + if (ret) goto err_uninit; - } dev->ifindex = dev_new_index(net); if (dev->iflink == -1) dev->iflink = dev->ifindex; - /* Check for existence of name */ - head = dev_name_hash(net, dev->name); - hlist_for_each(p, head) { - struct net_device *d - = hlist_entry(p, struct net_device, name_hlist); - if (!strncmp(d->name, dev->name, IFNAMSIZ)) { - ret = -EEXIST; - goto err_uninit; - } - } - /* Fix illegal checksum combinations */ if ((dev->features & NETIF_F_HW_CSUM) && (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { @@ -4784,6 +5004,12 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_GSO; netdev_initialize_kobject(dev); + + ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); + ret = notifier_to_errno(ret); + if (ret) + goto err_uninit; + ret = netdev_register_kobject(dev); if (ret) goto err_uninit; @@ -4816,6 +5042,7 @@ err_uninit: dev->netdev_ops->ndo_uninit(dev); goto out; } +EXPORT_SYMBOL(register_netdevice); /** * init_dummy_netdev - init a dummy network device for NAPI @@ -4907,6 +5134,8 @@ static void netdev_wait_allrefs(struct net_device *dev) { unsigned long rebroadcast_time, warning_time; + linkwatch_forget_dev(dev); + rebroadcast_time = warning_time = jiffies; while (atomic_read(&dev->refcnt) != 0) { if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { @@ -4914,6 +5143,8 @@ static void netdev_wait_allrefs(struct net_device *dev) /* Rebroadcast unregister notification */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users + * should have already handle it the first time */ if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { @@ -5009,6 +5240,32 @@ void netdev_run_todo(void) } /** + * dev_txq_stats_fold - fold tx_queues stats + * @dev: device to get statistics from + * @stats: struct net_device_stats to hold results + */ +void dev_txq_stats_fold(const struct net_device *dev, + struct net_device_stats *stats) +{ + unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; + unsigned int i; + struct netdev_queue *txq; + + for (i = 0; i < dev->num_tx_queues; i++) { + txq = netdev_get_tx_queue(dev, i); + tx_bytes += txq->tx_bytes; + tx_packets += txq->tx_packets; + tx_dropped += txq->tx_dropped; + } + if (tx_bytes || tx_packets || tx_dropped) { + stats->tx_bytes = tx_bytes; + stats->tx_packets = tx_packets; + stats->tx_dropped = tx_dropped; + } +} +EXPORT_SYMBOL(dev_txq_stats_fold); + +/** * dev_get_stats - get network device statistics * @dev: device to get statistics from * @@ -5022,25 +5279,9 @@ const struct net_device_stats *dev_get_stats(struct net_device *dev) if (ops->ndo_get_stats) return ops->ndo_get_stats(dev); - else { - unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; - struct net_device_stats *stats = &dev->stats; - unsigned int i; - struct netdev_queue *txq; - - for (i = 0; i < dev->num_tx_queues; i++) { - txq = netdev_get_tx_queue(dev, i); - tx_bytes += txq->tx_bytes; - tx_packets += txq->tx_packets; - tx_dropped += txq->tx_dropped; - } - if (tx_bytes || tx_packets || tx_dropped) { - stats->tx_bytes = tx_bytes; - stats->tx_packets = tx_packets; - stats->tx_dropped = tx_dropped; - } - return stats; - } + + dev_txq_stats_fold(dev, &dev->stats); + return &dev->stats; } EXPORT_SYMBOL(dev_get_stats); @@ -5120,6 +5361,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, netdev_init_queues(dev); INIT_LIST_HEAD(&dev->napi_list); + INIT_LIST_HEAD(&dev->unreg_list); + INIT_LIST_HEAD(&dev->link_watch_list); dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); strcpy(dev->name, name); @@ -5168,6 +5411,7 @@ void free_netdev(struct net_device *dev) /* will free via device release */ put_device(&dev->dev); } +EXPORT_SYMBOL(free_netdev); /** * synchronize_net - Synchronize with packet receive processing @@ -5180,26 +5424,50 @@ void synchronize_net(void) might_sleep(); synchronize_rcu(); } +EXPORT_SYMBOL(synchronize_net); /** - * unregister_netdevice - remove device from the kernel + * unregister_netdevice_queue - remove device from the kernel * @dev: device + * @head: list * * This function shuts down a device interface and removes it * from the kernel tables. + * If head not NULL, device is queued to be unregistered later. * * Callers must hold the rtnl semaphore. You may want * unregister_netdev() instead of this. */ -void unregister_netdevice(struct net_device *dev) +void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) { ASSERT_RTNL(); - rollback_registered(dev); - /* Finish processing unregister after unlock */ - net_set_todo(dev); + if (head) { + list_move_tail(&dev->unreg_list, head); + } else { + rollback_registered(dev); + /* Finish processing unregister after unlock */ + net_set_todo(dev); + } } +EXPORT_SYMBOL(unregister_netdevice_queue); + +/** + * unregister_netdevice_many - unregister many devices + * @head: list of devices + */ +void unregister_netdevice_many(struct list_head *head) +{ + struct net_device *dev; + + if (!list_empty(head)) { + rollback_registered_many(head); + list_for_each_entry(dev, head, unreg_list) + net_set_todo(dev); + } +} +EXPORT_SYMBOL(unregister_netdevice_many); /** * unregister_netdev - remove device from the kernel @@ -5218,7 +5486,6 @@ void unregister_netdev(struct net_device *dev) unregister_netdevice(dev); rtnl_unlock(); } - EXPORT_SYMBOL(unregister_netdev); /** @@ -5237,8 +5504,6 @@ EXPORT_SYMBOL(unregister_netdev); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) { - char buf[IFNAMSIZ]; - const char *destname; int err; ASSERT_RTNL(); @@ -5271,20 +5536,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char * we can use it in the destination network namespace. */ err = -EEXIST; - destname = dev->name; - if (__dev_get_by_name(net, destname)) { + if (__dev_get_by_name(net, dev->name)) { /* We get here if we can't use the current device name */ if (!pat) goto out; - if (!dev_valid_name(pat)) - goto out; - if (strchr(pat, '%')) { - if (__dev_alloc_name(net, pat, buf) < 0) - goto out; - destname = buf; - } else - destname = pat; - if (__dev_get_by_name(net, destname)) + if (dev_get_valid_name(net, pat, dev->name, 1)) goto out; } @@ -5308,6 +5564,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char this device. They should clean all the things. */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); /* * Flush the unicast and multicast chains @@ -5320,10 +5577,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Actually switch the network namespace */ dev_net_set(dev, net); - /* Assign the new device name */ - if (destname != dev->name) - strcpy(dev->name, destname); - /* If there is an ifindex conflict assign a new one */ if (__dev_get_by_index(net, dev->ifindex)) { int iflink = (dev->iflink == dev->ifindex); @@ -5347,6 +5600,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char out: return err; } +EXPORT_SYMBOL_GPL(dev_change_net_namespace); static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, @@ -5407,7 +5661,7 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one, unsigned long mask) { /* If device needs checksumming, downgrade to it. */ - if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) + if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); else if (mask & NETIF_F_ALL_CSUM) { /* If one device supports v4/v6 checksumming, set for all. */ @@ -5427,7 +5681,7 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one, one |= NETIF_F_ALL_CSUM; one |= all & NETIF_F_ONE_FOR_ALL; - all &= one | NETIF_F_LLTX | NETIF_F_GSO; + all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO; all |= one & mask & NETIF_F_ONE_FOR_ALL; return all; @@ -5509,14 +5763,13 @@ static struct pernet_operations __net_initdata netdev_net_ops = { static void __net_exit default_device_exit(struct net *net) { - struct net_device *dev; + struct net_device *dev, *aux; /* - * Push all migratable of the network devices back to the + * Push all migratable network devices back to the * initial network namespace */ rtnl_lock(); -restart: - for_each_netdev(net, dev) { + for_each_netdev_safe(net, dev, aux) { int err; char fb_name[IFNAMSIZ]; @@ -5524,11 +5777,9 @@ restart: if (dev->features & NETIF_F_NETNS_LOCAL) continue; - /* Delete virtual devices */ - if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { - dev->rtnl_link_ops->dellink(dev); - goto restart; - } + /* Leave virtual devices for the generic cleanup */ + if (dev->rtnl_link_ops) + continue; /* Push remaing network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); @@ -5538,13 +5789,37 @@ restart: __func__, dev->name, err); BUG(); } - goto restart; } rtnl_unlock(); } +static void __net_exit default_device_exit_batch(struct list_head *net_list) +{ + /* At exit all network devices most be removed from a network + * namespace. Do this in the reverse order of registeration. + * Do this across as many network namespaces as possible to + * improve batching efficiency. + */ + struct net_device *dev; + struct net *net; + LIST_HEAD(dev_kill_list); + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { + for_each_netdev_reverse(net, dev) { + if (dev->rtnl_link_ops) + dev->rtnl_link_ops->dellink(dev, &dev_kill_list); + else + unregister_netdevice_queue(dev, &dev_kill_list); + } + } + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); +} + static struct pernet_operations __net_initdata default_device_ops = { .exit = default_device_exit, + .exit_batch = default_device_exit_batch, }; /* @@ -5633,41 +5908,3 @@ static int __init initialize_hashrnd(void) late_initcall_sync(initialize_hashrnd); -EXPORT_SYMBOL(__dev_get_by_index); -EXPORT_SYMBOL(__dev_get_by_name); -EXPORT_SYMBOL(__dev_remove_pack); -EXPORT_SYMBOL(dev_valid_name); -EXPORT_SYMBOL(dev_add_pack); -EXPORT_SYMBOL(dev_alloc_name); -EXPORT_SYMBOL(dev_close); -EXPORT_SYMBOL(dev_get_by_flags); -EXPORT_SYMBOL(dev_get_by_index); -EXPORT_SYMBOL(dev_get_by_name); -EXPORT_SYMBOL(dev_open); -EXPORT_SYMBOL(dev_queue_xmit); -EXPORT_SYMBOL(dev_remove_pack); -EXPORT_SYMBOL(dev_set_allmulti); -EXPORT_SYMBOL(dev_set_promiscuity); -EXPORT_SYMBOL(dev_change_flags); -EXPORT_SYMBOL(dev_set_mtu); -EXPORT_SYMBOL(dev_set_mac_address); -EXPORT_SYMBOL(free_netdev); -EXPORT_SYMBOL(netdev_boot_setup_check); -EXPORT_SYMBOL(netdev_set_master); -EXPORT_SYMBOL(netdev_state_change); -EXPORT_SYMBOL(netif_receive_skb); -EXPORT_SYMBOL(netif_rx); -EXPORT_SYMBOL(register_gifconf); -EXPORT_SYMBOL(register_netdevice); -EXPORT_SYMBOL(register_netdevice_notifier); -EXPORT_SYMBOL(skb_checksum_help); -EXPORT_SYMBOL(synchronize_net); -EXPORT_SYMBOL(unregister_netdevice); -EXPORT_SYMBOL(unregister_netdevice_notifier); -EXPORT_SYMBOL(net_enable_timestamp); -EXPORT_SYMBOL(net_disable_timestamp); -EXPORT_SYMBOL(dev_get_flags); - -EXPORT_SYMBOL(dev_load); - -EXPORT_PER_CPU_SYMBOL(softnet_data); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 9d66fa953ab7..b8e9d3a86887 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -41,7 +41,7 @@ static void send_dm_alert(struct work_struct *unused); * netlink alerts */ static int trace_state = TRACE_OFF; -static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(trace_state_lock); struct per_cpu_dm_data { struct work_struct dm_alert_work; @@ -52,6 +52,7 @@ struct per_cpu_dm_data { struct dm_hw_stat_delta { struct net_device *dev; + unsigned long last_rx; struct list_head list; struct rcu_head rcu; unsigned long last_drop_val; @@ -180,17 +181,25 @@ static void trace_napi_poll_hit(struct napi_struct *napi) struct dm_hw_stat_delta *new_stat; /* - * Ratelimit our check time to dm_hw_check_delta jiffies + * Don't check napi structures with no associated device */ - if (!time_after(jiffies, napi->dev->last_rx + dm_hw_check_delta)) + if (!napi->dev) return; rcu_read_lock(); list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { + /* + * only add a note to our monitor buffer if: + * 1) this is the dev we received on + * 2) its after the last_rx delta + * 3) our rx_dropped count has gone up + */ if ((new_stat->dev == napi->dev) && + (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) && (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { trace_drop_common(NULL, NULL); new_stat->last_drop_val = napi->dev->stats.rx_dropped; + new_stat->last_rx = jiffies; break; } } @@ -286,6 +295,7 @@ static int dropmon_net_event(struct notifier_block *ev_block, goto out; new_stat->dev = dev; + new_stat->last_rx = jiffies; INIT_RCU_HEAD(&new_stat->rcu); spin_lock(&trace_state_lock); list_add_rcu(&new_stat->list, &hw_stats_list); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d9d5160610d5..d8aee584e8d1 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -30,10 +30,17 @@ u32 ethtool_op_get_link(struct net_device *dev) return netif_carrier_ok(dev) ? 1 : 0; } +u32 ethtool_op_get_rx_csum(struct net_device *dev) +{ + return (dev->features & NETIF_F_ALL_CSUM) != 0; +} +EXPORT_SYMBOL(ethtool_op_get_rx_csum); + u32 ethtool_op_get_tx_csum(struct net_device *dev) { return (dev->features & NETIF_F_ALL_CSUM) != 0; } +EXPORT_SYMBOL(ethtool_op_get_tx_csum); int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) { @@ -191,13 +198,6 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); if (rc >= 0) info.n_priv_flags = rc; - } else { - /* code path for obsolete hooks */ - - if (ops->self_test_count) - info.testinfo_len = ops->self_test_count(dev); - if (ops->get_stats_count) - info.n_stats = ops->get_stats_count(dev); } if (ops->get_regs_len) info.regdump_len = ops->get_regs_len(dev); @@ -302,6 +302,26 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) return ret; } +static int ethtool_reset(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value reset; + int ret; + + if (!dev->ethtool_ops->reset) + return -EOPNOTSUPP; + + if (copy_from_user(&reset, useraddr, sizeof(reset))) + return -EFAULT; + + ret = dev->ethtool_ops->reset(dev, &reset.data); + if (ret) + return ret; + + if (copy_to_user(useraddr, &reset, sizeof(reset))) + return -EFAULT; + return 0; +} + static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) { struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; @@ -677,16 +697,10 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) u64 *data; int ret, test_len; - if (!ops->self_test) - return -EOPNOTSUPP; - if (!ops->get_sset_count && !ops->self_test_count) + if (!ops->self_test || !ops->get_sset_count) return -EOPNOTSUPP; - if (ops->get_sset_count) - test_len = ops->get_sset_count(dev, ETH_SS_TEST); - else - /* code path for obsolete hook */ - test_len = ops->self_test_count(dev); + test_len = ops->get_sset_count(dev, ETH_SS_TEST); if (test_len < 0) return test_len; WARN_ON(test_len == 0); @@ -721,36 +735,17 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) u8 *data; int ret; - if (!ops->get_strings) + if (!ops->get_strings || !ops->get_sset_count) return -EOPNOTSUPP; if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) return -EFAULT; - if (ops->get_sset_count) { - ret = ops->get_sset_count(dev, gstrings.string_set); - if (ret < 0) - return ret; - - gstrings.len = ret; - } else { - /* code path for obsolete hooks */ - - switch (gstrings.string_set) { - case ETH_SS_TEST: - if (!ops->self_test_count) - return -EOPNOTSUPP; - gstrings.len = ops->self_test_count(dev); - break; - case ETH_SS_STATS: - if (!ops->get_stats_count) - return -EOPNOTSUPP; - gstrings.len = ops->get_stats_count(dev); - break; - default: - return -EINVAL; - } - } + ret = ops->get_sset_count(dev, gstrings.string_set); + if (ret < 0) + return ret; + + gstrings.len = ret; data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); if (!data) @@ -791,16 +786,10 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) u64 *data; int ret, n_stats; - if (!ops->get_ethtool_stats) - return -EOPNOTSUPP; - if (!ops->get_sset_count && !ops->get_stats_count) + if (!ops->get_ethtool_stats || !ops->get_sset_count) return -EOPNOTSUPP; - if (ops->get_sset_count) - n_stats = ops->get_sset_count(dev, ETH_SS_STATS); - else - /* code path for obsolete hook */ - n_stats = ops->get_stats_count(dev); + n_stats = ops->get_sset_count(dev, ETH_SS_STATS); if (n_stats < 0) return n_stats; WARN_ON(n_stats == 0); @@ -891,6 +880,19 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr, return actor(dev, edata.data); } +static int ethtool_flash_device(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_flash efl; + + if (copy_from_user(&efl, useraddr, sizeof(efl))) + return -EFAULT; + + if (!dev->ethtool_ops->flash_device) + return -EOPNOTSUPP; + + return dev->ethtool_ops->flash_device(dev, &efl); +} + /* The main entry point in this file. Called from net/core/dev.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) @@ -1004,7 +1006,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) break; case ETHTOOL_GRXCSUM: rc = ethtool_get_value(dev, useraddr, ethcmd, - dev->ethtool_ops->get_rx_csum); + (dev->ethtool_ops->get_rx_csum ? + dev->ethtool_ops->get_rx_csum : + ethtool_op_get_rx_csum)); break; case ETHTOOL_SRXCSUM: rc = ethtool_set_rx_csum(dev, useraddr); @@ -1068,7 +1072,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) break; case ETHTOOL_GFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, - dev->ethtool_ops->get_flags); + (dev->ethtool_ops->get_flags ? + dev->ethtool_ops->get_flags : + ethtool_op_get_flags)); break; case ETHTOOL_SFLAGS: rc = ethtool_set_value(dev, useraddr, @@ -1100,6 +1106,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SGRO: rc = ethtool_set_gro(dev, useraddr); break; + case ETHTOOL_FLASHDEV: + rc = ethtool_flash_device(dev, useraddr); + break; + case ETHTOOL_RESET: + rc = ethtool_reset(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } @@ -1116,7 +1128,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) EXPORT_SYMBOL(ethtool_op_get_link); EXPORT_SYMBOL(ethtool_op_get_sg); EXPORT_SYMBOL(ethtool_op_get_tso); -EXPORT_SYMBOL(ethtool_op_get_tx_csum); EXPORT_SYMBOL(ethtool_op_set_sg); EXPORT_SYMBOL(ethtool_op_set_tso); EXPORT_SYMBOL(ethtool_op_set_tx_csum); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index bd309384f8b8..02a3b2c69c1e 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -72,7 +72,7 @@ static void flush_route_cache(struct fib_rules_ops *ops) ops->flush_cache(ops); } -int fib_rules_register(struct fib_rules_ops *ops) +static int __fib_rules_register(struct fib_rules_ops *ops) { int err = -EEXIST; struct fib_rules_ops *o; @@ -102,6 +102,28 @@ errout: return err; } +struct fib_rules_ops * +fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) +{ + struct fib_rules_ops *ops; + int err; + + ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL); + if (ops == NULL) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&ops->rules_list); + ops->fro_net = net; + + err = __fib_rules_register(ops); + if (err) { + kfree(ops); + ops = ERR_PTR(err); + } + + return ops; +} + EXPORT_SYMBOL_GPL(fib_rules_register); void fib_rules_cleanup_ops(struct fib_rules_ops *ops) @@ -115,6 +137,15 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops); +static void fib_rules_put_rcu(struct rcu_head *head) +{ + struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu); + struct net *net = ops->fro_net; + + release_net(net); + kfree(ops); +} + void fib_rules_unregister(struct fib_rules_ops *ops) { struct net *net = ops->fro_net; @@ -124,8 +155,7 @@ void fib_rules_unregister(struct fib_rules_ops *ops) fib_rules_cleanup_ops(ops); spin_unlock(&net->rules_mod_lock); - synchronize_rcu(); - release_net(net); + call_rcu(&ops->rcu, fib_rules_put_rcu); } EXPORT_SYMBOL_GPL(fib_rules_unregister); @@ -135,7 +165,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, { int ret = 0; - if (rule->ifindex && (rule->ifindex != fl->iif)) + if (rule->iifindex && (rule->iifindex != fl->iif)) + goto out; + + if (rule->oifindex && (rule->oifindex != fl->oif)) goto out; if ((rule->mark ^ fl->mark) & rule->mark_mask) @@ -248,14 +281,24 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (tb[FRA_PRIORITY]) rule->pref = nla_get_u32(tb[FRA_PRIORITY]); - if (tb[FRA_IFNAME]) { + if (tb[FRA_IIFNAME]) { + struct net_device *dev; + + rule->iifindex = -1; + nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); + dev = __dev_get_by_name(net, rule->iifname); + if (dev) + rule->iifindex = dev->ifindex; + } + + if (tb[FRA_OIFNAME]) { struct net_device *dev; - rule->ifindex = -1; - nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); - dev = __dev_get_by_name(net, rule->ifname); + rule->oifindex = -1; + nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); + dev = __dev_get_by_name(net, rule->oifname); if (dev) - rule->ifindex = dev->ifindex; + rule->oifindex = dev->ifindex; } if (tb[FRA_FWMARK]) { @@ -274,7 +317,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->flags = frh->flags; rule->table = frh_get_table(frh, tb); - if (!rule->pref && ops->default_pref) + if (!tb[FRA_PRIORITY] && ops->default_pref) rule->pref = ops->default_pref(ops); err = -EINVAL; @@ -388,8 +431,12 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) (rule->pref != nla_get_u32(tb[FRA_PRIORITY]))) continue; - if (tb[FRA_IFNAME] && - nla_strcmp(tb[FRA_IFNAME], rule->ifname)) + if (tb[FRA_IIFNAME] && + nla_strcmp(tb[FRA_IIFNAME], rule->iifname)) + continue; + + if (tb[FRA_OIFNAME] && + nla_strcmp(tb[FRA_OIFNAME], rule->oifname)) continue; if (tb[FRA_FWMARK] && @@ -447,7 +494,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, struct fib_rule *rule) { size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)) - + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */ + + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */ + + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */ + nla_total_size(4) /* FRA_PRIORITY */ + nla_total_size(4) /* FRA_TABLE */ + nla_total_size(4) /* FRA_FWMARK */ @@ -481,11 +529,18 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL) frh->flags |= FIB_RULE_UNRESOLVED; - if (rule->ifname[0]) { - NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname); + if (rule->iifname[0]) { + NLA_PUT_STRING(skb, FRA_IIFNAME, rule->iifname); - if (rule->ifindex == -1) - frh->flags |= FIB_RULE_DEV_DETACHED; + if (rule->iifindex == -1) + frh->flags |= FIB_RULE_IIF_DETACHED; + } + + if (rule->oifname[0]) { + NLA_PUT_STRING(skb, FRA_OIFNAME, rule->oifname); + + if (rule->oifindex == -1) + frh->flags |= FIB_RULE_OIF_DETACHED; } if (rule->pref) @@ -600,9 +655,12 @@ static void attach_rules(struct list_head *rules, struct net_device *dev) struct fib_rule *rule; list_for_each_entry(rule, rules, list) { - if (rule->ifindex == -1 && - strcmp(dev->name, rule->ifname) == 0) - rule->ifindex = dev->ifindex; + if (rule->iifindex == -1 && + strcmp(dev->name, rule->iifname) == 0) + rule->iifindex = dev->ifindex; + if (rule->oifindex == -1 && + strcmp(dev->name, rule->oifname) == 0) + rule->oifindex = dev->ifindex; } } @@ -610,9 +668,12 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) { struct fib_rule *rule; - list_for_each_entry(rule, rules, list) - if (rule->ifindex == dev->ifindex) - rule->ifindex = -1; + list_for_each_entry(rule, rules, list) { + if (rule->iifindex == dev->ifindex) + rule->iifindex = -1; + if (rule->oifindex == dev->ifindex) + rule->oifindex = -1; + } } diff --git a/net/core/filter.c b/net/core/filter.c index d1d779ca096d..08db7b9143a3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -303,6 +303,12 @@ load_b: case SKF_AD_IFINDEX: A = skb->dev->ifindex; continue; + case SKF_AD_MARK: + A = skb->mark; + continue; + case SKF_AD_QUEUE: + A = skb->queue_mapping; + continue; case SKF_AD_NLATTR: { struct nlattr *nla; diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 8569310268ab..393b1d8618e2 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -127,6 +127,7 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b) /** * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV * @d: dumping handle + * @b: basic statistics * @r: rate estimator statistics * * Appends the rate estimator statistics to the top level TLV created by @@ -136,8 +137,13 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b) * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r) +gnet_stats_copy_rate_est(struct gnet_dump *d, + const struct gnet_stats_basic_packed *b, + struct gnet_stats_rate_est *r) { + if (b && !gen_estimator_active(b, r)) + return 0; + if (d->compat_tc_stats) { d->tc_stats.bps = r->bps; d->tc_stats.pps = r->pps; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index bf8f7af699d7..5910b555a54a 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -35,7 +35,7 @@ static unsigned long linkwatch_nextevent; static void linkwatch_event(struct work_struct *dummy); static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); -static struct net_device *lweventlist; +static LIST_HEAD(lweventlist); static DEFINE_SPINLOCK(lweventlist_lock); static unsigned char default_operstate(const struct net_device *dev) @@ -89,8 +89,10 @@ static void linkwatch_add_event(struct net_device *dev) unsigned long flags; spin_lock_irqsave(&lweventlist_lock, flags); - dev->link_watch_next = lweventlist; - lweventlist = dev; + if (list_empty(&dev->link_watch_list)) { + list_add_tail(&dev->link_watch_list, &lweventlist); + dev_hold(dev); + } spin_unlock_irqrestore(&lweventlist_lock, flags); } @@ -133,9 +135,35 @@ static void linkwatch_schedule_work(int urgent) } +static void linkwatch_do_dev(struct net_device *dev) +{ + /* + * Make sure the above read is complete since it can be + * rewritten as soon as we clear the bit below. + */ + smp_mb__before_clear_bit(); + + /* We are about to handle this device, + * so new events can be accepted + */ + clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); + + rfc2863_policy(dev); + if (dev->flags & IFF_UP) { + if (netif_carrier_ok(dev)) + dev_activate(dev); + else + dev_deactivate(dev); + + netdev_state_change(dev); + } + dev_put(dev); +} + static void __linkwatch_run_queue(int urgent_only) { - struct net_device *next; + struct net_device *dev; + LIST_HEAD(wrk); /* * Limit the number of linkwatch events to one @@ -153,46 +181,40 @@ static void __linkwatch_run_queue(int urgent_only) clear_bit(LW_URGENT, &linkwatch_flags); spin_lock_irq(&lweventlist_lock); - next = lweventlist; - lweventlist = NULL; - spin_unlock_irq(&lweventlist_lock); + list_splice_init(&lweventlist, &wrk); - while (next) { - struct net_device *dev = next; + while (!list_empty(&wrk)) { - next = dev->link_watch_next; + dev = list_first_entry(&wrk, struct net_device, link_watch_list); + list_del_init(&dev->link_watch_list); if (urgent_only && !linkwatch_urgent_event(dev)) { - linkwatch_add_event(dev); + list_add_tail(&dev->link_watch_list, &lweventlist); continue; } - - /* - * Make sure the above read is complete since it can be - * rewritten as soon as we clear the bit below. - */ - smp_mb__before_clear_bit(); - - /* We are about to handle this device, - * so new events can be accepted - */ - clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); - - rfc2863_policy(dev); - if (dev->flags & IFF_UP) { - if (netif_carrier_ok(dev)) - dev_activate(dev); - else - dev_deactivate(dev); - - netdev_state_change(dev); - } - - dev_put(dev); + spin_unlock_irq(&lweventlist_lock); + linkwatch_do_dev(dev); + spin_lock_irq(&lweventlist_lock); } - if (lweventlist) + if (!list_empty(&lweventlist)) linkwatch_schedule_work(0); + spin_unlock_irq(&lweventlist_lock); +} + +void linkwatch_forget_dev(struct net_device *dev) +{ + unsigned long flags; + int clean = 0; + + spin_lock_irqsave(&lweventlist_lock, flags); + if (!list_empty(&dev->link_watch_list)) { + list_del_init(&dev->link_watch_list); + clean = 1; + } + spin_unlock_irqrestore(&lweventlist_lock, flags); + if (clean) + linkwatch_do_dev(dev); } @@ -216,8 +238,6 @@ void linkwatch_fire_event(struct net_device *dev) bool urgent = linkwatch_urgent_event(dev); if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { - dev_hold(dev); - linkwatch_add_event(dev); } else if (!urgent) return; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 163b4f5b0365..f35377b643e4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -692,75 +692,74 @@ static void neigh_connect(struct neighbour *neigh) hh->hh_output = neigh->ops->hh_output; } -static void neigh_periodic_timer(unsigned long arg) +static void neigh_periodic_work(struct work_struct *work) { - struct neigh_table *tbl = (struct neigh_table *)arg; + struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); struct neighbour *n, **np; - unsigned long expire, now = jiffies; + unsigned int i; NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); - write_lock(&tbl->lock); + write_lock_bh(&tbl->lock); /* * periodically recompute ReachableTime from random function */ - if (time_after(now, tbl->last_rand + 300 * HZ)) { + if (time_after(jiffies, tbl->last_rand + 300 * HZ)) { struct neigh_parms *p; - tbl->last_rand = now; + tbl->last_rand = jiffies; for (p = &tbl->parms; p; p = p->next) p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); } - np = &tbl->hash_buckets[tbl->hash_chain_gc]; - tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask); + for (i = 0 ; i <= tbl->hash_mask; i++) { + np = &tbl->hash_buckets[i]; - while ((n = *np) != NULL) { - unsigned int state; + while ((n = *np) != NULL) { + unsigned int state; - write_lock(&n->lock); + write_lock(&n->lock); - state = n->nud_state; - if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { - write_unlock(&n->lock); - goto next_elt; - } + state = n->nud_state; + if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { + write_unlock(&n->lock); + goto next_elt; + } - if (time_before(n->used, n->confirmed)) - n->used = n->confirmed; + if (time_before(n->used, n->confirmed)) + n->used = n->confirmed; - if (atomic_read(&n->refcnt) == 1 && - (state == NUD_FAILED || - time_after(now, n->used + n->parms->gc_staletime))) { - *np = n->next; - n->dead = 1; + if (atomic_read(&n->refcnt) == 1 && + (state == NUD_FAILED || + time_after(jiffies, n->used + n->parms->gc_staletime))) { + *np = n->next; + n->dead = 1; + write_unlock(&n->lock); + neigh_cleanup_and_release(n); + continue; + } write_unlock(&n->lock); - neigh_cleanup_and_release(n); - continue; - } - write_unlock(&n->lock); next_elt: - np = &n->next; + np = &n->next; + } + /* + * It's fine to release lock here, even if hash table + * grows while we are preempted. + */ + write_unlock_bh(&tbl->lock); + cond_resched(); + write_lock_bh(&tbl->lock); } - /* Cycle through all hash buckets every base_reachable_time/2 ticks. * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 * base_reachable_time. */ - expire = tbl->parms.base_reachable_time >> 1; - expire /= (tbl->hash_mask + 1); - if (!expire) - expire = 1; - - if (expire>HZ) - mod_timer(&tbl->gc_timer, round_jiffies(now + expire)); - else - mod_timer(&tbl->gc_timer, now + expire); - - write_unlock(&tbl->lock); + schedule_delayed_work(&tbl->gc_work, + tbl->parms.base_reachable_time >> 1); + write_unlock_bh(&tbl->lock); } static __inline__ int neigh_max_probes(struct neighbour *n) @@ -1316,7 +1315,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, } EXPORT_SYMBOL(pneigh_enqueue); -static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, +static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl, struct net *net, int ifindex) { struct neigh_parms *p; @@ -1337,7 +1336,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct net *net = dev_net(dev); const struct net_device_ops *ops = dev->netdev_ops; - ref = lookup_neigh_params(tbl, net, 0); + ref = lookup_neigh_parms(tbl, net, 0); if (!ref) return NULL; @@ -1442,10 +1441,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); rwlock_init(&tbl->lock); - setup_timer(&tbl->gc_timer, neigh_periodic_timer, (unsigned long)tbl); - tbl->gc_timer.expires = now + 1; - add_timer(&tbl->gc_timer); - + INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work); + schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); skb_queue_head_init_class(&tbl->proxy_queue, &neigh_table_proxy_queue_class); @@ -1482,7 +1479,8 @@ int neigh_table_clear(struct neigh_table *tbl) struct neigh_table **tp; /* It is not clean... Fix it to unload IPv6 module safely */ - del_timer_sync(&tbl->gc_timer); + cancel_delayed_work(&tbl->gc_work); + flush_scheduled_work(); del_timer_sync(&tbl->proxy_timer); pneigh_queue_purge(&tbl->proxy_queue); neigh_ifdown(tbl, NULL); @@ -1752,7 +1750,6 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, .ndtc_last_rand = jiffies_to_msecs(rand_delta), .ndtc_hash_rnd = tbl->hash_rnd, .ndtc_hash_mask = tbl->hash_mask, - .ndtc_hash_chain_gc = tbl->hash_chain_gc, .ndtc_proxy_qlen = tbl->proxy_queue.qlen, }; @@ -1906,7 +1903,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (tbp[NDTPA_IFINDEX]) ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); - p = lookup_neigh_params(tbl, net, ifindex); + p = lookup_neigh_parms(tbl, net, ifindex); if (p == NULL) { err = -ENOENT; goto errout_tbl_lock; @@ -2095,7 +2092,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (h > s_h) s_idx = 0; for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { - if (dev_net(n->dev) != net) + if (!net_eq(dev_net(n->dev), net)) continue; if (idx < s_idx) goto next; @@ -2569,21 +2566,18 @@ static struct neigh_sysctl_table { } neigh_sysctl_template __read_mostly = { .neigh_vars = { { - .ctl_name = NET_NEIGH_MCAST_SOLICIT, .procname = "mcast_solicit", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_UCAST_SOLICIT, .procname = "ucast_solicit", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_APP_SOLICIT, .procname = "app_solicit", .maxlen = sizeof(int), .mode = 0644, @@ -2596,38 +2590,30 @@ static struct neigh_sysctl_table { .proc_handler = proc_dointvec_userhz_jiffies, }, { - .ctl_name = NET_NEIGH_REACHABLE_TIME, .procname = "base_reachable_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_NEIGH_DELAY_PROBE_TIME, .procname = "delay_first_probe_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_NEIGH_GC_STALE_TIME, .procname = "gc_stale_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_NEIGH_UNRES_QLEN, .procname = "unres_qlen", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_PROXY_QLEN, .procname = "proxy_qlen", .maxlen = sizeof(int), .mode = 0644, @@ -2652,45 +2638,36 @@ static struct neigh_sysctl_table { .proc_handler = proc_dointvec_userhz_jiffies, }, { - .ctl_name = NET_NEIGH_RETRANS_TIME_MS, .procname = "retrans_time_ms", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies, }, { - .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, .procname = "base_reachable_time_ms", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies, }, { - .ctl_name = NET_NEIGH_GC_INTERVAL, .procname = "gc_interval", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_NEIGH_GC_THRESH1, .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_GC_THRESH2, .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_GC_THRESH3, .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, @@ -2702,7 +2679,7 @@ static struct neigh_sysctl_table { int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, int p_id, int pdev_id, char *p_name, - proc_handler *handler, ctl_handler *strategy) + proc_handler *handler) { struct neigh_sysctl_table *t; const char *dev_name_source = NULL; @@ -2713,10 +2690,10 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, #define NEIGH_CTL_PATH_DEV 3 struct ctl_path neigh_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "proto", .ctl_name = 0, }, - { .procname = "neigh", .ctl_name = 0, }, - { .procname = "default", .ctl_name = NET_PROTO_CONF_DEFAULT, }, + { .procname = "net", }, + { .procname = "proto", }, + { .procname = "neigh", }, + { .procname = "default", }, { }, }; @@ -2741,7 +2718,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, if (dev) { dev_name_source = dev->name; - neigh_path[NEIGH_CTL_PATH_DEV].ctl_name = dev->ifindex; /* Terminate the table early */ memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); } else { @@ -2753,31 +2729,19 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, } - if (handler || strategy) { + if (handler) { /* RetransTime */ t->neigh_vars[3].proc_handler = handler; - t->neigh_vars[3].strategy = strategy; t->neigh_vars[3].extra1 = dev; - if (!strategy) - t->neigh_vars[3].ctl_name = CTL_UNNUMBERED; /* ReachableTime */ t->neigh_vars[4].proc_handler = handler; - t->neigh_vars[4].strategy = strategy; t->neigh_vars[4].extra1 = dev; - if (!strategy) - t->neigh_vars[4].ctl_name = CTL_UNNUMBERED; /* RetransTime (in milliseconds)*/ t->neigh_vars[12].proc_handler = handler; - t->neigh_vars[12].strategy = strategy; t->neigh_vars[12].extra1 = dev; - if (!strategy) - t->neigh_vars[12].ctl_name = CTL_UNNUMBERED; /* ReachableTime (in milliseconds) */ t->neigh_vars[13].proc_handler = handler; - t->neigh_vars[13].strategy = strategy; t->neigh_vars[13].extra1 = dev; - if (!strategy) - t->neigh_vars[13].ctl_name = CTL_UNNUMBERED; } t->dev_name = kstrdup(dev_name_source, GFP_KERNEL); @@ -2785,9 +2749,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, goto free; neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name; - neigh_path[NEIGH_CTL_PATH_NEIGH].ctl_name = pdev_id; neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name; - neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id; t->sysctl_header = register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 3994680c08b9..fbc1c7472c5e 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -16,7 +16,7 @@ #include <net/sock.h> #include <linux/rtnetlink.h> #include <linux/wireless.h> -#include <net/iw_handler.h> +#include <net/wext.h> #include "net-sysfs.h" @@ -130,6 +130,48 @@ static ssize_t show_carrier(struct device *dev, return -EINVAL; } +static ssize_t show_speed(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + int ret = -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (netif_running(netdev) && + netdev->ethtool_ops && + netdev->ethtool_ops->get_settings) { + struct ethtool_cmd cmd = { ETHTOOL_GSET }; + + if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) + ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); + } + rtnl_unlock(); + return ret; +} + +static ssize_t show_duplex(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + int ret = -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (netif_running(netdev) && + netdev->ethtool_ops && + netdev->ethtool_ops->get_settings) { + struct ethtool_cmd cmd = { ETHTOOL_GSET }; + + if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) + ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half"); + } + rtnl_unlock(); + return ret; +} + static ssize_t show_dormant(struct device *dev, struct device_attribute *attr, char *buf) { @@ -141,7 +183,7 @@ static ssize_t show_dormant(struct device *dev, return -EINVAL; } -static const char *operstates[] = { +static const char *const operstates[] = { "unknown", "notpresent", /* currently unused */ "down", @@ -259,6 +301,8 @@ static struct device_attribute net_class_attributes[] = { __ATTR(address, S_IRUGO, show_address, NULL), __ATTR(broadcast, S_IRUGO, show_broadcast, NULL), __ATTR(carrier, S_IRUGO, show_carrier, NULL), + __ATTR(speed, S_IRUGO, show_speed, NULL), + __ATTR(duplex, S_IRUGO, show_duplex, NULL), __ATTR(dormant, S_IRUGO, show_dormant, NULL), __ATTR(operstate, S_IRUGO, show_operstate, NULL), __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu), @@ -363,18 +407,16 @@ static ssize_t wireless_show(struct device *d, char *buf, char *)) { struct net_device *dev = to_net_dev(d); - const struct iw_statistics *iw = NULL; + const struct iw_statistics *iw; ssize_t ret = -EINVAL; - read_lock(&dev_base_lock); + rtnl_lock(); if (dev_isalive(dev)) { - if (dev->wireless_handlers && - dev->wireless_handlers->get_wireless_stats) - iw = dev->wireless_handlers->get_wireless_stats(dev); - if (iw != NULL) + iw = get_wireless_stats(dev); + if (iw) ret = (*format)(iw, buf); } - read_unlock(&dev_base_lock); + rtnl_unlock(); return ret; } @@ -483,7 +525,7 @@ void netdev_unregister_kobject(struct net_device * net) kobject_get(&dev->kobj); - if (dev_net(net) != &init_net) + if (!net_eq(dev_net(net), &init_net)) return; device_del(dev); @@ -493,7 +535,7 @@ void netdev_unregister_kobject(struct net_device * net) int netdev_register_kobject(struct net_device *net) { struct device *dev = &(net->dev); - struct attribute_group **groups = net->sysfs_groups; + const struct attribute_group **groups = net->sysfs_groups; dev->class = &net_class; dev->platform_data = net; @@ -502,15 +544,22 @@ int netdev_register_kobject(struct net_device *net) dev_set_name(dev, "%s", net->name); #ifdef CONFIG_SYSFS - *groups++ = &netstat_group; + /* Allow for a device specific group */ + if (*groups) + groups++; + *groups++ = &netstat_group; #ifdef CONFIG_WIRELESS_EXT_SYSFS - if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) + if (net->ieee80211_ptr) *groups++ = &wireless_group; +#ifdef CONFIG_WIRELESS_EXT + else if (net->wireless_handlers) + *groups++ = &wireless_group; +#endif #endif #endif /* CONFIG_SYSFS */ - if (dev_net(net) != &init_net) + if (!net_eq(dev_net(net), &init_net)) return 0; return device_add(dev); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 197283072cc8..bd8c4712ea24 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -6,6 +6,8 @@ #include <linux/delay.h> #include <linux/sched.h> #include <linux/idr.h> +#include <linux/rculist.h> +#include <linux/nsproxy.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -25,14 +27,64 @@ EXPORT_SYMBOL(init_net); #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ +static int ops_init(const struct pernet_operations *ops, struct net *net) +{ + int err; + if (ops->id && ops->size) { + void *data = kzalloc(ops->size, GFP_KERNEL); + if (!data) + return -ENOMEM; + + err = net_assign_generic(net, *ops->id, data); + if (err) { + kfree(data); + return err; + } + } + if (ops->init) + return ops->init(net); + return 0; +} + +static void ops_free(const struct pernet_operations *ops, struct net *net) +{ + if (ops->id && ops->size) { + int id = *ops->id; + kfree(net_generic(net, id)); + } +} + +static void ops_exit_list(const struct pernet_operations *ops, + struct list_head *net_exit_list) +{ + struct net *net; + if (ops->exit) { + list_for_each_entry(net, net_exit_list, exit_list) + ops->exit(net); + } + if (ops->exit_batch) + ops->exit_batch(net_exit_list); +} + +static void ops_free_list(const struct pernet_operations *ops, + struct list_head *net_exit_list) +{ + struct net *net; + if (ops->size && ops->id) { + list_for_each_entry(net, net_exit_list, exit_list) + ops_free(ops, net); + } +} + /* * setup_net runs the initializers for the network namespace object. */ static __net_init int setup_net(struct net *net) { /* Must be called with net_mutex held */ - struct pernet_operations *ops; + const struct pernet_operations *ops, *saved_ops; int error = 0; + LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); @@ -41,11 +93,9 @@ static __net_init int setup_net(struct net *net) #endif list_for_each_entry(ops, &pernet_list, list) { - if (ops->init) { - error = ops->init(net); - if (error < 0) - goto out_undo; - } + error = ops_init(ops, net); + if (error < 0) + goto out_undo; } out: return error; @@ -54,10 +104,14 @@ out_undo: /* Walk through the list backwards calling the exit functions * for the pernet modules whose init functions did not fail. */ - list_for_each_entry_continue_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); - } + list_add(&net->exit_list, &net_exit_list); + saved_ops = ops; + list_for_each_entry_continue_reverse(ops, &pernet_list, list) + ops_exit_list(ops, &net_exit_list); + + ops = saved_ops; + list_for_each_entry_continue_reverse(ops, &pernet_list, list) + ops_free_list(ops, &net_exit_list); rcu_barrier(); goto out; @@ -127,7 +181,7 @@ static struct net *net_create(void) rv = setup_net(net); if (rv == 0) { rtnl_lock(); - list_add_tail(&net->list, &net_namespace_list); + list_add_tail_rcu(&net->list, &net_namespace_list); rtnl_unlock(); } mutex_unlock(&net_mutex); @@ -145,25 +199,45 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) return net_create(); } +static DEFINE_SPINLOCK(cleanup_list_lock); +static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ + static void cleanup_net(struct work_struct *work) { - struct pernet_operations *ops; - struct net *net; + const struct pernet_operations *ops; + struct net *net, *tmp; + LIST_HEAD(net_kill_list); + LIST_HEAD(net_exit_list); - net = container_of(work, struct net, work); + /* Atomically snapshot the list of namespaces to cleanup */ + spin_lock_irq(&cleanup_list_lock); + list_replace_init(&cleanup_list, &net_kill_list); + spin_unlock_irq(&cleanup_list_lock); mutex_lock(&net_mutex); /* Don't let anyone else find us. */ rtnl_lock(); - list_del(&net->list); + list_for_each_entry(net, &net_kill_list, cleanup_list) { + list_del_rcu(&net->list); + list_add_tail(&net->exit_list, &net_exit_list); + } rtnl_unlock(); + /* + * Another CPU might be rcu-iterating the list, wait for it. + * This needs to be before calling the exit() notifiers, so + * the rcu_barrier() below isn't sufficient alone. + */ + synchronize_rcu(); + /* Run all of the network namespace exit methods */ - list_for_each_entry_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); - } + list_for_each_entry_reverse(ops, &pernet_list, list) + ops_exit_list(ops, &net_exit_list); + + /* Free the net generic variables */ + list_for_each_entry_reverse(ops, &pernet_list, list) + ops_free_list(ops, &net_exit_list); mutex_unlock(&net_mutex); @@ -173,14 +247,23 @@ static void cleanup_net(struct work_struct *work) rcu_barrier(); /* Finally it is safe to free my network namespace structure */ - net_free(net); + list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { + list_del_init(&net->exit_list); + net_free(net); + } } +static DECLARE_WORK(net_cleanup_work, cleanup_net); void __put_net(struct net *net) { /* Cleanup the network namespace in process context */ - INIT_WORK(&net->work, cleanup_net); - queue_work(netns_wq, &net->work); + unsigned long flags; + + spin_lock_irqsave(&cleanup_list_lock, flags); + list_add(&net->cleanup_list, &cleanup_list); + spin_unlock_irqrestore(&cleanup_list_lock, flags); + + queue_work(netns_wq, &net_cleanup_work); } EXPORT_SYMBOL_GPL(__put_net); @@ -193,6 +276,26 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) } #endif +struct net *get_net_ns_by_pid(pid_t pid) +{ + struct task_struct *tsk; + struct net *net; + + /* Lookup the network namespace */ + net = ERR_PTR(-ESRCH); + rcu_read_lock(); + tsk = find_task_by_vpid(pid); + if (tsk) { + struct nsproxy *nsproxy; + nsproxy = task_nsproxy(tsk); + if (nsproxy) + net = get_net(nsproxy->net_ns); + } + rcu_read_unlock(); + return net; +} +EXPORT_SYMBOL_GPL(get_net_ns_by_pid); + static int __init net_ns_init(void) { struct net_generic *ng; @@ -219,7 +322,7 @@ static int __init net_ns_init(void) panic("Could not setup the initial network namespace"); rtnl_lock(); - list_add_tail(&init_net.list, &net_namespace_list); + list_add_tail_rcu(&init_net.list, &net_namespace_list); rtnl_unlock(); mutex_unlock(&net_mutex); @@ -230,18 +333,20 @@ static int __init net_ns_init(void) pure_initcall(net_ns_init); #ifdef CONFIG_NET_NS -static int register_pernet_operations(struct list_head *list, - struct pernet_operations *ops) +static int __register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) { - struct net *net, *undo_net; + struct net *net; int error; + LIST_HEAD(net_exit_list); list_add_tail(&ops->list, list); - if (ops->init) { + if (ops->init || (ops->id && ops->size)) { for_each_net(net) { - error = ops->init(net); + error = ops_init(ops, net); if (error) goto out_undo; + list_add_tail(&net->exit_list, &net_exit_list); } } return 0; @@ -249,45 +354,82 @@ static int register_pernet_operations(struct list_head *list, out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); - if (ops->exit) { - for_each_net(undo_net) { - if (undo_net == net) - goto undone; - ops->exit(undo_net); - } - } -undone: + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); return error; } -static void unregister_pernet_operations(struct pernet_operations *ops) +static void __unregister_pernet_operations(struct pernet_operations *ops) { struct net *net; + LIST_HEAD(net_exit_list); list_del(&ops->list); - if (ops->exit) - for_each_net(net) - ops->exit(net); + for_each_net(net) + list_add_tail(&net->exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); } #else +static int __register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) +{ + int err = 0; + err = ops_init(ops, &init_net); + if (err) + ops_free(ops, &init_net); + return err; + +} + +static void __unregister_pernet_operations(struct pernet_operations *ops) +{ + LIST_HEAD(net_exit_list); + list_add(&init_net.exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); +} + +#endif /* CONFIG_NET_NS */ + +static DEFINE_IDA(net_generic_ids); + static int register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { - if (ops->init == NULL) - return 0; - return ops->init(&init_net); + int error; + + if (ops->id) { +again: + error = ida_get_new_above(&net_generic_ids, 1, ops->id); + if (error < 0) { + if (error == -EAGAIN) { + ida_pre_get(&net_generic_ids, GFP_KERNEL); + goto again; + } + return error; + } + } + error = __register_pernet_operations(list, ops); + if (error) { + rcu_barrier(); + if (ops->id) + ida_remove(&net_generic_ids, *ops->id); + } + + return error; } static void unregister_pernet_operations(struct pernet_operations *ops) { - if (ops->exit) - ops->exit(&init_net); + + __unregister_pernet_operations(ops); + rcu_barrier(); + if (ops->id) + ida_remove(&net_generic_ids, *ops->id); } -#endif - -static DEFINE_IDA(net_generic_ids); /** * register_pernet_subsys - register a network namespace subsystem @@ -335,38 +477,6 @@ void unregister_pernet_subsys(struct pernet_operations *module) } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); -int register_pernet_gen_subsys(int *id, struct pernet_operations *ops) -{ - int rv; - - mutex_lock(&net_mutex); -again: - rv = ida_get_new_above(&net_generic_ids, 1, id); - if (rv < 0) { - if (rv == -EAGAIN) { - ida_pre_get(&net_generic_ids, GFP_KERNEL); - goto again; - } - goto out; - } - rv = register_pernet_operations(first_device, ops); - if (rv < 0) - ida_remove(&net_generic_ids, *id); -out: - mutex_unlock(&net_mutex); - return rv; -} -EXPORT_SYMBOL_GPL(register_pernet_gen_subsys); - -void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops) -{ - mutex_lock(&net_mutex); - unregister_pernet_operations(ops); - ida_remove(&net_generic_ids, id); - mutex_unlock(&net_mutex); -} -EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys); - /** * register_pernet_device - register a network namespace device * @ops: pernet operations structure for the subsystem @@ -398,30 +508,6 @@ int register_pernet_device(struct pernet_operations *ops) } EXPORT_SYMBOL_GPL(register_pernet_device); -int register_pernet_gen_device(int *id, struct pernet_operations *ops) -{ - int error; - mutex_lock(&net_mutex); -again: - error = ida_get_new_above(&net_generic_ids, 1, id); - if (error) { - if (error == -EAGAIN) { - ida_pre_get(&net_generic_ids, GFP_KERNEL); - goto again; - } - goto out; - } - error = register_pernet_operations(&pernet_list, ops); - if (error) - ida_remove(&net_generic_ids, *id); - else if (first_device == &pernet_list) - first_device = &ops->list; -out: - mutex_unlock(&net_mutex); - return error; -} -EXPORT_SYMBOL_GPL(register_pernet_gen_device); - /** * unregister_pernet_device - unregister a network namespace netdevice * @ops: pernet operations structure to manipulate @@ -441,17 +527,6 @@ void unregister_pernet_device(struct pernet_operations *ops) } EXPORT_SYMBOL_GPL(unregister_pernet_device); -void unregister_pernet_gen_device(int id, struct pernet_operations *ops) -{ - mutex_lock(&net_mutex); - if (&ops->list == first_device) - first_device = first_device->next; - unregister_pernet_operations(ops); - ida_remove(&net_generic_ids, id); - mutex_unlock(&net_mutex); -} -EXPORT_SYMBOL_GPL(unregister_pernet_gen_device); - static void net_generic_release(struct rcu_head *rcu) { struct net_generic *ng; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 1b76eb11deb4..0b4d0d35ef40 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -9,6 +9,7 @@ * Copyright (C) 2002 Red Hat, Inc. */ +#include <linux/moduleparam.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/string.h> @@ -50,6 +51,9 @@ static atomic_t trapped; static void zap_completion_queue(void); static void arp_reply(struct sk_buff *skb); +static unsigned int carrier_timeout = 4; +module_param(carrier_timeout, uint, 0644); + static void queue_process(struct work_struct *work) { struct netpoll_info *npinfo = @@ -737,7 +741,7 @@ int netpoll_setup(struct netpoll *np) } atleast = jiffies + HZ/10; - atmost = jiffies + 4*HZ; + atmost = jiffies + carrier_timeout * HZ; while (!netif_carrier_ok(ndev)) { if (time_after(jiffies, atmost)) { printk(KERN_NOTICE diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 19b8c20e98a4..a23b45f08ec9 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -131,6 +131,7 @@ #include <linux/ioport.h> #include <linux/interrupt.h> #include <linux/capability.h> +#include <linux/hrtimer.h> #include <linux/freezer.h> #include <linux/delay.h> #include <linux/timer.h> @@ -162,14 +163,13 @@ #include <asm/byteorder.h> #include <linux/rcupdate.h> #include <linux/bitops.h> -#include <asm/io.h> +#include <linux/io.h> +#include <linux/timex.h> +#include <linux/uaccess.h> #include <asm/dma.h> -#include <asm/uaccess.h> #include <asm/div64.h> /* do_div */ -#include <asm/timex.h> - -#define VERSION "pktgen v2.70: Packet Generator for packet performance testing.\n" +#define VERSION "2.72" #define IP_NAME_SZ 32 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ #define MPLS_STACK_BOTTOM htonl(0x00000100) @@ -192,11 +192,10 @@ #define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ /* Thread control flag bits */ -#define T_TERMINATE (1<<0) -#define T_STOP (1<<1) /* Stop run */ -#define T_RUN (1<<2) /* Start run */ -#define T_REMDEVALL (1<<3) /* Remove all devs */ -#define T_REMDEV (1<<4) /* Remove one dev */ +#define T_STOP (1<<0) /* Stop run */ +#define T_RUN (1<<1) /* Start run */ +#define T_REMDEVALL (1<<2) /* Remove all devs */ +#define T_REMDEV (1<<3) /* Remove one dev */ /* If lock -- can be removed after some work */ #define if_lock(t) spin_lock(&(t->if_lock)); @@ -206,7 +205,7 @@ #define PKTGEN_MAGIC 0xbe9be955 #define PG_PROC_DIR "pktgen" #define PGCTRL "pgctrl" -static struct proc_dir_entry *pg_proc_dir = NULL; +static struct proc_dir_entry *pg_proc_dir; #define MAX_CFLOWS 65536 @@ -231,9 +230,9 @@ struct pktgen_dev { */ struct proc_dir_entry *entry; /* proc file */ struct pktgen_thread *pg_thread;/* the owner */ - struct list_head list; /* Used for chaining in the thread's run-queue */ + struct list_head list; /* chaining in the thread's run-queue */ - int running; /* if this changes to false, the test will stop */ + int running; /* if false, the test will stop */ /* If min != max, then we will either do a linear iteration, or * we will do a random selection from within the range. @@ -246,33 +245,37 @@ struct pktgen_dev { int max_pkt_size; /* = ETH_ZLEN; */ int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; - __u32 delay_us; /* Default delay */ - __u32 delay_ns; + u64 delay; /* nano-seconds */ + __u64 count; /* Default No packets to send */ __u64 sofar; /* How many pkts we've sent so far */ __u64 tx_bytes; /* How many bytes we've transmitted */ - __u64 errors; /* Errors when trying to transmit, pkts will be re-sent */ + __u64 errors; /* Errors when trying to transmit, + pkts will be re-sent */ /* runtime counters relating to clone_skb */ - __u64 next_tx_us; /* timestamp of when to tx next */ - __u32 next_tx_ns; __u64 allocated_skbs; __u32 clone_count; int last_ok; /* Was last skb sent? - * Or a failed transmit of some sort? This will keep - * sequence numbers in order, for example. + * Or a failed transmit of some sort? + * This will keep sequence numbers in order */ - __u64 started_at; /* micro-seconds */ - __u64 stopped_at; /* micro-seconds */ - __u64 idle_acc; /* micro-seconds */ + ktime_t next_tx; + ktime_t started_at; + ktime_t stopped_at; + u64 idle_acc; /* nano-seconds */ + __u32 seq_num; - int clone_skb; /* Use multiple SKBs during packet gen. If this number - * is greater than 1, then that many copies of the same - * packet will be sent before a new packet is allocated. - * For instance, if you want to send 1024 identical packets - * before creating a new packet, set clone_skb to 1024. + int clone_skb; /* + * Use multiple SKBs during packet gen. + * If this number is greater than 1, then + * that many copies of the same packet will be + * sent before a new packet is allocated. + * If you want to send 1024 identical packets + * before creating a new packet, + * set clone_skb to 1024. */ char dst_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ @@ -304,8 +307,10 @@ struct pktgen_dev { __u16 udp_dst_max; /* exclusive, dest UDP port */ /* DSCP + ECN */ - __u8 tos; /* six most significant bits of (former) IPv4 TOS are for dscp codepoint */ - __u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6 (see RFC 3260, sec. 4) */ + __u8 tos; /* six MSB of (former) IPv4 TOS + are for dscp codepoint */ + __u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6 + (see RFC 3260, sec. 4) */ /* MPLS */ unsigned nr_labels; /* Depth of stack, 0 = no MPLS */ @@ -330,10 +335,12 @@ struct pktgen_dev { __u32 cur_src_mac_offset; __be32 cur_saddr; __be32 cur_daddr; + __u16 ip_id; __u16 cur_udp_dst; __u16 cur_udp_src; __u16 cur_queue_map; __u32 cur_pkt_size; + __u32 last_pkt_size; __u8 hh[14]; /* = { @@ -346,15 +353,18 @@ struct pktgen_dev { */ __u16 pad; /* pad out the hh struct to an even 16 bytes */ - struct sk_buff *skb; /* skb we are to transmit next, mainly used for when we + struct sk_buff *skb; /* skb we are to transmit next, used for when we * are transmitting the same one multiple times */ - struct net_device *odev; /* The out-going device. Note that the device should - * have it's pg_info pointer pointing back to this - * device. This will be set when the user specifies - * the out-going device name (not when the inject is - * started as it used to do.) - */ + struct net_device *odev; /* The out-going device. + * Note that the device should have it's + * pg_info pointer pointing back to this + * device. + * Set when the user specifies the out-going + * device name (not when the inject is + * started as it used to do.) + */ + char odevname[32]; struct flow_state *flows; unsigned cflows; /* Concurrent flows (config) */ unsigned lflow; /* Flow length (config) */ @@ -379,13 +389,14 @@ struct pktgen_hdr { }; struct pktgen_thread { - spinlock_t if_lock; + spinlock_t if_lock; /* for list of devices */ struct list_head if_list; /* All device here */ struct list_head th_list; struct task_struct *tsk; char result[512]; - /* Field for thread to receive "posted" events terminate, stop ifs etc. */ + /* Field for thread to receive "posted" events terminate, + stop ifs etc. */ u32 control; int cpu; @@ -397,34 +408,32 @@ struct pktgen_thread { #define REMOVE 1 #define FIND 0 -/** Convert to micro-seconds */ -static inline __u64 tv_to_us(const struct timeval *tv) +static inline ktime_t ktime_now(void) { - __u64 us = tv->tv_usec; - us += (__u64) tv->tv_sec * (__u64) 1000000; - return us; + struct timespec ts; + ktime_get_ts(&ts); + + return timespec_to_ktime(ts); } -static __u64 getCurUs(void) +/* This works even if 32 bit because of careful byte order choice */ +static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2) { - struct timeval tv; - do_gettimeofday(&tv); - return tv_to_us(&tv); + return cmp1.tv64 < cmp2.tv64; } -/* old include end */ - -static char version[] __initdata = VERSION; +static const char version[] = + "pktgen " VERSION ": Packet Generator for packet performance testing.\n"; static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, - const char *ifname); + const char *ifname, bool exact); static int pktgen_device_event(struct notifier_block *, unsigned long, void *); static void pktgen_run_all_threads(void); static void pktgen_reset_all_threads(void); static void pktgen_stop_all_threads_ifs(void); -static int pktgen_stop_device(struct pktgen_dev *pkt_dev); + static void pktgen_stop(struct pktgen_thread *t); static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); @@ -432,10 +441,10 @@ static unsigned int scan_ip6(const char *s, char ip[16]); static unsigned int fmt_ip6(char *s, const char ip[16]); /* Module parameters, defaults. */ -static int pg_count_d = 1000; /* 1000 pkts by default */ -static int pg_delay_d; -static int pg_clone_skb_d; -static int debug; +static int pg_count_d __read_mostly = 1000; +static int pg_delay_d __read_mostly; +static int pg_clone_skb_d __read_mostly; +static int debug __read_mostly; static DEFINE_MUTEX(pktgen_thread_lock); static LIST_HEAD(pktgen_threads); @@ -451,12 +460,12 @@ static struct notifier_block pktgen_notifier_block = { static int pgctrl_show(struct seq_file *seq, void *v) { - seq_puts(seq, VERSION); + seq_puts(seq, version); return 0; } -static ssize_t pgctrl_write(struct file *file, const char __user * buf, - size_t count, loff_t * ppos) +static ssize_t pgctrl_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { int err = 0; char data[128]; @@ -509,10 +518,9 @@ static const struct file_operations pktgen_fops = { static int pktgen_if_show(struct seq_file *seq, void *v) { - struct pktgen_dev *pkt_dev = seq->private; - __u64 sa; - __u64 stopped; - __u64 now = getCurUs(); + const struct pktgen_dev *pkt_dev = seq->private; + ktime_t stopped; + u64 idle; seq_printf(seq, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", @@ -520,10 +528,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->max_pkt_size); seq_printf(seq, - " frags: %d delay: %u clone_skb: %d ifname: %s\n", - pkt_dev->nfrags, - 1000 * pkt_dev->delay_us + pkt_dev->delay_ns, - pkt_dev->clone_skb, pkt_dev->odev->name); + " frags: %d delay: %llu clone_skb: %d ifname: %s\n", + pkt_dev->nfrags, (unsigned long long) pkt_dev->delay, + pkt_dev->clone_skb, pkt_dev->odevname); seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); @@ -549,11 +556,14 @@ static int pktgen_if_show(struct seq_file *seq, void *v) " daddr: %s min_daddr: %s max_daddr: %s\n", b1, b2, b3); - } else + } else { seq_printf(seq, - " dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n", - pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, - pkt_dev->src_max); + " dst_min: %s dst_max: %s\n", + pkt_dev->dst_min, pkt_dev->dst_max); + seq_printf(seq, + " src_min: %s src_max: %s\n", + pkt_dev->src_min, pkt_dev->src_max); + } seq_puts(seq, " src_mac: "); @@ -565,7 +575,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_printf(seq, "%pM\n", pkt_dev->dst_mac); seq_printf(seq, - " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", + " udp_src_min: %d udp_src_max: %d" + " udp_dst_min: %d udp_dst_max: %d\n", pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min, pkt_dev->udp_dst_max); @@ -581,23 +592,21 @@ static int pktgen_if_show(struct seq_file *seq, void *v) i == pkt_dev->nr_labels-1 ? "\n" : ", "); } - if (pkt_dev->vlan_id != 0xffff) { + if (pkt_dev->vlan_id != 0xffff) seq_printf(seq, " vlan_id: %u vlan_p: %u vlan_cfi: %u\n", - pkt_dev->vlan_id, pkt_dev->vlan_p, pkt_dev->vlan_cfi); - } + pkt_dev->vlan_id, pkt_dev->vlan_p, + pkt_dev->vlan_cfi); - if (pkt_dev->svlan_id != 0xffff) { + if (pkt_dev->svlan_id != 0xffff) seq_printf(seq, " svlan_id: %u vlan_p: %u vlan_cfi: %u\n", - pkt_dev->svlan_id, pkt_dev->svlan_p, pkt_dev->svlan_cfi); - } + pkt_dev->svlan_id, pkt_dev->svlan_p, + pkt_dev->svlan_cfi); - if (pkt_dev->tos) { + if (pkt_dev->tos) seq_printf(seq, " tos: 0x%02x\n", pkt_dev->tos); - } - if (pkt_dev->traffic_class) { + if (pkt_dev->traffic_class) seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); - } seq_printf(seq, " Flags: "); @@ -654,17 +663,21 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, "\n"); - sa = pkt_dev->started_at; - stopped = pkt_dev->stopped_at; - if (pkt_dev->running) - stopped = now; /* not really stopped, more like last-running-at */ + /* not really stopped, more like last-running-at */ + stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at; + idle = pkt_dev->idle_acc; + do_div(idle, NSEC_PER_USEC); seq_printf(seq, - "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", + "Current:\n pkts-sofar: %llu errors: %llu\n", (unsigned long long)pkt_dev->sofar, - (unsigned long long)pkt_dev->errors, (unsigned long long)sa, - (unsigned long long)stopped, - (unsigned long long)pkt_dev->idle_acc); + (unsigned long long)pkt_dev->errors); + + seq_printf(seq, + " started: %lluus stopped: %lluus idle: %lluus\n", + (unsigned long long) ktime_to_us(pkt_dev->started_at), + (unsigned long long) ktime_to_us(stopped), + (unsigned long long) idle); seq_printf(seq, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", @@ -696,7 +709,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v) } -static int hex32_arg(const char __user *user_buffer, unsigned long maxlen, __u32 *num) +static int hex32_arg(const char __user *user_buffer, unsigned long maxlen, + __u32 *num) { int i = 0; *num = 0; @@ -846,9 +860,9 @@ static ssize_t pktgen_if_write(struct file *file, /* Read variable name */ len = strn_len(&user_buffer[i], sizeof(name) - 1); - if (len < 0) { + if (len < 0) return len; - } + memset(name, 0, sizeof(name)); if (copy_from_user(name, &user_buffer[i], len)) return -EFAULT; @@ -872,9 +886,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "min_pkt_size")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if (value < 14 + 20 + 8) value = 14 + 20 + 8; @@ -889,9 +903,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "max_pkt_size")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if (value < 14 + 20 + 8) value = 14 + 20 + 8; @@ -908,9 +922,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "pkt_size")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if (value < 14 + 20 + 8) value = 14 + 20 + 8; @@ -925,9 +939,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "debug")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; debug = value; sprintf(pg_result, "OK: debug=%u", debug); @@ -936,9 +950,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "frags")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; pkt_dev->nfrags = value; sprintf(pg_result, "OK: frags=%u", pkt_dev->nfrags); @@ -946,26 +960,24 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "delay")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; - if (value == 0x7FFFFFFF) { - pkt_dev->delay_us = 0x7FFFFFFF; - pkt_dev->delay_ns = 0; - } else { - pkt_dev->delay_us = value / 1000; - pkt_dev->delay_ns = value % 1000; - } - sprintf(pg_result, "OK: delay=%u", - 1000 * pkt_dev->delay_us + pkt_dev->delay_ns); + if (value == 0x7FFFFFFF) + pkt_dev->delay = ULLONG_MAX; + else + pkt_dev->delay = (u64)value; + + sprintf(pg_result, "OK: delay=%llu", + (unsigned long long) pkt_dev->delay); return count; } if (!strcmp(name, "udp_src_min")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if (value != pkt_dev->udp_src_min) { pkt_dev->udp_src_min = value; @@ -976,9 +988,9 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "udp_dst_min")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if (value != pkt_dev->udp_dst_min) { pkt_dev->udp_dst_min = value; @@ -989,9 +1001,9 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "udp_src_max")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if (value != pkt_dev->udp_src_max) { pkt_dev->udp_src_max = value; @@ -1002,9 +1014,9 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "udp_dst_max")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if (value != pkt_dev->udp_dst_max) { pkt_dev->udp_dst_max = value; @@ -1015,9 +1027,9 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "clone_skb")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; pkt_dev->clone_skb = value; @@ -1026,9 +1038,9 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "count")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; pkt_dev->count = value; sprintf(pg_result, "OK: count=%llu", @@ -1037,9 +1049,9 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "src_mac_count")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if (pkt_dev->src_mac_count != value) { pkt_dev->src_mac_count = value; @@ -1051,9 +1063,9 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "dst_mac_count")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if (pkt_dev->dst_mac_count != value) { pkt_dev->dst_mac_count = value; @@ -1067,9 +1079,9 @@ static ssize_t pktgen_if_write(struct file *file, char f[32]; memset(f, 0, 32); len = strn_len(&user_buffer[i], sizeof(f) - 1); - if (len < 0) { + if (len < 0) return len; - } + if (copy_from_user(f, &user_buffer[i], len)) return -EFAULT; i += len; @@ -1168,9 +1180,8 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) { len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_min) - 1); - if (len < 0) { + if (len < 0) return len; - } if (copy_from_user(buf, &user_buffer[i], len)) return -EFAULT; @@ -1190,9 +1201,9 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "dst_max")) { len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_max) - 1); - if (len < 0) { + if (len < 0) return len; - } + if (copy_from_user(buf, &user_buffer[i], len)) return -EFAULT; @@ -1303,9 +1314,9 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "src_min")) { len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_min) - 1); - if (len < 0) { + if (len < 0) return len; - } + if (copy_from_user(buf, &user_buffer[i], len)) return -EFAULT; buf[len] = 0; @@ -1324,9 +1335,9 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "src_max")) { len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_max) - 1); - if (len < 0) { + if (len < 0) return len; - } + if (copy_from_user(buf, &user_buffer[i], len)) return -EFAULT; buf[len] = 0; @@ -1350,9 +1361,9 @@ static ssize_t pktgen_if_write(struct file *file, memcpy(old_dmac, pkt_dev->dst_mac, ETH_ALEN); len = strn_len(&user_buffer[i], sizeof(valstr) - 1); - if (len < 0) { + if (len < 0) return len; - } + memset(valstr, 0, sizeof(valstr)); if (copy_from_user(valstr, &user_buffer[i], len)) return -EFAULT; @@ -1392,9 +1403,9 @@ static ssize_t pktgen_if_write(struct file *file, memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN); len = strn_len(&user_buffer[i], sizeof(valstr) - 1); - if (len < 0) { + if (len < 0) return len; - } + memset(valstr, 0, sizeof(valstr)); if (copy_from_user(valstr, &user_buffer[i], len)) return -EFAULT; @@ -1435,9 +1446,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "flows")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if (value > MAX_CFLOWS) value = MAX_CFLOWS; @@ -1449,9 +1460,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "flowlen")) { len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; pkt_dev->lflow = value; sprintf(pg_result, "OK: flowlen=%u", pkt_dev->lflow); @@ -1460,9 +1471,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "queue_map_min")) { len = num_arg(&user_buffer[i], 5, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; pkt_dev->queue_map_min = value; sprintf(pg_result, "OK: queue_map_min=%u", pkt_dev->queue_map_min); @@ -1471,9 +1482,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "queue_map_max")) { len = num_arg(&user_buffer[i], 5, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; pkt_dev->queue_map_max = value; sprintf(pg_result, "OK: queue_map_max=%u", pkt_dev->queue_map_max); @@ -1505,9 +1516,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "vlan_id")) { len = num_arg(&user_buffer[i], 4, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if (value <= 4095) { pkt_dev->vlan_id = value; /* turn on VLAN */ @@ -1532,9 +1543,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "vlan_p")) { len = num_arg(&user_buffer[i], 1, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if ((value <= 7) && (pkt_dev->vlan_id != 0xffff)) { pkt_dev->vlan_p = value; @@ -1547,9 +1558,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "vlan_cfi")) { len = num_arg(&user_buffer[i], 1, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if ((value <= 1) && (pkt_dev->vlan_id != 0xffff)) { pkt_dev->vlan_cfi = value; @@ -1562,9 +1573,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "svlan_id")) { len = num_arg(&user_buffer[i], 4, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if ((value <= 4095) && ((pkt_dev->vlan_id != 0xffff))) { pkt_dev->svlan_id = value; /* turn on SVLAN */ @@ -1589,9 +1600,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "svlan_p")) { len = num_arg(&user_buffer[i], 1, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if ((value <= 7) && (pkt_dev->svlan_id != 0xffff)) { pkt_dev->svlan_p = value; @@ -1604,9 +1615,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "svlan_cfi")) { len = num_arg(&user_buffer[i], 1, &value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if ((value <= 1) && (pkt_dev->svlan_id != 0xffff)) { pkt_dev->svlan_cfi = value; @@ -1620,9 +1631,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "tos")) { __u32 tmp_value = 0; len = hex32_arg(&user_buffer[i], 2, &tmp_value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if (len == 2) { pkt_dev->tos = tmp_value; @@ -1636,9 +1647,9 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "traffic_class")) { __u32 tmp_value = 0; len = hex32_arg(&user_buffer[i], 2, &tmp_value); - if (len < 0) { + if (len < 0) return len; - } + i += len; if (len == 2) { pkt_dev->traffic_class = tmp_value; @@ -1670,7 +1681,7 @@ static const struct file_operations pktgen_if_fops = { static int pktgen_thread_show(struct seq_file *seq, void *v) { struct pktgen_thread *t = seq->private; - struct pktgen_dev *pkt_dev; + const struct pktgen_dev *pkt_dev; BUG_ON(!t); @@ -1679,13 +1690,13 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) if_lock(t); list_for_each_entry(pkt_dev, &t->if_list, list) if (pkt_dev->running) - seq_printf(seq, "%s ", pkt_dev->odev->name); + seq_printf(seq, "%s ", pkt_dev->odevname); seq_printf(seq, "\nStopped: "); list_for_each_entry(pkt_dev, &t->if_list, list) if (!pkt_dev->running) - seq_printf(seq, "%s ", pkt_dev->odev->name); + seq_printf(seq, "%s ", pkt_dev->odevname); if (t->result[0]) seq_printf(seq, "\nResult: %s\n", t->result); @@ -1808,9 +1819,10 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) { struct pktgen_thread *t; struct pktgen_dev *pkt_dev = NULL; + bool exact = (remove == FIND); list_for_each_entry(t, &pktgen_threads, th_list) { - pkt_dev = pktgen_find_dev(t, ifname); + pkt_dev = pktgen_find_dev(t, ifname, exact); if (pkt_dev) { if (remove) { if_lock(t); @@ -1873,8 +1885,10 @@ static void pktgen_change_name(struct net_device *dev) remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); - pkt_dev->entry = create_proc_entry(dev->name, 0600, - pg_proc_dir); + pkt_dev->entry = proc_create_data(dev->name, 0600, + pg_proc_dir, + &pktgen_if_fops, + pkt_dev); if (!pkt_dev->entry) printk(KERN_ERR "pktgen: can't move proc " " entry for '%s'\n", dev->name); @@ -1908,13 +1922,14 @@ static int pktgen_device_event(struct notifier_block *unused, return NOTIFY_DONE; } -static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, const char *ifname) +static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, + const char *ifname) { char b[IFNAMSIZ+5]; int i = 0; - for(i=0; ifname[i] != '@'; i++) { - if(i == IFNAMSIZ) + for (i = 0; ifname[i] != '@'; i++) { + if (i == IFNAMSIZ) break; b[i] = ifname[i]; @@ -1981,16 +1996,16 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) printk(KERN_WARNING "pktgen: WARNING: Requested " "queue_map_min (zero-based) (%d) exceeds valid range " "[0 - %d] for (%d) queues on %s, resetting\n", - pkt_dev->queue_map_min, (ntxq ?: 1)- 1, ntxq, - pkt_dev->odev->name); + pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq, + pkt_dev->odevname); pkt_dev->queue_map_min = ntxq - 1; } if (pkt_dev->queue_map_max >= ntxq) { printk(KERN_WARNING "pktgen: WARNING: Requested " "queue_map_max (zero-based) (%d) exceeds valid range " "[0 - %d] for (%d) queues on %s, resetting\n", - pkt_dev->queue_map_max, (ntxq ?: 1)- 1, ntxq, - pkt_dev->odev->name); + pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq, + pkt_dev->odevname); pkt_dev->queue_map_max = ntxq - 1; } @@ -2030,15 +2045,15 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) */ rcu_read_lock(); - if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) { + idev = __in6_dev_get(pkt_dev->odev); + if (idev) { struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { - if (ifp->scope == IFA_LINK - && !(ifp-> - flags & IFA_F_TENTATIVE)) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & IFA_F_TENTATIVE)) { ipv6_addr_copy(&pkt_dev-> cur_in6_saddr, &ifp->addr); @@ -2089,27 +2104,45 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) pkt_dev->nflows = 0; } -static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) + +static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) { - __u64 start; - __u64 now; + ktime_t start_time, end_time; + s64 remaining; + struct hrtimer_sleeper t; - start = now = getCurUs(); - while (now < spin_until_us) { - /* TODO: optimize sleeping behavior */ - if (spin_until_us - now > jiffies_to_usecs(1) + 1) - schedule_timeout_interruptible(1); - else if (spin_until_us - now > 100) { - if (!pkt_dev->running) - return; - if (need_resched()) + hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_set_expires(&t.timer, spin_until); + + remaining = ktime_to_us(hrtimer_expires_remaining(&t.timer)); + if (remaining <= 0) { + pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); + return; + } + + start_time = ktime_now(); + if (remaining < 100) + udelay(remaining); /* really small just spin */ + else { + /* see do_nanosleep */ + hrtimer_init_sleeper(&t, current); + do { + set_current_state(TASK_INTERRUPTIBLE); + hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); + if (!hrtimer_active(&t.timer)) + t.task = NULL; + + if (likely(t.task)) schedule(); - } - now = getCurUs(); + hrtimer_cancel(&t.timer); + } while (t.task && pkt_dev->running && !signal_pending(current)); + __set_current_state(TASK_RUNNING); } + end_time = ktime_now(); - pkt_dev->idle_acc += now - start; + pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); + pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay); } static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) @@ -2120,13 +2153,9 @@ static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev); } -static inline int f_seen(struct pktgen_dev *pkt_dev, int flow) +static inline int f_seen(const struct pktgen_dev *pkt_dev, int flow) { - - if (pkt_dev->flows[flow].flags & F_INIT) - return 1; - else - return 0; + return !!(pkt_dev->flows[flow].flags & F_INIT); } static inline int f_pick(struct pktgen_dev *pkt_dev) @@ -2174,7 +2203,7 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) if (x) { pkt_dev->flows[flow].x = x; set_pkt_overhead(pkt_dev); - pkt_dev->pkt_overhead+=x->props.header_len; + pkt_dev->pkt_overhead += x->props.header_len; } } @@ -2186,7 +2215,7 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev) if (pkt_dev->flags & F_QUEUE_MAP_CPU) pkt_dev->cur_queue_map = smp_processor_id(); - else if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { + else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) { __u16 t; if (pkt_dev->flags & F_QUEUE_MAP_RND) { t = random32() % @@ -2313,18 +2342,18 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (!(pkt_dev->flags & F_IPV6)) { - if ((imn = ntohl(pkt_dev->saddr_min)) < (imx = - ntohl(pkt_dev-> - saddr_max))) { + imn = ntohl(pkt_dev->saddr_min); + imx = ntohl(pkt_dev->saddr_max); + if (imn < imx) { __u32 t; if (pkt_dev->flags & F_IPSRC_RND) t = random32() % (imx - imn) + imn; else { t = ntohl(pkt_dev->cur_saddr); t++; - if (t > imx) { + if (t > imx) t = imn; - } + } pkt_dev->cur_saddr = htonl(t); } @@ -2435,14 +2464,14 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) if (err) goto error; - x->curlft.bytes +=skb->len; + x->curlft.bytes += skb->len; x->curlft.packets++; error: spin_unlock(&x->lock); return err; } -static inline void free_SAs(struct pktgen_dev *pkt_dev) +static void free_SAs(struct pktgen_dev *pkt_dev) { if (pkt_dev->cflows) { /* let go of the SAs if we have them */ @@ -2457,7 +2486,7 @@ static inline void free_SAs(struct pktgen_dev *pkt_dev) } } -static inline int process_ipsec(struct pktgen_dev *pkt_dev, +static int process_ipsec(struct pktgen_dev *pkt_dev, struct sk_buff *skb, __be16 protocol) { if (pkt_dev->flags & F_IPSEC_ON) { @@ -2467,11 +2496,11 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev, int ret; __u8 *eth; nhead = x->props.header_len - skb_headroom(skb); - if (nhead >0) { + if (nhead > 0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); if (ret < 0) { printk(KERN_ERR "Error expanding " - "ipsec packet %d\n",ret); + "ipsec packet %d\n", ret); goto err; } } @@ -2481,13 +2510,13 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev, ret = pktgen_output_ipsec(skb, pkt_dev); if (ret) { printk(KERN_ERR "Error creating ipsec " - "packet %d\n",ret); + "packet %d\n", ret); goto err; } /* restore ll */ eth = (__u8 *) skb_push(skb, ETH_HLEN); memcpy(eth, pkt_dev->hh, 12); - *(u16 *) & eth[12] = protocol; + *(u16 *) ð[12] = protocol; } } return 1; @@ -2500,9 +2529,9 @@ err: static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev) { unsigned i; - for (i = 0; i < pkt_dev->nr_labels; i++) { + for (i = 0; i < pkt_dev->nr_labels; i++) *mpls++ = pkt_dev->labels[i] & ~MPLS_STACK_BOTTOM; - } + mpls--; *mpls |= MPLS_STACK_BOTTOM; } @@ -2543,8 +2572,9 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, mod_cur_headers(pkt_dev); datalen = (odev->hard_header_len + 16) & ~0xf; - skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + datalen + - pkt_dev->pkt_overhead, GFP_ATOMIC); + skb = __netdev_alloc_skb(odev, + pkt_dev->cur_pkt_size + 64 + + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -2603,6 +2633,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->protocol = IPPROTO_UDP; /* UDP */ iph->saddr = pkt_dev->cur_saddr; iph->daddr = pkt_dev->cur_daddr; + iph->id = htons(pkt_dev->ip_id); + pkt_dev->ip_id++; iph->frag_off = 0; iplen = 20 + 8 + datalen; iph->tot_len = htons(iplen); @@ -2614,24 +2646,26 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; - if (pkt_dev->nfrags <= 0) + if (pkt_dev->nfrags <= 0) { pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - else { + memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr)); + } else { int frags = pkt_dev->nfrags; - int i; + int i, len; pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); if (frags > MAX_SKB_FRAGS) frags = MAX_SKB_FRAGS; if (datalen > frags * PAGE_SIZE) { - skb_put(skb, datalen - frags * PAGE_SIZE); + len = datalen - frags * PAGE_SIZE; + memset(skb_put(skb, len), 0, len); datalen = frags * PAGE_SIZE; } i = 0; while (datalen > 0) { - struct page *page = alloc_pages(GFP_KERNEL, 0); + struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); skb_shinfo(skb)->frags[i].page = page; skb_shinfo(skb)->frags[i].page_offset = 0; skb_shinfo(skb)->frags[i].size = @@ -2668,8 +2702,9 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, } } - /* Stamp the time, and sequence number, convert them to network byte order */ - + /* Stamp the time, and sequence number, + * convert them to network byte order + */ if (pgh) { struct timeval timestamp; @@ -2882,8 +2917,9 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, queue_map = pkt_dev->cur_queue_map; mod_cur_headers(pkt_dev); - skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 + - pkt_dev->pkt_overhead, GFP_ATOMIC); + skb = __netdev_alloc_skb(odev, + pkt_dev->cur_pkt_size + 64 + + 16 + pkt_dev->pkt_overhead, GFP_NOWAIT); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -2922,7 +2958,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, udph = udp_hdr(skb); memcpy(eth, pkt_dev->hh, 12); - *(__be16 *) & eth[12] = protocol; + *(__be16 *) ð[12] = protocol; /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - @@ -3016,8 +3052,10 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, } } - /* Stamp the time, and sequence number, convert them to network byte order */ - /* should we update cloned packets too ? */ + /* Stamp the time, and sequence number, + * convert them to network byte order + * should we update cloned packets too ? + */ if (pgh) { struct timeval timestamp; @@ -3033,8 +3071,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, return skb; } -static inline struct sk_buff *fill_packet(struct net_device *odev, - struct pktgen_dev *pkt_dev) +static struct sk_buff *fill_packet(struct net_device *odev, + struct pktgen_dev *pkt_dev) { if (pkt_dev->flags & F_IPV6) return fill_packet_ipv6(odev, pkt_dev); @@ -3072,9 +3110,9 @@ static void pktgen_run(struct pktgen_thread *t) pktgen_clear_counters(pkt_dev); pkt_dev->running = 1; /* Cranke yeself! */ pkt_dev->skb = NULL; - pkt_dev->started_at = getCurUs(); - pkt_dev->next_tx_us = getCurUs(); /* Transmit immediately */ - pkt_dev->next_tx_ns = 0; + pkt_dev->started_at = + pkt_dev->next_tx = ktime_now(); + set_pkt_overhead(pkt_dev); strcpy(pkt_dev->result, "Starting"); @@ -3101,17 +3139,14 @@ static void pktgen_stop_all_threads_ifs(void) mutex_unlock(&pktgen_thread_lock); } -static int thread_is_running(struct pktgen_thread *t) +static int thread_is_running(const struct pktgen_thread *t) { - struct pktgen_dev *pkt_dev; - int res = 0; + const struct pktgen_dev *pkt_dev; list_for_each_entry(pkt_dev, &t->if_list, list) - if (pkt_dev->running) { - res = 1; - break; - } - return res; + if (pkt_dev->running) + return 1; + return 0; } static int pktgen_wait_thread_run(struct pktgen_thread *t) @@ -3168,7 +3203,8 @@ static void pktgen_run_all_threads(void) mutex_unlock(&pktgen_thread_lock); - schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ + /* Propagate thread->control */ + schedule_timeout_interruptible(msecs_to_jiffies(125)); pktgen_wait_all_threads_run(); } @@ -3186,35 +3222,29 @@ static void pktgen_reset_all_threads(void) mutex_unlock(&pktgen_thread_lock); - schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ + /* Propagate thread->control */ + schedule_timeout_interruptible(msecs_to_jiffies(125)); pktgen_wait_all_threads_run(); } static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) { - __u64 total_us, bps, mbps, pps, idle; + __u64 bps, mbps, pps; char *p = pkt_dev->result; - - total_us = pkt_dev->stopped_at - pkt_dev->started_at; - - idle = pkt_dev->idle_acc; - - p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n", - (unsigned long long)total_us, - (unsigned long long)(total_us - idle), - (unsigned long long)idle, + ktime_t elapsed = ktime_sub(pkt_dev->stopped_at, + pkt_dev->started_at); + ktime_t idle = ns_to_ktime(pkt_dev->idle_acc); + + p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n", + (unsigned long long)ktime_to_us(elapsed), + (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)), + (unsigned long long)ktime_to_us(idle), (unsigned long long)pkt_dev->sofar, pkt_dev->cur_pkt_size, nr_frags); - pps = pkt_dev->sofar * USEC_PER_SEC; - - while ((total_us >> 32) != 0) { - pps >>= 1; - total_us >>= 1; - } - - do_div(pps, total_us); + pps = div64_u64(pkt_dev->sofar * NSEC_PER_SEC, + ktime_to_ns(elapsed)); bps = pps * 8 * pkt_dev->cur_pkt_size; @@ -3228,18 +3258,19 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) } /* Set stopped-at timer, remove from running list, do counters & statistics */ - static int pktgen_stop_device(struct pktgen_dev *pkt_dev) { int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1; if (!pkt_dev->running) { printk(KERN_WARNING "pktgen: interface: %s is already " - "stopped\n", pkt_dev->odev->name); + "stopped\n", pkt_dev->odevname); return -EINVAL; } - pkt_dev->stopped_at = getCurUs(); + kfree_skb(pkt_dev->skb); + pkt_dev->skb = NULL; + pkt_dev->stopped_at = ktime_now(); pkt_dev->running = 0; show_results(pkt_dev, nr_frags); @@ -3258,7 +3289,7 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) continue; if (best == NULL) best = pkt_dev; - else if (pkt_dev->next_tx_us < best->next_tx_us) + else if (ktime_lt(pkt_dev->next_tx, best->next_tx)) best = pkt_dev; } if_unlock(t); @@ -3275,9 +3306,6 @@ static void pktgen_stop(struct pktgen_thread *t) list_for_each_entry(pkt_dev, &t->if_list, list) { pktgen_stop_device(pkt_dev); - kfree_skb(pkt_dev->skb); - - pkt_dev->skb = NULL; } if_unlock(t); @@ -3348,156 +3376,117 @@ static void pktgen_rem_thread(struct pktgen_thread *t) mutex_unlock(&pktgen_thread_lock); } -static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) +static void pktgen_resched(struct pktgen_dev *pkt_dev) +{ + ktime_t idle_start = ktime_now(); + schedule(); + pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); +} + +static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) +{ + ktime_t idle_start = ktime_now(); + + while (atomic_read(&(pkt_dev->skb->users)) != 1) { + if (signal_pending(current)) + break; + + if (need_resched()) + pktgen_resched(pkt_dev); + else + cpu_relax(); + } + pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); +} + +static void pktgen_xmit(struct pktgen_dev *pkt_dev) { struct net_device *odev = pkt_dev->odev; - int (*xmit)(struct sk_buff *, struct net_device *) + netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *) = odev->netdev_ops->ndo_start_xmit; struct netdev_queue *txq; - __u64 idle_start = 0; u16 queue_map; int ret; - if (pkt_dev->delay_us || pkt_dev->delay_ns) { - u64 now; - - now = getCurUs(); - if (now < pkt_dev->next_tx_us) - spin(pkt_dev, pkt_dev->next_tx_us); - - /* This is max DELAY, this has special meaning of - * "never transmit" - */ - if (pkt_dev->delay_us == 0x7FFFFFFF) { - pkt_dev->next_tx_us = getCurUs() + pkt_dev->delay_us; - pkt_dev->next_tx_ns = pkt_dev->delay_ns; - goto out; - } + /* If device is offline, then don't send */ + if (unlikely(!netif_running(odev) || !netif_carrier_ok(odev))) { + pktgen_stop_device(pkt_dev); + return; } - if (!pkt_dev->skb) { - set_cur_queue_map(pkt_dev); - queue_map = pkt_dev->cur_queue_map; - } else { - queue_map = skb_get_queue_mapping(pkt_dev->skb); + /* This is max DELAY, this has special meaning of + * "never transmit" + */ + if (unlikely(pkt_dev->delay == ULLONG_MAX)) { + pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX); + return; } - txq = netdev_get_tx_queue(odev, queue_map); - if (netif_tx_queue_stopped(txq) || - netif_tx_queue_frozen(txq) || - need_resched()) { - idle_start = getCurUs(); - - if (!netif_running(odev)) { - pktgen_stop_device(pkt_dev); - kfree_skb(pkt_dev->skb); - pkt_dev->skb = NULL; - goto out; - } - if (need_resched()) - schedule(); - - pkt_dev->idle_acc += getCurUs() - idle_start; + /* If no skb or clone count exhausted then get new one */ + if (!pkt_dev->skb || (pkt_dev->last_ok && + ++pkt_dev->clone_count >= pkt_dev->clone_skb)) { + /* build a new pkt */ + kfree_skb(pkt_dev->skb); - if (netif_tx_queue_stopped(txq) || - netif_tx_queue_frozen(txq)) { - pkt_dev->next_tx_us = getCurUs(); /* TODO */ - pkt_dev->next_tx_ns = 0; - goto out; /* Try the next interface */ + pkt_dev->skb = fill_packet(odev, pkt_dev); + if (pkt_dev->skb == NULL) { + printk(KERN_ERR "pktgen: ERROR: couldn't " + "allocate skb in fill_packet.\n"); + schedule(); + pkt_dev->clone_count--; /* back out increment, OOM */ + return; } + pkt_dev->last_pkt_size = pkt_dev->skb->len; + pkt_dev->allocated_skbs++; + pkt_dev->clone_count = 0; /* reset counter */ } - if (pkt_dev->last_ok || !pkt_dev->skb) { - if ((++pkt_dev->clone_count >= pkt_dev->clone_skb) - || (!pkt_dev->skb)) { - /* build a new pkt */ - kfree_skb(pkt_dev->skb); + if (pkt_dev->delay && pkt_dev->last_ok) + spin(pkt_dev, pkt_dev->next_tx); - pkt_dev->skb = fill_packet(odev, pkt_dev); - if (pkt_dev->skb == NULL) { - printk(KERN_ERR "pktgen: ERROR: couldn't " - "allocate skb in fill_packet.\n"); - schedule(); - pkt_dev->clone_count--; /* back out increment, OOM */ - goto out; - } - pkt_dev->allocated_skbs++; - pkt_dev->clone_count = 0; /* reset counter */ - } - } - - /* fill_packet() might have changed the queue */ queue_map = skb_get_queue_mapping(pkt_dev->skb); txq = netdev_get_tx_queue(odev, queue_map); __netif_tx_lock_bh(txq); - if (!netif_tx_queue_stopped(txq) && - !netif_tx_queue_frozen(txq)) { - - atomic_inc(&(pkt_dev->skb->users)); - retry_now: - ret = (*xmit)(pkt_dev->skb, odev); - if (likely(ret == NETDEV_TX_OK)) { - txq_trans_update(txq); - pkt_dev->last_ok = 1; - pkt_dev->sofar++; - pkt_dev->seq_num++; - pkt_dev->tx_bytes += pkt_dev->cur_pkt_size; - - } else if (ret == NETDEV_TX_LOCKED - && (odev->features & NETIF_F_LLTX)) { - cpu_relax(); - goto retry_now; - } else { /* Retry it next time */ - - atomic_dec(&(pkt_dev->skb->users)); - - if (debug && net_ratelimit()) - printk(KERN_INFO "pktgen: Hard xmit error\n"); - - pkt_dev->errors++; - pkt_dev->last_ok = 0; - } - - pkt_dev->next_tx_us = getCurUs(); - pkt_dev->next_tx_ns = 0; - - pkt_dev->next_tx_us += pkt_dev->delay_us; - pkt_dev->next_tx_ns += pkt_dev->delay_ns; - - if (pkt_dev->next_tx_ns > 1000) { - pkt_dev->next_tx_us++; - pkt_dev->next_tx_ns -= 1000; - } - } - else { /* Retry it next time */ + if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) { + ret = NETDEV_TX_BUSY; + pkt_dev->last_ok = 0; + goto unlock; + } + atomic_inc(&(pkt_dev->skb->users)); + ret = (*xmit)(pkt_dev->skb, odev); + + switch (ret) { + case NETDEV_TX_OK: + txq_trans_update(txq); + pkt_dev->last_ok = 1; + pkt_dev->sofar++; + pkt_dev->seq_num++; + pkt_dev->tx_bytes += pkt_dev->last_pkt_size; + break; + default: /* Drivers are not supposed to return other values! */ + if (net_ratelimit()) + pr_info("pktgen: %s xmit error: %d\n", + pkt_dev->odevname, ret); + pkt_dev->errors++; + /* fallthru */ + case NETDEV_TX_LOCKED: + case NETDEV_TX_BUSY: + /* Retry it next time */ + atomic_dec(&(pkt_dev->skb->users)); pkt_dev->last_ok = 0; - pkt_dev->next_tx_us = getCurUs(); /* TODO */ - pkt_dev->next_tx_ns = 0; } - +unlock: __netif_tx_unlock_bh(txq); /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { - if (atomic_read(&(pkt_dev->skb->users)) != 1) { - idle_start = getCurUs(); - while (atomic_read(&(pkt_dev->skb->users)) != 1) { - if (signal_pending(current)) { - break; - } - schedule(); - } - pkt_dev->idle_acc += getCurUs() - idle_start; - } + pktgen_wait_for_skb(pkt_dev); /* Done with this */ pktgen_stop_device(pkt_dev); - kfree_skb(pkt_dev->skb); - pkt_dev->skb = NULL; } -out:; } /* @@ -3516,7 +3505,8 @@ static int pktgen_thread_worker(void *arg) init_waitqueue_head(&t->queue); complete(&t->start_done); - pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current)); + pr_debug("pktgen: starting pktgen/%d: pid=%d\n", + cpu, task_pid_nr(current)); set_current_state(TASK_INTERRUPTIBLE); @@ -3525,20 +3515,24 @@ static int pktgen_thread_worker(void *arg) while (!kthread_should_stop()) { pkt_dev = next_to_run(t); - if (!pkt_dev && - (t->control & (T_STOP | T_RUN | T_REMDEVALL | T_REMDEV)) - == 0) { - prepare_to_wait(&(t->queue), &wait, - TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); - finish_wait(&(t->queue), &wait); + if (unlikely(!pkt_dev && t->control == 0)) { + wait_event_interruptible_timeout(t->queue, + t->control != 0, + HZ/10); + continue; } __set_current_state(TASK_RUNNING); - if (pkt_dev) + if (likely(pkt_dev)) { pktgen_xmit(pkt_dev); + if (need_resched()) + pktgen_resched(pkt_dev); + else + cpu_relax(); + } + if (t->control & T_STOP) { pktgen_stop(t); t->control &= ~(T_STOP); @@ -3577,13 +3571,18 @@ static int pktgen_thread_worker(void *arg) } static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, - const char *ifname) + const char *ifname, bool exact) { struct pktgen_dev *p, *pkt_dev = NULL; - if_lock(t); + size_t len = strlen(ifname); + if_lock(t); list_for_each_entry(p, &t->if_list, list) - if (strncmp(p->odev->name, ifname, IFNAMSIZ) == 0) { + if (strncmp(p->odevname, ifname, len) == 0) { + if (p->odevname[len]) { + if (exact || p->odevname[len] != '@') + continue; + } pkt_dev = p; break; } @@ -3626,6 +3625,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) { struct pktgen_dev *pkt_dev; int err; + int node = cpu_to_node(t->cpu); /* We don't allow a device to be on several threads */ @@ -3635,11 +3635,13 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) return -EBUSY; } - pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL); + pkt_dev = kzalloc_node(sizeof(struct pktgen_dev), GFP_KERNEL, node); if (!pkt_dev) return -ENOMEM; - pkt_dev->flows = vmalloc(MAX_CFLOWS * sizeof(struct flow_state)); + strcpy(pkt_dev->odevname, ifname); + pkt_dev->flows = vmalloc_node(MAX_CFLOWS * sizeof(struct flow_state), + node); if (pkt_dev->flows == NULL) { kfree(pkt_dev); return -ENOMEM; @@ -3651,8 +3653,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->max_pkt_size = ETH_ZLEN; pkt_dev->nfrags = 0; pkt_dev->clone_skb = pg_clone_skb_d; - pkt_dev->delay_us = pg_delay_d / 1000; - pkt_dev->delay_ns = pg_delay_d % 1000; + pkt_dev->delay = pg_delay_d; pkt_dev->count = pg_count_d; pkt_dev->sofar = 0; pkt_dev->udp_src_min = 9; /* sink port */ @@ -3702,7 +3703,8 @@ static int __init pktgen_create_thread(int cpu) struct proc_dir_entry *pe; struct task_struct *p; - t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL); + t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL, + cpu_to_node(cpu)); if (!t) { printk(KERN_ERR "pktgen: ERROR: out of memory, can't " "create new thread.\n"); @@ -3864,10 +3866,15 @@ static void __exit pg_cleanup(void) module_init(pg_init); module_exit(pg_cleanup); -MODULE_AUTHOR("Robert Olsson <[email protected]"); +MODULE_AUTHOR("Robert Olsson <[email protected]>"); MODULE_DESCRIPTION("Packet Generator tool"); MODULE_LICENSE("GPL"); +MODULE_VERSION(VERSION); module_param(pg_count_d, int, 0); +MODULE_PARM_DESC(pg_count_d, "Default number of packets to inject"); module_param(pg_delay_d, int, 0); +MODULE_PARM_DESC(pg_delay_d, "Default delay between packets (nanoseconds)"); module_param(pg_clone_skb_d, int, 0); +MODULE_PARM_DESC(pg_clone_skb_d, "Default number of copies of the same packet"); module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Enable debugging of pktgen module"); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d78030f88bd0..33148a568199 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -35,11 +35,9 @@ #include <linux/security.h> #include <linux/mutex.h> #include <linux/if_addr.h> -#include <linux/nsproxy.h> #include <asm/uaccess.h> #include <asm/system.h> -#include <asm/string.h> #include <linux/inet.h> #include <linux/netdevice.h> @@ -52,9 +50,9 @@ #include <net/pkt_sched.h> #include <net/fib_rules.h> #include <net/rtnetlink.h> +#include <net/net_namespace.h> -struct rtnl_link -{ +struct rtnl_link { rtnl_doit_func doit; rtnl_dumpit_func dumpit; }; @@ -65,6 +63,7 @@ void rtnl_lock(void) { mutex_lock(&rtnl_mutex); } +EXPORT_SYMBOL(rtnl_lock); void __rtnl_unlock(void) { @@ -76,16 +75,19 @@ void rtnl_unlock(void) /* This fellow will unlock it for us. */ netdev_run_todo(); } +EXPORT_SYMBOL(rtnl_unlock); int rtnl_trylock(void) { return mutex_trylock(&rtnl_mutex); } +EXPORT_SYMBOL(rtnl_trylock); int rtnl_is_locked(void) { return mutex_is_locked(&rtnl_mutex); } +EXPORT_SYMBOL(rtnl_is_locked); static struct rtnl_link *rtnl_msg_handlers[NPROTO]; @@ -168,7 +170,6 @@ int __rtnl_register(int protocol, int msgtype, return 0; } - EXPORT_SYMBOL_GPL(__rtnl_register); /** @@ -188,7 +189,6 @@ void rtnl_register(int protocol, int msgtype, "protocol = %d, message type = %d\n", protocol, msgtype); } - EXPORT_SYMBOL_GPL(rtnl_register); /** @@ -213,7 +213,6 @@ int rtnl_unregister(int protocol, int msgtype) return 0; } - EXPORT_SYMBOL_GPL(rtnl_unregister); /** @@ -230,7 +229,6 @@ void rtnl_unregister_all(int protocol) kfree(rtnl_msg_handlers[protocol]); rtnl_msg_handlers[protocol] = NULL; } - EXPORT_SYMBOL_GPL(rtnl_unregister_all); static LIST_HEAD(link_ops); @@ -248,12 +246,11 @@ static LIST_HEAD(link_ops); int __rtnl_link_register(struct rtnl_link_ops *ops) { if (!ops->dellink) - ops->dellink = unregister_netdevice; + ops->dellink = unregister_netdevice_queue; list_add_tail(&ops->list, &link_ops); return 0; } - EXPORT_SYMBOL_GPL(__rtnl_link_register); /** @@ -271,19 +268,18 @@ int rtnl_link_register(struct rtnl_link_ops *ops) rtnl_unlock(); return err; } - EXPORT_SYMBOL_GPL(rtnl_link_register); static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) { struct net_device *dev; -restart: + LIST_HEAD(list_kill); + for_each_netdev(net, dev) { - if (dev->rtnl_link_ops == ops) { - ops->dellink(dev); - goto restart; - } + if (dev->rtnl_link_ops == ops) + ops->dellink(dev, &list_kill); } + unregister_netdevice_many(&list_kill); } void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) @@ -309,7 +305,6 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops) } list_del(&ops->list); } - EXPORT_SYMBOL_GPL(__rtnl_link_unregister); /** @@ -322,7 +317,6 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops) __rtnl_link_unregister(ops); rtnl_unlock(); } - EXPORT_SYMBOL_GPL(rtnl_link_unregister); static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) @@ -427,12 +421,13 @@ void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data struct rtattr *rta; int size = RTA_LENGTH(attrlen); - rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size)); + rta = (struct rtattr *)skb_put(skb, RTA_ALIGN(size)); rta->rta_type = attrtype; rta->rta_len = size; memcpy(RTA_DATA(rta), data, attrlen); memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size); } +EXPORT_SYMBOL(__rta_fill); int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo) { @@ -454,6 +449,7 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) return nlmsg_unicast(rtnl, skb, pid); } +EXPORT_SYMBOL(rtnl_unicast); void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, struct nlmsghdr *nlh, gfp_t flags) @@ -466,6 +462,7 @@ void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, nlmsg_notify(rtnl, skb, pid, group, report, flags); } +EXPORT_SYMBOL(rtnl_notify); void rtnl_set_sk_err(struct net *net, u32 group, int error) { @@ -473,6 +470,7 @@ void rtnl_set_sk_err(struct net *net, u32 group, int error) netlink_set_err(rtnl, 0, group, error); } +EXPORT_SYMBOL(rtnl_set_sk_err); int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) { @@ -501,6 +499,7 @@ nla_put_failure: nla_nest_cancel(skb, mx); return -EMSGSIZE; } +EXPORT_SYMBOL(rtnetlink_put_metrics); int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, u32 ts, u32 tsage, long expires, u32 error) @@ -520,14 +519,13 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); } - EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); static void set_operstate(struct net_device *dev, unsigned char transition) { unsigned char operstate = dev->operstate; - switch(transition) { + switch (transition) { case IF_OPER_UP: if ((operstate == IF_OPER_DORMANT || operstate == IF_OPER_UNKNOWN) && @@ -606,7 +604,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags) { - struct netdev_queue *txq; struct ifinfomsg *ifm; struct nlmsghdr *nlh; const struct net_device_stats *stats; @@ -637,9 +634,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (dev->master) NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); - txq = netdev_get_tx_queue(dev, 0); - if (txq->qdisc_sleeping) - NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id); + if (dev->qdisc) + NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc->ops->id); if (dev->ifalias) NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias); @@ -684,22 +680,33 @@ nla_put_failure: static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - int idx; - int s_idx = cb->args[0]; + int h, s_h; + int idx = 0, s_idx; struct net_device *dev; - - idx = 0; - for_each_netdev(net, dev) { - if (idx < s_idx) - goto cont; - if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) - break; + struct hlist_head *head; + struct hlist_node *node; + + s_h = cb->args[0]; + s_idx = cb->args[1]; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; + if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, 0, + NLM_F_MULTI) <= 0) + goto out; cont: - idx++; + idx++; + } } - cb->args[0] = idx; +out: + cb->args[1] = idx; + cb->args[0] = h; return skb->len; } @@ -719,30 +726,26 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_NET_NS_PID] = { .type = NLA_U32 }, [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, }; +EXPORT_SYMBOL(ifla_policy); static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_KIND] = { .type = NLA_STRING }, [IFLA_INFO_DATA] = { .type = NLA_NESTED }, }; -static struct net *get_net_ns_by_pid(pid_t pid) +struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) { - struct task_struct *tsk; struct net *net; - - /* Lookup the network namespace */ - net = ERR_PTR(-ESRCH); - rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (tsk) { - struct nsproxy *nsproxy; - nsproxy = task_nsproxy(tsk); - if (nsproxy) - net = get_net(nsproxy->net_ns); - } - rcu_read_unlock(); + /* Examine the link attributes and figure out which + * network namespace we are talking about. + */ + if (tb[IFLA_NET_NS_PID]) + net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + else + net = get_net(src_net); return net; } +EXPORT_SYMBOL(rtnl_link_get_net); static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) { @@ -767,8 +770,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, int err; if (tb[IFLA_NET_NS_PID]) { - struct net *net; - net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + struct net *net = rtnl_link_get_net(dev_net(dev), tb); if (IS_ERR(net)) { err = PTR_ERR(net); goto errout; @@ -931,9 +933,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = dev_get_by_index(net, ifm->ifi_index); + dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = dev_get_by_name(net, ifname); + dev = __dev_get_by_name(net, ifname); else goto errout; @@ -942,12 +944,11 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto errout; } - if ((err = validate_linkmsg(dev, tb)) < 0) - goto errout_dev; + err = validate_linkmsg(dev, tb); + if (err < 0) + goto errout; err = do_setlink(dev, ifm, tb, ifname, 0); -errout_dev: - dev_put(dev); errout: return err; } @@ -984,30 +985,39 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!ops) return -EOPNOTSUPP; - ops->dellink(dev); + ops->dellink(dev, NULL); return 0; } -struct net_device *rtnl_create_link(struct net *net, char *ifname, - const struct rtnl_link_ops *ops, struct nlattr *tb[]) +struct net_device *rtnl_create_link(struct net *src_net, struct net *net, + char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) { int err; struct net_device *dev; + unsigned int num_queues = 1; + unsigned int real_num_queues = 1; + if (ops->get_tx_queues) { + err = ops->get_tx_queues(src_net, tb, &num_queues, + &real_num_queues); + if (err) + goto err; + } err = -ENOMEM; - dev = alloc_netdev(ops->priv_size, ifname, ops->setup); + dev = alloc_netdev_mq(ops->priv_size, ifname, ops->setup, num_queues); if (!dev) goto err; + dev_net_set(dev, net); + dev->rtnl_link_ops = ops; + dev->real_num_tx_queues = real_num_queues; + if (strchr(dev->name, '%')) { err = dev_alloc_name(dev, dev->name); if (err < 0) goto err_free; } - dev_net_set(dev, net); - dev->rtnl_link_ops = ops; - if (tb[IFLA_MTU]) dev->mtu = nla_get_u32(tb[IFLA_MTU]); if (tb[IFLA_ADDRESS]) @@ -1030,6 +1040,7 @@ err_free: err: return ERR_PTR(err); } +EXPORT_SYMBOL(rtnl_create_link); static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { @@ -1063,7 +1074,8 @@ replay: else dev = NULL; - if ((err = validate_linkmsg(dev, tb)) < 0) + err = validate_linkmsg(dev, tb); + if (err < 0) return err; if (tb[IFLA_LINKINFO]) { @@ -1084,6 +1096,7 @@ replay: if (1) { struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; + struct net *dest_net; if (ops) { if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { @@ -1148,17 +1161,19 @@ replay: if (!ifname[0]) snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); - dev = rtnl_create_link(net, ifname, ops, tb); + dest_net = rtnl_link_get_net(net, tb); + dev = rtnl_create_link(net, dest_net, ifname, ops, tb); if (IS_ERR(dev)) err = PTR_ERR(dev); else if (ops->newlink) - err = ops->newlink(dev, tb, data); + err = ops->newlink(net, dev, tb, data); else err = register_netdevice(dev); - if (err < 0 && !IS_ERR(dev)) free_netdev(dev); + + put_net(dest_net); return err; } } @@ -1167,6 +1182,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; + char ifname[IFNAMSIZ]; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; struct sk_buff *nskb; @@ -1176,19 +1192,23 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (err < 0) return err; + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) { - dev = dev_get_by_index(net, ifm->ifi_index); - if (dev == NULL) - return -ENODEV; - } else + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(net, ifm->ifi_index); + else if (tb[IFLA_IFNAME]) + dev = __dev_get_by_name(net, ifname); + else return -EINVAL; + if (dev == NULL) + return -ENODEV; + nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); - if (nskb == NULL) { - err = -ENOBUFS; - goto errout; - } + if (nskb == NULL) + return -ENOBUFS; err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0); @@ -1196,11 +1216,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); - goto errout; - } - err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); -errout: - dev_put(dev); + } else + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); return err; } @@ -1212,7 +1229,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) if (s_idx == 0) s_idx = 1; - for (idx=1; idx<NPROTO; idx++) { + for (idx = 1; idx < NPROTO; idx++) { int type = cb->nlh->nlmsg_type-RTM_BASE; if (idx < s_idx || idx == PF_PACKET) continue; @@ -1279,7 +1296,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) return 0; - family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family; + family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; if (family >= NPROTO) return -EAFNOSUPPORT; @@ -1312,7 +1329,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (nlh->nlmsg_len > min_len) { int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len); + struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); while (RTA_OK(attr, attrlen)) { unsigned flavor = attr->rta_type; @@ -1418,14 +1435,3 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all); } -EXPORT_SYMBOL(__rta_fill); -EXPORT_SYMBOL(rtnetlink_put_metrics); -EXPORT_SYMBOL(rtnl_lock); -EXPORT_SYMBOL(rtnl_trylock); -EXPORT_SYMBOL(rtnl_unlock); -EXPORT_SYMBOL(rtnl_is_locked); -EXPORT_SYMBOL(rtnl_unicast); -EXPORT_SYMBOL(rtnl_notify); -EXPORT_SYMBOL(rtnl_set_sk_err); -EXPORT_SYMBOL(rtnl_create_link); -EXPORT_SYMBOL(ifla_policy); diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c deleted file mode 100644 index 79687dfd6957..000000000000 --- a/net/core/skb_dma_map.c +++ /dev/null @@ -1,65 +0,0 @@ -/* skb_dma_map.c: DMA mapping helpers for socket buffers. - * - * Copyright (C) David S. Miller <[email protected]> - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/dma-mapping.h> -#include <linux/skbuff.h> - -int skb_dma_map(struct device *dev, struct sk_buff *skb, - enum dma_data_direction dir) -{ - struct skb_shared_info *sp = skb_shinfo(skb); - dma_addr_t map; - int i; - - map = dma_map_single(dev, skb->data, - skb_headlen(skb), dir); - if (dma_mapping_error(dev, map)) - goto out_err; - - sp->dma_head = map; - for (i = 0; i < sp->nr_frags; i++) { - skb_frag_t *fp = &sp->frags[i]; - - map = dma_map_page(dev, fp->page, fp->page_offset, - fp->size, dir); - if (dma_mapping_error(dev, map)) - goto unwind; - sp->dma_maps[i] = map; - } - - return 0; - -unwind: - while (--i >= 0) { - skb_frag_t *fp = &sp->frags[i]; - - dma_unmap_page(dev, sp->dma_maps[i], - fp->size, dir); - } - dma_unmap_single(dev, sp->dma_head, - skb_headlen(skb), dir); -out_err: - return -ENOMEM; -} -EXPORT_SYMBOL(skb_dma_map); - -void skb_dma_unmap(struct device *dev, struct sk_buff *skb, - enum dma_data_direction dir) -{ - struct skb_shared_info *sp = skb_shinfo(skb); - int i; - - dma_unmap_single(dev, sp->dma_head, - skb_headlen(skb), dir); - for (i = 0; i < sp->nr_frags; i++) { - skb_frag_t *fp = &sp->frags[i]; - - dma_unmap_page(dev, sp->dma_maps[i], - fp->size, dir); - } -} -EXPORT_SYMBOL(skb_dma_unmap); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9e0597d189b0..bfa3e7865a8c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -493,6 +493,9 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) { struct skb_shared_info *shinfo; + if (irqs_disabled()) + return 0; + if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) return 0; @@ -546,7 +549,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif new->protocol = old->protocol; new->mark = old->mark; - new->iif = old->iif; + new->skb_iif = old->skb_iif; __nf_copy(new, old); #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) @@ -559,9 +562,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif #endif new->vlan_tci = old->vlan_tci; -#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) - new->do_not_encrypt = old->do_not_encrypt; -#endif skb_copy_secmark(new, old); } @@ -2704,7 +2704,8 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) NAPI_GRO_CB(skb)->free = 1; goto done; - } + } else if (skb_gro_len(p) != pinfo->gso_size) + return -E2BIG; headroom = skb_headroom(p); nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p)); diff --git a/net/core/sock.c b/net/core/sock.c index bbb25be7ddfe..76ff58d43e26 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -142,7 +142,7 @@ static struct lock_class_key af_family_slock_keys[AF_MAX]; * strings build-time, so that runtime initialization of socket * locks is fast): */ -static const char *af_family_key_strings[AF_MAX+1] = { +static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" , "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK", "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" , @@ -158,7 +158,7 @@ static const char *af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_IEEE802154", "sk_lock-AF_MAX" }; -static const char *af_family_slock_key_strings[AF_MAX+1] = { +static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK", "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" , @@ -174,7 +174,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_IEEE802154", "slock-AF_MAX" }; -static const char *af_family_clock_key_strings[AF_MAX+1] = { +static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK", "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" , @@ -274,25 +274,27 @@ static void sock_disable_timestamp(struct sock *sk, int flag) int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { - int err = 0; + int err; int skb_len; + unsigned long flags; + struct sk_buff_head *list = &sk->sk_receive_queue; /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces number of warnings when compiling with -W --ANK */ if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) { - err = -ENOMEM; - goto out; + atomic_inc(&sk->sk_drops); + return -ENOMEM; } err = sk_filter(sk, skb); if (err) - goto out; + return err; if (!sk_rmem_schedule(sk, skb->truesize)) { - err = -ENOBUFS; - goto out; + atomic_inc(&sk->sk_drops); + return -ENOBUFS; } skb->dev = NULL; @@ -305,12 +307,14 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ skb_len = skb->len; - skb_queue_tail(&sk->sk_receive_queue, skb); + spin_lock_irqsave(&list->lock, flags); + skb->dropcount = atomic_read(&sk->sk_drops); + __skb_queue_tail(list, skb); + spin_unlock_irqrestore(&list->lock, flags); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, skb_len); -out: - return err; + return 0; } EXPORT_SYMBOL(sock_queue_rcv_skb); @@ -348,11 +352,18 @@ discard_and_relse: } EXPORT_SYMBOL(sk_receive_skb); +void sk_reset_txq(struct sock *sk) +{ + sk_tx_queue_clear(sk); +} +EXPORT_SYMBOL(sk_reset_txq); + struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) { struct dst_entry *dst = sk->sk_dst_cache; if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { + sk_tx_queue_clear(sk); sk->sk_dst_cache = NULL; dst_release(dst); return NULL; @@ -406,17 +417,18 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) if (copy_from_user(devname, optval, optlen)) goto out; - if (devname[0] == '\0') { - index = 0; - } else { - struct net_device *dev = dev_get_by_name(net, devname); + index = 0; + if (devname[0] != '\0') { + struct net_device *dev; + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, devname); + if (dev) + index = dev->ifindex; + rcu_read_unlock(); ret = -ENODEV; if (!dev) goto out; - - index = dev->ifindex; - dev_put(dev); } lock_sock(sk); @@ -446,7 +458,7 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) */ int sock_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { struct sock *sk = sock->sk; int val; @@ -482,6 +494,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname, sk->sk_reuse = valbool; break; case SO_TYPE: + case SO_PROTOCOL: + case SO_DOMAIN: case SO_ERROR: ret = -ENOPROTOOPT; break; @@ -700,6 +714,12 @@ set_rcvbuf: /* We implement the SO_SNDLOWAT etc to not be settable (1003.1g 5.3) */ + case SO_RXQ_OVFL: + if (valbool) + sock_set_flag(sk, SOCK_RXQ_OVFL); + else + sock_reset_flag(sk, SOCK_RXQ_OVFL); + break; default: ret = -ENOPROTOOPT; break; @@ -764,6 +784,14 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_type; break; + case SO_PROTOCOL: + v.val = sk->sk_protocol; + break; + + case SO_DOMAIN: + v.val = sk->sk_family; + break; + case SO_ERROR: v.val = -sock_error(sk); if (v.val == 0) @@ -891,6 +919,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_mark; break; + case SO_RXQ_OVFL: + v.val = !!sock_flag(sk, SOCK_RXQ_OVFL); + break; + default: return -ENOPROTOOPT; } @@ -929,7 +961,8 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) void *sptr = nsk->sk_security; #endif BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != - sizeof(osk->sk_node) + sizeof(osk->sk_refcnt)); + sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) + + sizeof(osk->sk_tx_queue_mapping)); memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); #ifdef CONFIG_SECURITY_NETWORK @@ -973,6 +1006,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!try_module_get(prot->owner)) goto out_free_sec; + sk_tx_queue_clear(sk); } return sk; @@ -1025,6 +1059,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); sock_net_set(sk, get_net(net)); + atomic_set(&sk->sk_wmem_alloc, 1); } return sk; @@ -1195,12 +1230,12 @@ EXPORT_SYMBOL_GPL(sk_setup_caps); void __init sk_init(void) { - if (num_physpages <= 4096) { + if (totalram_pages <= 4096) { sysctl_wmem_max = 32767; sysctl_rmem_max = 32767; sysctl_wmem_default = 32767; sysctl_rmem_default = 32767; - } else if (num_physpages >= 131072) { + } else if (totalram_pages >= 131072) { sysctl_wmem_max = 131071; sysctl_rmem_max = 131071; } @@ -1217,17 +1252,22 @@ void __init sk_init(void) void sock_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; - int res; + unsigned int len = skb->truesize; - /* In case it might be waiting for more memory. */ - res = atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc); - if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) + if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) { + /* + * Keep a reference on sk_wmem_alloc, this will be released + * after sk_write_space() call + */ + atomic_sub(len - 1, &sk->sk_wmem_alloc); sk->sk_write_space(sk); + len = 1; + } /* - * if sk_wmem_alloc reached 0, we are last user and should - * free this sock, as sk_free() call could not do it. + * if sk_wmem_alloc reaches 0, we must finish what sk_free() + * could not do because of in-flight packets */ - if (res == 0) + if (atomic_sub_and_test(len, &sk->sk_wmem_alloc)) __sk_free(sk); } EXPORT_SYMBOL(sock_wfree); @@ -1686,7 +1726,7 @@ int sock_no_shutdown(struct socket *sock, int how) EXPORT_SYMBOL(sock_no_shutdown); int sock_no_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { return -EOPNOTSUPP; } @@ -1872,7 +1912,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) */ smp_wmb(); atomic_set(&sk->sk_refcnt, 1); - atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_drops, 0); } EXPORT_SYMBOL(sock_init_data); @@ -2008,7 +2047,7 @@ EXPORT_SYMBOL(sock_common_recvmsg); * Set socket options on an inet socket. */ int sock_common_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { struct sock *sk = sock->sk; @@ -2018,7 +2057,7 @@ EXPORT_SYMBOL(sock_common_setsockopt); #ifdef CONFIG_COMPAT int compat_sock_common_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { struct sock *sk = sock->sk; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7db1de0497c6..06124872af5b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -10,14 +10,15 @@ #include <linux/module.h> #include <linux/socket.h> #include <linux/netdevice.h> +#include <linux/ratelimit.h> #include <linux/init.h> + #include <net/ip.h> #include <net/sock.h> static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { - .ctl_name = NET_CORE_WMEM_MAX, .procname = "wmem_max", .data = &sysctl_wmem_max, .maxlen = sizeof(int), @@ -25,7 +26,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_RMEM_MAX, .procname = "rmem_max", .data = &sysctl_rmem_max, .maxlen = sizeof(int), @@ -33,7 +33,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_WMEM_DEFAULT, .procname = "wmem_default", .data = &sysctl_wmem_default, .maxlen = sizeof(int), @@ -41,7 +40,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_RMEM_DEFAULT, .procname = "rmem_default", .data = &sysctl_rmem_default, .maxlen = sizeof(int), @@ -49,7 +47,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_DEV_WEIGHT, .procname = "dev_weight", .data = &weight_p, .maxlen = sizeof(int), @@ -57,7 +54,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_MAX_BACKLOG, .procname = "netdev_max_backlog", .data = &netdev_max_backlog, .maxlen = sizeof(int), @@ -65,16 +61,13 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_MSG_COST, .procname = "message_cost", .data = &net_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_CORE_MSG_BURST, .procname = "message_burst", .data = &net_ratelimit_state.burst, .maxlen = sizeof(int), @@ -82,7 +75,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_CORE_OPTMEM_MAX, .procname = "optmem_max", .data = &sysctl_optmem_max, .maxlen = sizeof(int), @@ -91,7 +83,6 @@ static struct ctl_table net_core_table[] = { }, #endif /* CONFIG_NET */ { - .ctl_name = NET_CORE_BUDGET, .procname = "netdev_budget", .data = &netdev_budget, .maxlen = sizeof(int), @@ -99,31 +90,29 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_WARNINGS, .procname = "warnings", .data = &net_msg_warn, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; static struct ctl_table netns_core_table[] = { { - .ctl_name = NET_CORE_SOMAXCONN, .procname = "somaxconn", .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; __net_initdata struct ctl_path net_core_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "core", .ctl_name = NET_CORE, }, + { .procname = "net", }, + { .procname = "core", }, { }, }; @@ -134,7 +123,7 @@ static __net_init int sysctl_core_net_init(struct net *net) net->core.sysctl_somaxconn = SOMAXCONN; tbl = netns_core_table; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); if (tbl == NULL) goto err_dup; diff --git a/net/core/utils.c b/net/core/utils.c index 83221aee7084..838250241d26 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -24,6 +24,8 @@ #include <linux/types.h> #include <linux/percpu.h> #include <linux/init.h> +#include <linux/ratelimit.h> + #include <net/sock.h> #include <asm/byteorder.h> |