aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c6
-rw-r--r--net/core/dev.c119
-rw-r--r--net/core/drop_monitor.c12
-rw-r--r--net/core/ethtool.c41
-rw-r--r--net/core/gen_estimator.c15
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/net-sysfs.c63
-rw-r--r--net/core/net-sysfs.h1
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/rtnetlink.c30
-rw-r--r--net/core/skbuff.c82
-rw-r--r--net/core/sock.c52
12 files changed, 302 insertions, 127 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e0097531417a..f5b6f43a4c2e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -229,15 +229,17 @@ EXPORT_SYMBOL(skb_free_datagram);
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
{
+ bool slow;
+
if (likely(atomic_read(&skb->users) == 1))
smp_rmb();
else if (likely(!atomic_dec_and_test(&skb->users)))
return;
- lock_sock_bh(sk);
+ slow = lock_sock_fast(sk);
skb_orphan(skb);
sk_mem_reclaim_partial(sk);
- unlock_sock_bh(sk);
+ unlock_sock_fast(sk, slow);
/* skb is now orphaned, can be freed outside of locked section */
__kfree_skb(skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0aab66d68b19..0ea10f849be8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -954,18 +954,22 @@ int dev_alloc_name(struct net_device *dev, const char *name)
}
EXPORT_SYMBOL(dev_alloc_name);
-static int dev_get_valid_name(struct net *net, const char *name, char *buf,
- bool fmt)
+static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt)
{
+ struct net *net;
+
+ BUG_ON(!dev_net(dev));
+ net = dev_net(dev);
+
if (!dev_valid_name(name))
return -EINVAL;
if (fmt && strchr(name, '%'))
- return __dev_alloc_name(net, name, buf);
+ return dev_alloc_name(dev, name);
else if (__dev_get_by_name(net, name))
return -EEXIST;
- else if (buf != name)
- strlcpy(buf, name, IFNAMSIZ);
+ else if (dev->name != name)
+ strlcpy(dev->name, name, IFNAMSIZ);
return 0;
}
@@ -997,20 +1001,15 @@ int dev_change_name(struct net_device *dev, const char *newname)
memcpy(oldname, dev->name, IFNAMSIZ);
- err = dev_get_valid_name(net, newname, dev->name, 1);
+ err = dev_get_valid_name(dev, newname, 1);
if (err < 0)
return err;
rollback:
- /* For now only devices in the initial network namespace
- * are in sysfs.
- */
- if (net_eq(net, &init_net)) {
- ret = device_rename(&dev->dev, dev->name);
- if (ret) {
- memcpy(dev->name, oldname, IFNAMSIZ);
- return ret;
- }
+ ret = device_rename(&dev->dev, dev->name);
+ if (ret) {
+ memcpy(dev->name, oldname, IFNAMSIZ);
+ return ret;
}
write_lock_bh(&dev_base_lock);
@@ -1554,6 +1553,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
rcu_read_unlock();
}
+/*
+ * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
+ * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ */
+void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+{
+ unsigned int real_num = dev->real_num_tx_queues;
+
+ if (unlikely(txq > dev->num_tx_queues))
+ ;
+ else if (txq > real_num)
+ dev->real_num_tx_queues = txq;
+ else if (txq < real_num) {
+ dev->real_num_tx_queues = txq;
+ qdisc_reset_all_tx_gt(dev, txq);
+ }
+}
+EXPORT_SYMBOL(netif_set_real_num_tx_queues);
static inline void __netif_reschedule(struct Qdisc *q)
{
@@ -1894,8 +1911,16 @@ static int dev_gso_segment(struct sk_buff *skb)
*/
static inline void skb_orphan_try(struct sk_buff *skb)
{
- if (!skb_tx(skb)->flags)
+ struct sock *sk = skb->sk;
+
+ if (sk && !skb_tx(skb)->flags) {
+ /* skb_tx_hash() wont be able to get sk.
+ * We copy sk_hash into skb->rxhash
+ */
+ if (!skb->rxhash)
+ skb->rxhash = sk->sk_hash;
skb_orphan(skb);
+ }
}
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -1981,8 +2006,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
if (skb->sk && skb->sk->sk_hash)
hash = skb->sk->sk_hash;
else
- hash = (__force u16) skb->protocol;
-
+ hash = (__force u16) skb->protocol ^ skb->rxhash;
hash = jhash_1word(hash, hashrnd);
return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
@@ -2005,12 +2029,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
{
- u16 queue_index;
+ int queue_index;
struct sock *sk = skb->sk;
- if (sk_tx_queue_recorded(sk)) {
- queue_index = sk_tx_queue_get(sk);
- } else {
+ queue_index = sk_tx_queue_get(sk);
+ if (queue_index < 0) {
const struct net_device_ops *ops = dev->netdev_ops;
if (ops->ndo_select_queue) {
@@ -2254,11 +2277,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
if (skb_rx_queue_recorded(skb)) {
u16 index = skb_get_rx_queue(skb);
if (unlikely(index >= dev->num_rx_queues)) {
- if (net_ratelimit()) {
- pr_warning("%s received packet on queue "
- "%u, but number of RX queues is %u\n",
- dev->name, index, dev->num_rx_queues);
- }
+ WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
+ "on queue %u, but number of RX queues is %u\n",
+ dev->name, index, dev->num_rx_queues);
goto done;
}
rxqueue = dev->_rx + index;
@@ -2796,7 +2817,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
struct net_device *orig_dev;
struct net_device *master;
struct net_device *null_or_orig;
- struct net_device *null_or_bond;
+ struct net_device *orig_or_bond;
int ret = NET_RX_DROP;
__be16 type;
@@ -2813,13 +2834,24 @@ static int __netif_receive_skb(struct sk_buff *skb)
if (!skb->skb_iif)
skb->skb_iif = skb->dev->ifindex;
+ /*
+ * bonding note: skbs received on inactive slaves should only
+ * be delivered to pkt handlers that are exact matches. Also
+ * the deliver_no_wcard flag will be set. If packet handlers
+ * are sensitive to duplicate packets these skbs will need to
+ * be dropped at the handler. The vlan accel path may have
+ * already set the deliver_no_wcard flag.
+ */
null_or_orig = NULL;
orig_dev = skb->dev;
master = ACCESS_ONCE(orig_dev->master);
- if (master) {
- if (skb_bond_should_drop(skb, master))
+ if (skb->deliver_no_wcard)
+ null_or_orig = orig_dev;
+ else if (master) {
+ if (skb_bond_should_drop(skb, master)) {
+ skb->deliver_no_wcard = 1;
null_or_orig = orig_dev; /* deliver only exact match */
- else
+ } else
skb->dev = master;
}
@@ -2869,10 +2901,10 @@ ncls:
* device that may have registered for a specific ptype. The
* handler may have to adjust skb->dev and orig_dev.
*/
- null_or_bond = NULL;
+ orig_or_bond = orig_dev;
if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
(vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
- null_or_bond = vlan_dev_real_dev(skb->dev);
+ orig_or_bond = vlan_dev_real_dev(skb->dev);
}
type = skb->protocol;
@@ -2880,7 +2912,7 @@ ncls:
&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
if (ptype->type == type && (ptype->dev == null_or_orig ||
ptype->dev == skb->dev || ptype->dev == orig_dev ||
- ptype->dev == null_or_bond)) {
+ ptype->dev == orig_or_bond)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
@@ -4965,7 +4997,7 @@ int register_netdevice(struct net_device *dev)
}
}
- ret = dev_get_valid_name(net, dev->name, dev->name, 0);
+ ret = dev_get_valid_name(dev, dev->name, 0);
if (ret)
goto err_uninit;
@@ -4994,8 +5026,6 @@ int register_netdevice(struct net_device *dev)
if (dev->features & NETIF_F_SG)
dev->features |= NETIF_F_GSO;
- netdev_initialize_kobject(dev);
-
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
if (ret)
@@ -5547,15 +5577,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
if (dev->features & NETIF_F_NETNS_LOCAL)
goto out;
-#ifdef CONFIG_SYSFS
- /* Don't allow real devices to be moved when sysfs
- * is enabled.
- */
- err = -EINVAL;
- if (dev->dev.parent)
- goto out;
-#endif
-
/* Ensure the device has been registrered */
err = -EINVAL;
if (dev->reg_state != NETREG_REGISTERED)
@@ -5574,7 +5595,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/* We get here if we can't use the current device name */
if (!pat)
goto out;
- if (dev_get_valid_name(net, pat, dev->name, 1))
+ if (dev_get_valid_name(dev, pat, 1))
goto out;
}
@@ -5606,8 +5627,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
dev_uc_flush(dev);
dev_mc_flush(dev);
- netdev_unregister_kobject(dev);
-
/* Actually switch the network namespace */
dev_net_set(dev, net);
@@ -5620,7 +5639,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
}
/* Fixup kobjects */
- err = netdev_register_kobject(dev);
+ err = device_rename(&dev->dev, dev->name);
WARN_ON(err);
/* Add the device back in the hashes */
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index cf208d8042b1..ad41529fb60f 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -172,12 +172,12 @@ out:
return;
}
-static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
+static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
{
trace_drop_common(skb, location);
}
-static void trace_napi_poll_hit(struct napi_struct *napi)
+static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
{
struct dm_hw_stat_delta *new_stat;
@@ -225,12 +225,12 @@ static int set_all_monitor_traces(int state)
switch (state) {
case TRACE_ON:
- rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
- rc |= register_trace_napi_poll(trace_napi_poll_hit);
+ rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+ rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
break;
case TRACE_OFF:
- rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
- rc |= unregister_trace_napi_poll(trace_napi_poll_hit);
+ rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+ rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
tracepoint_synchronize_unregister();
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a0f4964033d2..75e4ffeb8cc9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -318,23 +318,33 @@ out:
}
static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
- void __user *useraddr)
+ u32 cmd, void __user *useraddr)
{
- struct ethtool_rxnfc cmd;
+ struct ethtool_rxnfc info;
+ size_t info_size = sizeof(info);
if (!dev->ethtool_ops->set_rxnfc)
return -EOPNOTSUPP;
- if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+ /* struct ethtool_rxnfc was originally defined for
+ * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+ * members. User-space might still be using that
+ * definition. */
+ if (cmd == ETHTOOL_SRXFH)
+ info_size = (offsetof(struct ethtool_rxnfc, data) +
+ sizeof(info.data));
+
+ if (copy_from_user(&info, useraddr, info_size))
return -EFAULT;
- return dev->ethtool_ops->set_rxnfc(dev, &cmd);
+ return dev->ethtool_ops->set_rxnfc(dev, &info);
}
static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
- void __user *useraddr)
+ u32 cmd, void __user *useraddr)
{
struct ethtool_rxnfc info;
+ size_t info_size = sizeof(info);
const struct ethtool_ops *ops = dev->ethtool_ops;
int ret;
void *rule_buf = NULL;
@@ -342,13 +352,22 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (!ops->get_rxnfc)
return -EOPNOTSUPP;
- if (copy_from_user(&info, useraddr, sizeof(info)))
+ /* struct ethtool_rxnfc was originally defined for
+ * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+ * members. User-space might still be using that
+ * definition. */
+ if (cmd == ETHTOOL_GRXFH)
+ info_size = (offsetof(struct ethtool_rxnfc, data) +
+ sizeof(info.data));
+
+ if (copy_from_user(&info, useraddr, info_size))
return -EFAULT;
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
if (info.rule_cnt > 0) {
- rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
- GFP_USER);
+ if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
+ rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
+ GFP_USER);
if (!rule_buf)
return -ENOMEM;
}
@@ -359,7 +378,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
goto err_out;
ret = -EFAULT;
- if (copy_to_user(useraddr, &info, sizeof(info)))
+ if (copy_to_user(useraddr, &info, info_size))
goto err_out;
if (rule_buf) {
@@ -1516,12 +1535,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GRXCLSRLCNT:
case ETHTOOL_GRXCLSRULE:
case ETHTOOL_GRXCLSRLALL:
- rc = ethtool_get_rxnfc(dev, useraddr);
+ rc = ethtool_get_rxnfc(dev, ethcmd, useraddr);
break;
case ETHTOOL_SRXFH:
case ETHTOOL_SRXCLSRLDEL:
case ETHTOOL_SRXCLSRLINS:
- rc = ethtool_set_rxnfc(dev, useraddr);
+ rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
break;
case ETHTOOL_GGRO:
rc = ethtool_get_gro(dev, useraddr);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index cf8e70392fe0..785e5276a300 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -107,6 +107,7 @@ static DEFINE_RWLOCK(est_lock);
/* Protects against soft lockup during large deletion */
static struct rb_root est_root = RB_ROOT;
+static DEFINE_SPINLOCK(est_tree_lock);
static void est_timer(unsigned long arg)
{
@@ -201,7 +202,6 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
*
* Returns 0 on success or a negative error code.
*
- * NOTE: Called under rtnl_mutex
*/
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_rate_est *rate_est,
@@ -232,6 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
est->last_packets = bstats->packets;
est->avpps = rate_est->pps<<10;
+ spin_lock(&est_tree_lock);
if (!elist[idx].timer.function) {
INIT_LIST_HEAD(&elist[idx].list);
setup_timer(&elist[idx].timer, est_timer, idx);
@@ -242,6 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
list_add_rcu(&est->list, &elist[idx].list);
gen_add_node(est);
+ spin_unlock(&est_tree_lock);
return 0;
}
@@ -261,13 +263,13 @@ static void __gen_kill_estimator(struct rcu_head *head)
*
* Removes the rate estimator specified by &bstats and &rate_est.
*
- * NOTE: Called under rtnl_mutex
*/
void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_rate_est *rate_est)
{
struct gen_estimator *e;
+ spin_lock(&est_tree_lock);
while ((e = gen_find_node(bstats, rate_est))) {
rb_erase(&e->node, &est_root);
@@ -278,6 +280,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
list_del_rcu(&e->list);
call_rcu(&e->e_rcu, __gen_kill_estimator);
}
+ spin_unlock(&est_tree_lock);
}
EXPORT_SYMBOL(gen_kill_estimator);
@@ -312,8 +315,14 @@ EXPORT_SYMBOL(gen_replace_estimator);
bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
const struct gnet_stats_rate_est *rate_est)
{
+ bool res;
+
ASSERT_RTNL();
- return gen_find_node(bstats, rate_est) != NULL;
+ spin_lock(&est_tree_lock);
+ res = gen_find_node(bstats, rate_est) != NULL;
+ spin_unlock(&est_tree_lock);
+
+ return res;
}
EXPORT_SYMBOL(gen_estimator_active);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index bff37908bd55..a4e0a7482c2b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -934,6 +934,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
kfree_skb(buff);
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
+ skb_dst_force(skb);
__skb_queue_tail(&neigh->arp_queue, skb);
}
rc = 1;
@@ -948,7 +949,10 @@ static void neigh_update_hhs(struct neighbour *neigh)
{
struct hh_cache *hh;
void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
- = neigh->dev->header_ops->cache_update;
+ = NULL;
+
+ if (neigh->dev->header_ops)
+ update = neigh->dev->header_ops->cache_update;
if (update) {
for (hh = neigh->hh; hh; hh = hh->hh_next) {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c57c4b228bb5..99e7052d7323 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -14,7 +14,9 @@
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/slab.h>
+#include <linux/nsproxy.h>
#include <net/sock.h>
+#include <net/net_namespace.h>
#include <linux/rtnetlink.h>
#include <linux/wireless.h>
#include <linux/vmalloc.h>
@@ -467,6 +469,7 @@ static struct attribute_group wireless_group = {
.attrs = wireless_attrs,
};
#endif
+#endif /* CONFIG_SYSFS */
#ifdef CONFIG_RPS
/*
@@ -766,7 +769,38 @@ static void rx_queue_remove_kobjects(struct net_device *net)
kset_unregister(net->queues_kset);
}
#endif /* CONFIG_RPS */
-#endif /* CONFIG_SYSFS */
+
+static const void *net_current_ns(void)
+{
+ return current->nsproxy->net_ns;
+}
+
+static const void *net_initial_ns(void)
+{
+ return &init_net;
+}
+
+static const void *net_netlink_ns(struct sock *sk)
+{
+ return sock_net(sk);
+}
+
+static struct kobj_ns_type_operations net_ns_type_operations = {
+ .type = KOBJ_NS_TYPE_NET,
+ .current_ns = net_current_ns,
+ .netlink_ns = net_netlink_ns,
+ .initial_ns = net_initial_ns,
+};
+
+static void net_kobj_ns_exit(struct net *net)
+{
+ kobj_ns_exit(KOBJ_NS_TYPE_NET, net);
+}
+
+static struct pernet_operations kobj_net_ops = {
+ .exit = net_kobj_ns_exit,
+};
+
#ifdef CONFIG_HOTPLUG
static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
@@ -774,9 +808,6 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
struct net_device *dev = to_net_dev(d);
int retval;
- if (!net_eq(dev_net(dev), &init_net))
- return 0;
-
/* pass interface to uevent. */
retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
if (retval)
@@ -806,6 +837,13 @@ static void netdev_release(struct device *d)
kfree((char *)dev - dev->padded);
}
+static const void *net_namespace(struct device *d)
+{
+ struct net_device *dev;
+ dev = container_of(d, struct net_device, dev);
+ return dev_net(dev);
+}
+
static struct class net_class = {
.name = "net",
.dev_release = netdev_release,
@@ -815,6 +853,8 @@ static struct class net_class = {
#ifdef CONFIG_HOTPLUG
.dev_uevent = netdev_uevent,
#endif
+ .ns_type = &net_ns_type_operations,
+ .namespace = net_namespace,
};
/* Delete sysfs entries but hold kobject reference until after all
@@ -826,9 +866,6 @@ void netdev_unregister_kobject(struct net_device * net)
kobject_get(&dev->kobj);
- if (!net_eq(dev_net(net), &init_net))
- return;
-
#ifdef CONFIG_RPS
rx_queue_remove_kobjects(net);
#endif
@@ -843,6 +880,7 @@ int netdev_register_kobject(struct net_device *net)
const struct attribute_group **groups = net->sysfs_groups;
int error = 0;
+ device_initialize(dev);
dev->class = &net_class;
dev->platform_data = net;
dev->groups = groups;
@@ -865,9 +903,6 @@ int netdev_register_kobject(struct net_device *net)
#endif
#endif /* CONFIG_SYSFS */
- if (!net_eq(dev_net(net), &init_net))
- return 0;
-
error = device_add(dev);
if (error)
return error;
@@ -896,13 +931,9 @@ void netdev_class_remove_file(struct class_attribute *class_attr)
EXPORT_SYMBOL(netdev_class_create_file);
EXPORT_SYMBOL(netdev_class_remove_file);
-void netdev_initialize_kobject(struct net_device *net)
-{
- struct device *device = &(net->dev);
- device_initialize(device);
-}
-
int netdev_kobject_init(void)
{
+ kobj_ns_type_register(&net_ns_type_operations);
+ register_pernet_subsys(&kobj_net_ops);
return class_register(&net_class);
}
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 14e7524260b3..805555e8b187 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,5 +4,4 @@
int netdev_kobject_init(void);
int netdev_register_kobject(struct net_device *);
void netdev_unregister_kobject(struct net_device *);
-void netdev_initialize_kobject(struct net_device *);
#endif
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2ad68da418df..1dacd7ba8dbb 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2170,7 +2170,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
end_time = ktime_now();
pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
- pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay);
+ pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
}
static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e4b9870e4706..1a2af24e9e3d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -650,11 +650,12 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev)
if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
int num_vfs = dev_num_vf(dev->dev.parent);
- size_t size = nlmsg_total_size(sizeof(struct nlattr));
- size += nlmsg_total_size(num_vfs * sizeof(struct nlattr));
- size += num_vfs * (sizeof(struct ifla_vf_mac) +
- sizeof(struct ifla_vf_vlan) +
- sizeof(struct ifla_vf_tx_rate));
+ size_t size = nla_total_size(sizeof(struct nlattr));
+ size += nla_total_size(num_vfs * sizeof(struct nlattr));
+ size += num_vfs *
+ (nla_total_size(sizeof(struct ifla_vf_mac)) +
+ nla_total_size(sizeof(struct ifla_vf_vlan)) +
+ nla_total_size(sizeof(struct ifla_vf_tx_rate)));
return size;
} else
return 0;
@@ -722,14 +723,13 @@ static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
vf_port = nla_nest_start(skb, IFLA_VF_PORT);
- if (!vf_port) {
- nla_nest_cancel(skb, vf_ports);
- return -EMSGSIZE;
- }
+ if (!vf_port)
+ goto nla_put_failure;
NLA_PUT_U32(skb, IFLA_PORT_VF, vf);
err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
+ if (err == -EMSGSIZE)
+ goto nla_put_failure;
if (err) {
-nla_put_failure:
nla_nest_cancel(skb, vf_port);
continue;
}
@@ -739,6 +739,10 @@ nla_put_failure:
nla_nest_end(skb, vf_ports);
return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, vf_ports);
+ return -EMSGSIZE;
}
static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
@@ -753,7 +757,7 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
if (err) {
nla_nest_cancel(skb, port_self);
- return err;
+ return (err == -EMSGSIZE) ? err : 0;
}
nla_nest_end(skb, port_self);
@@ -1199,8 +1203,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
struct nlattr *attr;
int rem;
nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
- if (nla_type(attr) != IFLA_VF_INFO)
+ if (nla_type(attr) != IFLA_VF_INFO) {
+ err = -EINVAL;
goto errout;
+ }
err = do_setvfinfo(dev, attr);
if (err < 0)
goto errout;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4c11000a96aa..34432b4e96bb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -482,22 +482,22 @@ EXPORT_SYMBOL(consume_skb);
* reference count dropping and cleans up the skbuff as if it
* just came from __alloc_skb().
*/
-int skb_recycle_check(struct sk_buff *skb, int skb_size)
+bool skb_recycle_check(struct sk_buff *skb, int skb_size)
{
struct skb_shared_info *shinfo;
if (irqs_disabled())
- return 0;
+ return false;
if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
- return 0;
+ return false;
skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD);
if (skb_end_pointer(skb) - skb->head < skb_size)
- return 0;
+ return false;
if (skb_shared(skb) || skb_cloned(skb))
- return 0;
+ return false;
skb_release_head_state(skb);
@@ -509,7 +509,7 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
skb->data = skb->head + NET_SKB_PAD;
skb_reset_tail_pointer(skb);
- return 1;
+ return true;
}
EXPORT_SYMBOL(skb_recycle_check);
@@ -532,6 +532,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->ip_summed = old->ip_summed;
skb_copy_queue_mapping(new, old);
new->priority = old->priority;
+ new->deliver_no_wcard = old->deliver_no_wcard;
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
new->ipvs_property = old->ipvs_property;
#endif
@@ -569,7 +570,6 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
C(len);
C(data_len);
C(mac_len);
- C(rxhash);
n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
n->cloned = 1;
n->nohdr = 0;
@@ -1406,12 +1406,13 @@ new_page:
/*
* Fill page/offset/length into spd, if it can hold more pages.
*/
-static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
+static inline int spd_fill_page(struct splice_pipe_desc *spd,
+ struct pipe_inode_info *pipe, struct page *page,
unsigned int *len, unsigned int offset,
struct sk_buff *skb, int linear,
struct sock *sk)
{
- if (unlikely(spd->nr_pages == PIPE_BUFFERS))
+ if (unlikely(spd->nr_pages == pipe->buffers))
return 1;
if (linear) {
@@ -1447,7 +1448,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
unsigned int plen, unsigned int *off,
unsigned int *len, struct sk_buff *skb,
struct splice_pipe_desc *spd, int linear,
- struct sock *sk)
+ struct sock *sk,
+ struct pipe_inode_info *pipe)
{
if (!*len)
return 1;
@@ -1470,7 +1472,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
/* the linear region may spread across several pages */
flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
- if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk))
+ if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
return 1;
__segment_seek(&page, &poff, &plen, flen);
@@ -1485,9 +1487,9 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
* Map linear and fragment data from the skb to spd. It reports failure if the
* pipe is full or if we already spliced the requested length.
*/
-static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
- unsigned int *len, struct splice_pipe_desc *spd,
- struct sock *sk)
+static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
+ unsigned int *offset, unsigned int *len,
+ struct splice_pipe_desc *spd, struct sock *sk)
{
int seg;
@@ -1497,7 +1499,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
if (__splice_segment(virt_to_page(skb->data),
(unsigned long) skb->data & (PAGE_SIZE - 1),
skb_headlen(skb),
- offset, len, skb, spd, 1, sk))
+ offset, len, skb, spd, 1, sk, pipe))
return 1;
/*
@@ -1507,7 +1509,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
if (__splice_segment(f->page, f->page_offset, f->size,
- offset, len, skb, spd, 0, sk))
+ offset, len, skb, spd, 0, sk, pipe))
return 1;
}
@@ -1524,8 +1526,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int tlen,
unsigned int flags)
{
- struct partial_page partial[PIPE_BUFFERS];
- struct page *pages[PIPE_BUFFERS];
+ struct partial_page partial[PIPE_DEF_BUFFERS];
+ struct page *pages[PIPE_DEF_BUFFERS];
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
@@ -1535,12 +1537,16 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
};
struct sk_buff *frag_iter;
struct sock *sk = skb->sk;
+ int ret = 0;
+
+ if (splice_grow_spd(pipe, &spd))
+ return -ENOMEM;
/*
* __skb_splice_bits() only fails if the output has no room left,
* so no point in going over the frag_list for the error case.
*/
- if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk))
+ if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
goto done;
else if (!tlen)
goto done;
@@ -1551,14 +1557,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
skb_walk_frags(skb, frag_iter) {
if (!tlen)
break;
- if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk))
+ if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
break;
}
done:
if (spd.nr_pages) {
- int ret;
-
/*
* Drop the socket lock, otherwise we have reverse
* locking dependencies between sk_lock and i_mutex
@@ -1571,10 +1575,10 @@ done:
release_sock(sk);
ret = splice_to_pipe(pipe, &spd);
lock_sock(sk);
- return ret;
}
- return 0;
+ splice_shrink_spd(pipe, &spd);
+ return ret;
}
/**
@@ -2961,6 +2965,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
}
EXPORT_SYMBOL_GPL(skb_cow_data);
+static void sock_rmem_free(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+}
+
+/*
+ * Note: We dont mem charge error packets (no sk_forward_alloc changes)
+ */
+int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
+{
+ if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+ (unsigned)sk->sk_rcvbuf)
+ return -ENOMEM;
+
+ skb_orphan(skb);
+ skb->sk = sk;
+ skb->destructor = sock_rmem_free;
+ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+
+ skb_queue_tail(&sk->sk_error_queue, skb);
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_data_ready(sk, skb->len);
+ return 0;
+}
+EXPORT_SYMBOL(sock_queue_err_skb);
+
void skb_tstamp_tx(struct sk_buff *orig_skb,
struct skb_shared_hwtstamps *hwtstamps)
{
@@ -2992,7 +3024,9 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
memset(serr, 0, sizeof(*serr));
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+
err = sock_queue_err_skb(sk, skb);
+
if (err)
kfree_skb(skb);
}
diff --git a/net/core/sock.c b/net/core/sock.c
index bf88a167c8f2..2cf7f9f7e775 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -123,6 +123,7 @@
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
+#include <net/cls_cgroup.h>
#include <linux/filter.h>
@@ -217,6 +218,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);
+#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+int net_cls_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_cls_subsys_id);
+#endif
+
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
struct timeval tv;
@@ -1050,6 +1056,17 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
module_put(owner);
}
+#ifdef CONFIG_CGROUPS
+void sock_update_classid(struct sock *sk)
+{
+ u32 classid = task_cls_classid(current);
+
+ if (classid && classid != sk->sk_classid)
+ sk->sk_classid = classid;
+}
+EXPORT_SYMBOL(sock_update_classid);
+#endif
+
/**
* sk_alloc - All socket objects are allocated here
* @net: the applicable net namespace
@@ -1073,6 +1090,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sock_lock_init(sk);
sock_net_set(sk, get_net(net));
atomic_set(&sk->sk_wmem_alloc, 1);
+
+ sock_update_classid(sk);
}
return sk;
@@ -1988,6 +2007,39 @@ void release_sock(struct sock *sk)
}
EXPORT_SYMBOL(release_sock);
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * This version should be used for very small section, where process wont block
+ * return false if fast path is taken
+ * sk_lock.slock locked, owned = 0, BH disabled
+ * return true if slow path is taken
+ * sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+bool lock_sock_fast(struct sock *sk)
+{
+ might_sleep();
+ spin_lock_bh(&sk->sk_lock.slock);
+
+ if (!sk->sk_lock.owned)
+ /*
+ * Note : We must disable BH
+ */
+ return false;
+
+ __lock_sock(sk);
+ sk->sk_lock.owned = 1;
+ spin_unlock(&sk->sk_lock.slock);
+ /*
+ * The sk_lock has mutex_lock() semantics here:
+ */
+ mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+ local_bh_enable();
+ return true;
+}
+EXPORT_SYMBOL(lock_sock_fast);
+
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
struct timeval tv;