Diffstat (limited to 'net/core')
-rw-r--r--  net/core/dev.c             |  38
-rw-r--r--  net/core/drop_monitor.c    | 102
-rw-r--r--  net/core/filter.c          |   4
-rw-r--r--  net/core/neighbour.c       |  14
-rw-r--r--  net/core/net_namespace.c   |   4
-rw-r--r--  net/core/netpoll.c         |  11
-rw-r--r--  net/core/netprio_cgroup.c  |  78
-rw-r--r--  net/core/skbuff.c          |   5
8 files changed, 122 insertions, 134 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index cd0981977f5c..1cb0d8a6aa6c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1136,8 +1136,8 @@ void dev_load(struct net *net, const char *name)
no_module = request_module("netdev-%s", name);
if (no_module && capable(CAP_SYS_MODULE)) {
if (!request_module("%s", name))
- pr_err("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
- name);
+ pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
+ name);
}
}
EXPORT_SYMBOL(dev_load);
@@ -2089,25 +2089,6 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
return 0;
}
-/*
- * Try to orphan skb early, right before transmission by the device.
- * We cannot orphan skb if tx timestamp is requested or the sk-reference
- * is needed on driver level for other reasons, e.g. see net/can/raw.c
- */
-static inline void skb_orphan_try(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
-
- if (sk && !skb_shinfo(skb)->tx_flags) {
- /* skb_tx_hash() wont be able to get sk.
- * We copy sk_hash into skb->rxhash
- */
- if (!skb->rxhash)
- skb->rxhash = sk->sk_hash;
- skb_orphan(skb);
- }
-}
-
static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
{
return ((features & NETIF_F_GEN_CSUM) ||
@@ -2193,8 +2174,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
- skb_orphan_try(skb);
-
features = netif_skb_features(skb);
if (vlan_tx_tag_present(skb) &&
@@ -2304,7 +2283,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
if (skb->sk && skb->sk->sk_hash)
hash = skb->sk->sk_hash;
else
- hash = (__force u16) skb->protocol ^ skb->rxhash;
+ hash = (__force u16) skb->protocol;
hash = jhash_1word(hash, hashrnd);
return (u16) (((u64) hash * qcount) >> 32) + qoffset;
@@ -2465,8 +2444,12 @@ static void skb_update_prio(struct sk_buff *skb)
{
struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
- if ((!skb->priority) && (skb->sk) && map)
- skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
+ if (!skb->priority && skb->sk && map) {
+ unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
+
+ if (prioidx < map->priomap_len)
+ skb->priority = map->priomap[prioidx];
+ }
}
#else
#define skb_update_prio(skb)
@@ -6300,7 +6283,8 @@ static struct hlist_head *netdev_create_hash(void)
/* Initialize per network namespace state */
static int __net_init netdev_init(struct net *net)
{
- INIT_LIST_HEAD(&net->dev_base_head);
+ if (net != &init_net)
+ INIT_LIST_HEAD(&net->dev_base_head);
net->dev_name_head = netdev_create_hash();
if (net->dev_name_head == NULL)
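
The skb_update_prio() hunk above only dereferences the priomap when the socket's cgroup index is inside the table's current length, closing an out-of-bounds read when the map is shorter than the index. A minimal userspace sketch of that bounds-checked lookup (the struct and function names here are illustrative, not from the patch):

#include <stddef.h>
#include <stdint.h>

struct prio_map {
        size_t priomap_len;     /* number of valid entries */
        uint32_t priomap[];     /* flexible array, may be shorter than any index */
};

/* Return the mapped priority, or 0 when the index is beyond the table. */
static uint32_t priomap_lookup(const struct prio_map *map, size_t prioidx)
{
        if (map && prioidx < map->priomap_len)
                return map->priomap[prioidx];
        return 0;
}

The same guard appears again in the netprio_cgroup.c hunks further down (cgrp_destroy() and read_priomap()).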
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index ea5fb9fcc3f5..d23b6682f4e9 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -36,9 +36,6 @@
#define TRACE_ON 1
#define TRACE_OFF 0
-static void send_dm_alert(struct work_struct *unused);
-
-
/*
* Globals, our netlink socket pointer
* and the work handle that will send up
@@ -48,11 +45,10 @@ static int trace_state = TRACE_OFF;
static DEFINE_MUTEX(trace_state_mutex);
struct per_cpu_dm_data {
- struct work_struct dm_alert_work;
- struct sk_buff __rcu *skb;
- atomic_t dm_hit_count;
- struct timer_list send_timer;
- int cpu;
+ spinlock_t lock;
+ struct sk_buff *skb;
+ struct work_struct dm_alert_work;
+ struct timer_list send_timer;
};
struct dm_hw_stat_delta {
@@ -78,13 +74,13 @@ static int dm_delay = 1;
static unsigned long dm_hw_check_delta = 2*HZ;
static LIST_HEAD(hw_stats_list);
-static void reset_per_cpu_data(struct per_cpu_dm_data *data)
+static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
{
size_t al;
struct net_dm_alert_msg *msg;
struct nlattr *nla;
struct sk_buff *skb;
- struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1);
+ unsigned long flags;
al = sizeof(struct net_dm_alert_msg);
al += dm_hit_limit * sizeof(struct net_dm_drop_point);
@@ -99,65 +95,40 @@ static void reset_per_cpu_data(struct per_cpu_dm_data *data)
sizeof(struct net_dm_alert_msg));
msg = nla_data(nla);
memset(msg, 0, al);
- } else
- schedule_work_on(data->cpu, &data->dm_alert_work);
-
- /*
- * Don't need to lock this, since we are guaranteed to only
- * run this on a single cpu at a time.
- * Note also that we only update data->skb if the old and new skb
- * pointers don't match. This ensures that we don't continually call
- * synchornize_rcu if we repeatedly fail to alloc a new netlink message.
- */
- if (skb != oskb) {
- rcu_assign_pointer(data->skb, skb);
-
- synchronize_rcu();
-
- atomic_set(&data->dm_hit_count, dm_hit_limit);
+ } else {
+ mod_timer(&data->send_timer, jiffies + HZ / 10);
}
+ spin_lock_irqsave(&data->lock, flags);
+ swap(data->skb, skb);
+ spin_unlock_irqrestore(&data->lock, flags);
+
+ return skb;
}
-static void send_dm_alert(struct work_struct *unused)
+static void send_dm_alert(struct work_struct *work)
{
struct sk_buff *skb;
- struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
+ struct per_cpu_dm_data *data;
- WARN_ON_ONCE(data->cpu != smp_processor_id());
+ data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
- /*
- * Grab the skb we're about to send
- */
- skb = rcu_dereference_protected(data->skb, 1);
-
- /*
- * Replace it with a new one
- */
- reset_per_cpu_data(data);
+ skb = reset_per_cpu_data(data);
- /*
- * Ship it!
- */
if (skb)
genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
-
- put_cpu_var(dm_cpu_data);
}
/*
* This is the timer function to delay the sending of an alert
* in the event that more drops will arrive during the
- * hysteresis period. Note that it operates under the timer interrupt
- * so we don't need to disable preemption here
+ * hysteresis period.
*/
-static void sched_send_work(unsigned long unused)
+static void sched_send_work(unsigned long _data)
{
- struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
-
- schedule_work_on(smp_processor_id(), &data->dm_alert_work);
+ struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data;
- put_cpu_var(dm_cpu_data);
+ schedule_work(&data->dm_alert_work);
}
static void trace_drop_common(struct sk_buff *skb, void *location)
@@ -167,33 +138,28 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
struct nlattr *nla;
int i;
struct sk_buff *dskb;
- struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
-
+ struct per_cpu_dm_data *data;
+ unsigned long flags;
- rcu_read_lock();
- dskb = rcu_dereference(data->skb);
+ local_irq_save(flags);
+ data = &__get_cpu_var(dm_cpu_data);
+ spin_lock(&data->lock);
+ dskb = data->skb;
if (!dskb)
goto out;
- if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
- /*
- * we're already at zero, discard this hit
- */
- goto out;
- }
-
nlh = (struct nlmsghdr *)dskb->data;
nla = genlmsg_data(nlmsg_data(nlh));
msg = nla_data(nla);
for (i = 0; i < msg->entries; i++) {
if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
msg->points[i].count++;
- atomic_inc(&data->dm_hit_count);
goto out;
}
}
-
+ if (msg->entries == dm_hit_limit)
+ goto out;
/*
* We need to create a new entry
*/
@@ -205,13 +171,11 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
if (!timer_pending(&data->send_timer)) {
data->send_timer.expires = jiffies + dm_delay * HZ;
- add_timer_on(&data->send_timer, smp_processor_id());
+ add_timer(&data->send_timer);
}
out:
- rcu_read_unlock();
- put_cpu_var(dm_cpu_data);
- return;
+ spin_unlock_irqrestore(&data->lock, flags);
}
static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
@@ -418,11 +382,11 @@ static int __init init_net_drop_monitor(void)
for_each_possible_cpu(cpu) {
data = &per_cpu(dm_cpu_data, cpu);
- data->cpu = cpu;
INIT_WORK(&data->dm_alert_work, send_dm_alert);
init_timer(&data->send_timer);
- data->send_timer.data = cpu;
+ data->send_timer.data = (unsigned long)data;
data->send_timer.function = sched_send_work;
+ spin_lock_init(&data->lock);
reset_per_cpu_data(data);
}
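
The drop_monitor rework above drops the RCU-protected skb pointer and the atomic hit counter in favour of a per-CPU spinlock: trace_drop_common() records hits into data->skb under the lock, and send_dm_alert() swaps in a fresh buffer via reset_per_cpu_data() under the same lock before transmitting the old one. A rough userspace model of that swap-under-lock pattern, with pthreads standing in for the kernel primitives (all names here are made up for illustration):

#include <pthread.h>
#include <stdlib.h>

struct pending_alert {
        pthread_mutex_t lock;
        char *buf;              /* buffer currently being filled, or NULL */
};

/* Producer side: record under the lock, as trace_drop_common() does. */
static void record_drop(struct pending_alert *a, char entry)
{
        pthread_mutex_lock(&a->lock);
        if (a->buf)                     /* hit is discarded if no buffer exists */
                a->buf[0] = entry;      /* stand-in for adding a drop point */
        pthread_mutex_unlock(&a->lock);
}

/* Consumer side: swap in a fresh buffer and hand back the old one,
 * as send_dm_alert()/reset_per_cpu_data() do. */
static char *take_pending(struct pending_alert *a)
{
        char *fresh = calloc(1, 4096);  /* may be NULL; that is tolerated */
        char *old;

        pthread_mutex_lock(&a->lock);
        old = a->buf;
        a->buf = fresh;
        pthread_mutex_unlock(&a->lock);
        return old;                     /* caller transmits and frees it */
}

Because the swap happens under the producer's lock, the worker never sends a buffer that the tracepoint path is still writing to.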
diff --git a/net/core/filter.c b/net/core/filter.c
index a3eddb515d1b..d4ce2dc712e3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -616,9 +616,9 @@ static int __sk_prepare_filter(struct sk_filter *fp)
/**
* sk_unattached_filter_create - create an unattached filter
* @fprog: the filter program
- * @sk: the socket to use
+ * @pfp: the unattached filter that is created
*
- * Create a filter independent ofr any socket. We first run some
+ * Create a filter independent of any socket. We first run some
* sanity checks on it to make sure it does not explode on us later.
* If an error occurs or there is insufficient memory for the filter
* a negative errno code is returned. On success the return is zero.
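
The hunk above only fixes the kernel-doc for sk_unattached_filter_create(), which builds a classic-BPF filter not bound to any socket. As a rough kernel-context sketch of how the create/destroy pair in this tree is used (an assumption-laden fragment, not buildable outside the kernel; build_filter() and accept_all are invented names):

#include <linux/kernel.h>
#include <linux/filter.h>

/* Classic-BPF program that accepts every packet. */
static struct sock_filter accept_all[] = {
        BPF_STMT(BPF_RET | BPF_K, 0xffff),
};

static struct sk_filter *build_filter(void)
{
        struct sock_fprog fprog = {
                .len    = ARRAY_SIZE(accept_all),
                .filter = accept_all,   /* sparse may warn: member is __user */
        };
        struct sk_filter *fp;

        if (sk_unattached_filter_create(&fp, &fprog))
                return NULL;
        return fp;      /* released later with sk_unattached_filter_destroy(fp) */
}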
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index eb09f8bbbf07..d81d026138f0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2219,9 +2219,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
- for (h = 0; h < (1 << nht->hash_shift); h++) {
- if (h < s_h)
- continue;
+ for (h = s_h; h < (1 << nht->hash_shift); h++) {
if (h > s_h)
s_idx = 0;
for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
@@ -2260,9 +2258,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
read_lock_bh(&tbl->lock);
- for (h = 0; h <= PNEIGH_HASHMASK; h++) {
- if (h < s_h)
- continue;
+ for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
if (h > s_h)
s_idx = 0;
for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
@@ -2297,7 +2293,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
struct neigh_table *tbl;
int t, family, s_t;
int proxy = 0;
- int err = 0;
+ int err;
read_lock(&neigh_tbl_lock);
family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
@@ -2311,7 +2307,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
- for (tbl = neigh_tables, t = 0; tbl && (err >= 0);
+ for (tbl = neigh_tables, t = 0; tbl;
tbl = tbl->next, t++) {
if (t < s_t || (family && tbl->family != family))
continue;
@@ -2322,6 +2318,8 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
err = pneigh_dump_table(tbl, skb, cb);
else
err = neigh_dump_table(tbl, skb, cb);
+ if (err < 0)
+ break;
}
read_unlock(&neigh_tbl_lock);
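
The neighbour dump hunks start the hash-bucket walk directly at the saved position instead of scanning from zero and skipping, and neigh_dump_info() now stops at the first table whose dump returns an error. A small standalone model of that resumable walk (the bucket counts and names are invented for illustration):

#include <stdio.h>

#define NBUCKETS 8

/* Walk buckets starting at the saved position s_h; earlier buckets were
 * already emitted by a previous, truncated dump. */
static void dump_from(int s_h, int s_idx, const int counts[NBUCKETS])
{
        for (int h = s_h; h < NBUCKETS; h++) {
                if (h > s_h)            /* only the first bucket resumes mid-way */
                        s_idx = 0;
                for (int idx = s_idx; idx < counts[h]; idx++)
                        printf("bucket %d entry %d\n", h, idx);
        }
}

int main(void)
{
        const int counts[NBUCKETS] = { 2, 0, 3, 1, 0, 0, 4, 1 };

        dump_from(2, 1, counts);        /* resume at bucket 2, entry 1 */
        return 0;
}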
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index dddbacb8f28c..42f1e1c7514f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,7 +27,9 @@ static DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
-struct net init_net;
+struct net init_net = {
+ .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
+};
EXPORT_SYMBOL(init_net);
#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
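
init_net's dev_base_head is now initialised at compile time, which is why the dev.c hunk above lets netdev_init() skip INIT_LIST_HEAD() for the initial namespace. A minimal userspace illustration of the same static list initialisation, mirroring the kernel's LIST_HEAD_INIT (the struct here is a cut-down stand-in):

#include <stdio.h>

struct list_head {
        struct list_head *next, *prev;
};

#define LIST_HEAD_INIT(name) { &(name), &(name) }

struct net {
        struct list_head dev_base_head;
};

/* Valid (empty) list from the moment the program starts; no init call needed. */
static struct net init_net = {
        .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};

int main(void)
{
        printf("empty: %d\n",
               init_net.dev_base_head.next == &init_net.dev_base_head);
        return 0;
}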
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 3d84fb9d8873..f9f40b932e4b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -362,22 +362,23 @@ EXPORT_SYMBOL(netpoll_send_skb_on_dev);
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
- int total_len, eth_len, ip_len, udp_len;
+ int total_len, ip_len, udp_len;
struct sk_buff *skb;
struct udphdr *udph;
struct iphdr *iph;
struct ethhdr *eth;
udp_len = len + sizeof(*udph);
- ip_len = eth_len = udp_len + sizeof(*iph);
- total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
+ ip_len = udp_len + sizeof(*iph);
+ total_len = ip_len + LL_RESERVED_SPACE(np->dev);
- skb = find_skb(np, total_len, total_len - len);
+ skb = find_skb(np, total_len + np->dev->needed_tailroom,
+ total_len - len);
if (!skb)
return;
skb_copy_to_linear_data(skb, msg, len);
- skb->len += len;
+ skb_put(skb, len);
skb_push(skb, sizeof(*udph));
skb_reset_transport_header(skb);
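
The netpoll_send_udp() hunk sizes the buffer from the device's real link-layer headroom (LL_RESERVED_SPACE) plus its needed tailroom, and uses skb_put() so the tail pointer advances together with the length instead of bumping skb->len by hand. A toy model of why the helper matters (the buf structure and names are invented):

#include <assert.h>
#include <string.h>

struct buf {
        unsigned char data[256];
        unsigned char *tail;    /* one past the last payload byte */
        unsigned int len;       /* payload length */
};

/* Equivalent of skb_put(): extend the payload while keeping len and the tail
 * pointer consistent, and trap overruns of the allocated area. Writing
 * "b->len += n" alone, as the old code did, leaves tail stale. */
static unsigned char *buf_put(struct buf *b, unsigned int n)
{
        unsigned char *old_tail = b->tail;

        assert(b->tail + n <= b->data + sizeof(b->data));
        b->tail += n;
        b->len += n;
        return old_tail;
}

int main(void)
{
        struct buf b = { .tail = b.data };

        memcpy(buf_put(&b, 5), "hello", 5);
        return 0;
}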
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 5b8aa2fae48b..b2e9caa1ad1a 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -49,8 +49,9 @@ static int get_prioidx(u32 *prio)
return -ENOSPC;
}
set_bit(prioidx, prioidx_map);
+ if (atomic_read(&max_prioidx) < prioidx)
+ atomic_set(&max_prioidx, prioidx);
spin_unlock_irqrestore(&prioidx_map_lock, flags);
- atomic_set(&max_prioidx, prioidx);
*prio = prioidx;
return 0;
}
@@ -64,7 +65,7 @@ static void put_prioidx(u32 idx)
spin_unlock_irqrestore(&prioidx_map_lock, flags);
}
-static void extend_netdev_table(struct net_device *dev, u32 new_len)
+static int extend_netdev_table(struct net_device *dev, u32 new_len)
{
size_t new_size = sizeof(struct netprio_map) +
((sizeof(u32) * new_len));
@@ -76,7 +77,7 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len)
if (!new_priomap) {
pr_warn("Unable to alloc new priomap!\n");
- return;
+ return -ENOMEM;
}
for (i = 0;
@@ -89,46 +90,79 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len)
rcu_assign_pointer(dev->priomap, new_priomap);
if (old_priomap)
kfree_rcu(old_priomap, rcu);
+ return 0;
}
-static void update_netdev_tables(void)
+static int write_update_netdev_table(struct net_device *dev)
{
+ int ret = 0;
+ u32 max_len;
+ struct netprio_map *map;
+
+ rtnl_lock();
+ max_len = atomic_read(&max_prioidx) + 1;
+ map = rtnl_dereference(dev->priomap);
+ if (!map || map->priomap_len < max_len)
+ ret = extend_netdev_table(dev, max_len);
+ rtnl_unlock();
+
+ return ret;
+}
+
+static int update_netdev_tables(void)
+{
+ int ret = 0;
struct net_device *dev;
- u32 max_len = atomic_read(&max_prioidx) + 1;
+ u32 max_len;
struct netprio_map *map;
rtnl_lock();
+ max_len = atomic_read(&max_prioidx) + 1;
for_each_netdev(&init_net, dev) {
map = rtnl_dereference(dev->priomap);
- if ((!map) ||
- (map->priomap_len < max_len))
- extend_netdev_table(dev, max_len);
+ /*
+ * don't allocate priomap if we didn't
+ * change net_prio.ifpriomap (map == NULL),
+ * this will speed up skb_update_prio.
+ */
+ if (map && map->priomap_len < max_len) {
+ ret = extend_netdev_table(dev, max_len);
+ if (ret < 0)
+ break;
+ }
}
rtnl_unlock();
+ return ret;
}
static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
{
struct cgroup_netprio_state *cs;
- int ret;
+ int ret = -EINVAL;
cs = kzalloc(sizeof(*cs), GFP_KERNEL);
if (!cs)
return ERR_PTR(-ENOMEM);
- if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
- kfree(cs);
- return ERR_PTR(-EINVAL);
- }
+ if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx)
+ goto out;
ret = get_prioidx(&cs->prioidx);
- if (ret != 0) {
+ if (ret < 0) {
pr_warn("No space in priority index array\n");
- kfree(cs);
- return ERR_PTR(ret);
+ goto out;
+ }
+
+ ret = update_netdev_tables();
+ if (ret < 0) {
+ put_prioidx(cs->prioidx);
+ goto out;
}
return &cs->css;
+out:
+ kfree(cs);
+ return ERR_PTR(ret);
}
static void cgrp_destroy(struct cgroup *cgrp)
@@ -141,7 +175,7 @@ static void cgrp_destroy(struct cgroup *cgrp)
rtnl_lock();
for_each_netdev(&init_net, dev) {
map = rtnl_dereference(dev->priomap);
- if (map)
+ if (map && cs->prioidx < map->priomap_len)
map->priomap[cs->prioidx] = 0;
}
rtnl_unlock();
@@ -165,7 +199,7 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft,
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev) {
map = rcu_dereference(dev->priomap);
- priority = map ? map->priomap[prioidx] : 0;
+ priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0;
cb->fill(cb, dev->name, priority);
}
rcu_read_unlock();
@@ -220,13 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
if (!dev)
goto out_free_devname;
- update_netdev_tables();
- ret = 0;
+ ret = write_update_netdev_table(dev);
+ if (ret < 0)
+ goto out_put_dev;
+
rcu_read_lock();
map = rcu_dereference(dev->priomap);
if (map)
map->priomap[prioidx] = priority;
rcu_read_unlock();
+
+out_put_dev:
dev_put(dev);
out_free_devname:
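
write_update_netdev_table() above grows one device's priomap under RTNL only when the existing table is too short, and the allocation failure is now propagated back to the cgroup writer instead of being ignored. A compact userspace model of the grow-on-demand step, with plain assignment standing in for rcu_assign_pointer()/kfree_rcu() (names are illustrative):

#include <stdlib.h>
#include <string.h>

struct netprio_map {
        size_t priomap_len;
        unsigned int priomap[];
};

/* Replace *mapp with a table of at least new_len entries, copying the old
 * contents; returns -1 on allocation failure (the kernel returns -ENOMEM). */
static int extend_table(struct netprio_map **mapp, size_t new_len)
{
        struct netprio_map *old = *mapp, *new_map;

        if (old && old->priomap_len >= new_len)
                return 0;       /* already large enough, nothing to do */

        new_map = calloc(1, sizeof(*new_map) + new_len * sizeof(unsigned int));
        if (!new_map)
                return -1;

        if (old)
                memcpy(new_map->priomap, old->priomap,
                       old->priomap_len * sizeof(unsigned int));
        new_map->priomap_len = new_len;

        *mapp = new_map;        /* the kernel uses rcu_assign_pointer() here */
        free(old);              /* and kfree_rcu() after a grace period */
        return 0;
}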
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 016694d62484..d124306b81fd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -353,7 +353,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- if (fragsz <= PAGE_SIZE && !(gfp_mask & __GFP_WAIT)) {
+ if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
void *data = netdev_alloc_frag(fragsz);
if (likely(data)) {
@@ -1755,6 +1755,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
+ .nr_pages_max = MAX_SKB_FRAGS,
.flags = flags,
.ops = &sock_pipe_buf_ops,
.spd_release = sock_spd_release,
@@ -3361,7 +3362,7 @@ EXPORT_SYMBOL(kfree_skb_partial);
* @to: prior buffer
* @from: buffer to add
* @fragstolen: pointer to boolean
- *
+ * @delta_truesize: how much more was allocated than was requested
*/
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
bool *fragstolen, int *delta_truesize)
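
The __netdev_alloc_skb() hunk keeps requests for DMA-zone memory away from the shared page-fragment cache, since that cache is filled with ordinary pages; the splice hunk just initialises the new nr_pages_max field. A trivial sketch of the fast-path test on the allocation flags (the MY_GFP_* constants are stand-ins, not the kernel's values):

#include <stdbool.h>

#define MY_GFP_WAIT     0x10u   /* caller may sleep */
#define MY_GFP_DMA      0x01u   /* caller needs memory from the DMA zone */

/* The fragment cache is only usable for small, atomic, ordinary-zone
 * requests; everything else falls back to the regular allocator. */
static bool can_use_frag_cache(unsigned int size, unsigned int page_size,
                               unsigned int gfp_mask)
{
        return size <= page_size && !(gfp_mask & (MY_GFP_WAIT | MY_GFP_DMA));
}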