aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c152
-rw-r--r--net/core/dst.c1
-rw-r--r--net/core/filter.c373
-rw-r--r--net/core/net-sysfs.c435
-rw-r--r--net/core/net-sysfs.h4
-rw-r--r--net/core/netpoll.c3
-rw-r--r--net/core/pktgen.c45
-rw-r--r--net/core/request_sock.c1
-rw-r--r--net/core/rtnetlink.c175
-rw-r--r--net/core/scm.c10
-rw-r--r--net/core/skbuff.c34
-rw-r--r--net/core/sock.c14
-rw-r--r--net/core/timestamping.c2
13 files changed, 951 insertions, 298 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 35dfb8318483..cd2437495428 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1557,12 +1557,19 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
*/
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
+ int rc;
+
if (txq < 1 || txq > dev->num_tx_queues)
return -EINVAL;
if (dev->reg_state == NETREG_REGISTERED) {
ASSERT_RTNL();
+ rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
+ txq);
+ if (rc)
+ return rc;
+
if (txq < dev->real_num_tx_queues)
qdisc_reset_all_tx_gt(dev, txq);
}
@@ -1794,16 +1801,18 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
struct packet_type *ptype;
__be16 type = skb->protocol;
+ int vlan_depth = ETH_HLEN;
int err;
- if (type == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *veh;
+ while (type == htons(ETH_P_8021Q)) {
+ struct vlan_hdr *vh;
- if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+ if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
return ERR_PTR(-EINVAL);
- veh = (struct vlan_ethhdr *)skb->data;
- type = veh->h_vlan_encapsulated_proto;
+ vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+ type = vh->h_vlan_encapsulated_proto;
+ vlan_depth += VLAN_HLEN;
}
skb_reset_mac_header(skb);
@@ -1817,8 +1826,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
dev->ethtool_ops->get_drvinfo(dev, &info);
- WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
- "ip_summed=%d",
+ WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
info.driver, dev ? dev->features : 0L,
skb->sk ? skb->sk->sk_route_caps : 0L,
skb->len, skb->data_len, skb->ip_summed);
@@ -1967,6 +1975,23 @@ static inline void skb_orphan_try(struct sk_buff *skb)
}
}
+int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev)
+{
+ __be16 protocol = skb->protocol;
+
+ if (protocol == htons(ETH_P_8021Q)) {
+ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+ protocol = veh->h_vlan_encapsulated_proto;
+ } else if (!skb->vlan_tci)
+ return dev->features;
+
+ if (protocol != htons(ETH_P_8021Q))
+ return dev->features & dev->vlan_features;
+ else
+ return 0;
+}
+EXPORT_SYMBOL(netif_get_vlan_features);
+
/*
* Returns true if either:
* 1. skb has frag_list and the device doesn't support FRAGLIST, or
@@ -1977,15 +2002,20 @@ static inline void skb_orphan_try(struct sk_buff *skb)
static inline int skb_needs_linearize(struct sk_buff *skb,
struct net_device *dev)
{
- int features = dev->features;
+ if (skb_is_nonlinear(skb)) {
+ int features = dev->features;
- if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
- features &= dev->vlan_features;
+ if (vlan_tx_tag_present(skb))
+ features &= dev->vlan_features;
+
+ return (skb_has_frag_list(skb) &&
+ !(features & NETIF_F_FRAGLIST)) ||
+ (skb_shinfo(skb)->nr_frags &&
+ (!(features & NETIF_F_SG) ||
+ illegal_highdma(dev, skb)));
+ }
- return skb_is_nonlinear(skb) &&
- ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
- (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
- illegal_highdma(dev, skb))));
+ return 0;
}
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -2119,26 +2149,70 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
return queue_index;
}
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+ struct xps_dev_maps *dev_maps;
+ struct xps_map *map;
+ int queue_index = -1;
+
+ rcu_read_lock();
+ dev_maps = rcu_dereference(dev->xps_maps);
+ if (dev_maps) {
+ map = rcu_dereference(
+ dev_maps->cpu_map[raw_smp_processor_id()]);
+ if (map) {
+ if (map->len == 1)
+ queue_index = map->queues[0];
+ else {
+ u32 hash;
+ if (skb->sk && skb->sk->sk_hash)
+ hash = skb->sk->sk_hash;
+ else
+ hash = (__force u16) skb->protocol ^
+ skb->rxhash;
+ hash = jhash_1word(hash, hashrnd);
+ queue_index = map->queues[
+ ((u64)hash * map->len) >> 32];
+ }
+ if (unlikely(queue_index >= dev->real_num_tx_queues))
+ queue_index = -1;
+ }
+ }
+ rcu_read_unlock();
+
+ return queue_index;
+#else
+ return -1;
+#endif
+}
+
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
{
int queue_index;
const struct net_device_ops *ops = dev->netdev_ops;
- if (ops->ndo_select_queue) {
+ if (dev->real_num_tx_queues == 1)
+ queue_index = 0;
+ else if (ops->ndo_select_queue) {
queue_index = ops->ndo_select_queue(dev, skb);
queue_index = dev_cap_txqueue(dev, queue_index);
} else {
struct sock *sk = skb->sk;
queue_index = sk_tx_queue_get(sk);
- if (queue_index < 0) {
- queue_index = 0;
- if (dev->real_num_tx_queues > 1)
+ if (queue_index < 0 || skb->ooo_okay ||
+ queue_index >= dev->real_num_tx_queues) {
+ int old_index = queue_index;
+
+ queue_index = get_xps_queue(dev, skb);
+ if (queue_index < 0)
queue_index = skb_tx_hash(dev, skb);
- if (sk) {
- struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
+ if (queue_index != old_index && sk) {
+ struct dst_entry *dst =
+ rcu_dereference_check(sk->sk_dst_cache, 1);
if (dst && skb_dst(skb) == dst)
sk_tx_queue_set(sk, queue_index);
@@ -5014,9 +5088,9 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+#ifdef CONFIG_RPS
static int netif_alloc_rx_queues(struct net_device *dev)
{
-#ifdef CONFIG_RPS
unsigned int i, count = dev->num_rx_queues;
struct netdev_rx_queue *rx;
@@ -5029,20 +5103,17 @@ static int netif_alloc_rx_queues(struct net_device *dev)
}
dev->_rx = rx;
- /*
- * Set a pointer to first element in the array which holds the
- * reference count.
- */
for (i = 0; i < count; i++)
- rx[i].first = rx;
-#endif
+ rx[i].dev = dev;
return 0;
}
+#endif
static int netif_alloc_netdev_queues(struct net_device *dev)
{
unsigned int count = dev->num_tx_queues;
struct netdev_queue *tx;
+ int i;
BUG_ON(count < 1);
@@ -5053,6 +5124,11 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
return -ENOMEM;
}
dev->_tx = tx;
+
+ for (i = 0; i < count; i++) {
+ netdev_queue_numa_node_write(&tx[i], -1);
+ tx[i].dev = dev;
+ }
return 0;
}
@@ -5060,8 +5136,6 @@ static void netdev_init_one_queue(struct net_device *dev,
struct netdev_queue *queue,
void *_unused)
{
- queue->dev = dev;
-
/* Initialize queue lock */
spin_lock_init(&queue->_xmit_lock);
netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
@@ -5110,14 +5184,6 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
- ret = netif_alloc_rx_queues(dev);
- if (ret)
- goto out;
-
- ret = netif_alloc_netdev_queues(dev);
- if (ret)
- goto out;
-
netdev_init_queues(dev);
/* Init, if this function is available */
@@ -5577,10 +5643,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
+ if (netif_alloc_netdev_queues(dev))
+ goto free_pcpu;
#ifdef CONFIG_RPS
dev->num_rx_queues = queue_count;
dev->real_num_rx_queues = queue_count;
+ if (netif_alloc_rx_queues(dev))
+ goto free_pcpu;
#endif
dev->gso_max_size = GSO_MAX_SIZE;
@@ -5597,6 +5667,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
free_pcpu:
free_percpu(dev->pcpu_refcnt);
+ kfree(dev->_tx);
+#ifdef CONFIG_RPS
+ kfree(dev->_rx);
+#endif
+
free_p:
kfree(p);
return NULL;
@@ -5618,6 +5693,9 @@ void free_netdev(struct net_device *dev)
release_net(dev_net(dev));
kfree(dev->_tx);
+#ifdef CONFIG_RPS
+ kfree(dev->_rx);
+#endif
kfree(rcu_dereference_raw(dev->ingress_queue));
diff --git a/net/core/dst.c b/net/core/dst.c
index 8abe628b79f1..b99c7c7ffce2 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -370,6 +370,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
static struct notifier_block dst_dev_notifier = {
.notifier_call = dst_dev_event,
+ .priority = -10, /* must be called after other network notifiers */
};
void __init dst_init(void)
diff --git a/net/core/filter.c b/net/core/filter.c
index 7beaec36b541..a44d27f9f0f0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -37,6 +37,55 @@
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
+#include <linux/reciprocal_div.h>
+
+enum {
+ BPF_S_RET_K = 1,
+ BPF_S_RET_A,
+ BPF_S_ALU_ADD_K,
+ BPF_S_ALU_ADD_X,
+ BPF_S_ALU_SUB_K,
+ BPF_S_ALU_SUB_X,
+ BPF_S_ALU_MUL_K,
+ BPF_S_ALU_MUL_X,
+ BPF_S_ALU_DIV_X,
+ BPF_S_ALU_AND_K,
+ BPF_S_ALU_AND_X,
+ BPF_S_ALU_OR_K,
+ BPF_S_ALU_OR_X,
+ BPF_S_ALU_LSH_K,
+ BPF_S_ALU_LSH_X,
+ BPF_S_ALU_RSH_K,
+ BPF_S_ALU_RSH_X,
+ BPF_S_ALU_NEG,
+ BPF_S_LD_W_ABS,
+ BPF_S_LD_H_ABS,
+ BPF_S_LD_B_ABS,
+ BPF_S_LD_W_LEN,
+ BPF_S_LD_W_IND,
+ BPF_S_LD_H_IND,
+ BPF_S_LD_B_IND,
+ BPF_S_LD_IMM,
+ BPF_S_LDX_W_LEN,
+ BPF_S_LDX_B_MSH,
+ BPF_S_LDX_IMM,
+ BPF_S_MISC_TAX,
+ BPF_S_MISC_TXA,
+ BPF_S_ALU_DIV_K,
+ BPF_S_LD_MEM,
+ BPF_S_LDX_MEM,
+ BPF_S_ST,
+ BPF_S_STX,
+ BPF_S_JMP_JA,
+ BPF_S_JMP_JEQ_K,
+ BPF_S_JMP_JEQ_X,
+ BPF_S_JMP_JGE_K,
+ BPF_S_JMP_JGE_X,
+ BPF_S_JMP_JGT_K,
+ BPF_S_JMP_JGT_X,
+ BPF_S_JMP_JSET_K,
+ BPF_S_JMP_JSET_X,
+};
/* No hurry in this branch */
static void *__load_pointer(struct sk_buff *skb, int k)
@@ -89,7 +138,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
+ unsigned int pkt_len = sk_run_filter(skb, filter->insns);
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
@@ -103,48 +152,53 @@ EXPORT_SYMBOL(sk_filter);
* sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
* @filter: filter to apply
- * @flen: length of filter
*
* Decode and apply filter instructions to the skb->data.
- * Return length to keep, 0 for none. skb is the data we are
- * filtering, filter is the array of filter instructions, and
- * len is the number of filter blocks in the array.
+ * Return length to keep, 0 for none. @skb is the data we are
+ * filtering, @filter is the array of filter instructions.
+ * Because all jumps are guaranteed to be before last instruction,
+ * and last instruction guaranteed to be a RET, we dont need to check
+ * flen. (We used to pass to this function the length of filter)
*/
-unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
{
- struct sock_filter *fentry; /* We walk down these */
void *ptr;
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
+ unsigned long memvalid = 0;
u32 tmp;
int k;
- int pc;
+ BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
/*
* Process array of filter instructions.
*/
- for (pc = 0; pc < flen; pc++) {
- fentry = &filter[pc];
+ for (;; fentry++) {
+#if defined(CONFIG_X86_32)
+#define K (fentry->k)
+#else
+ const u32 K = fentry->k;
+#endif
switch (fentry->code) {
case BPF_S_ALU_ADD_X:
A += X;
continue;
case BPF_S_ALU_ADD_K:
- A += fentry->k;
+ A += K;
continue;
case BPF_S_ALU_SUB_X:
A -= X;
continue;
case BPF_S_ALU_SUB_K:
- A -= fentry->k;
+ A -= K;
continue;
case BPF_S_ALU_MUL_X:
A *= X;
continue;
case BPF_S_ALU_MUL_K:
- A *= fentry->k;
+ A *= K;
continue;
case BPF_S_ALU_DIV_X:
if (X == 0)
@@ -152,64 +206,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
A /= X;
continue;
case BPF_S_ALU_DIV_K:
- A /= fentry->k;
+ A = reciprocal_divide(A, K);
continue;
case BPF_S_ALU_AND_X:
A &= X;
continue;
case BPF_S_ALU_AND_K:
- A &= fentry->k;
+ A &= K;
continue;
case BPF_S_ALU_OR_X:
A |= X;
continue;
case BPF_S_ALU_OR_K:
- A |= fentry->k;
+ A |= K;
continue;
case BPF_S_ALU_LSH_X:
A <<= X;
continue;
case BPF_S_ALU_LSH_K:
- A <<= fentry->k;
+ A <<= K;
continue;
case BPF_S_ALU_RSH_X:
A >>= X;
continue;
case BPF_S_ALU_RSH_K:
- A >>= fentry->k;
+ A >>= K;
continue;
case BPF_S_ALU_NEG:
A = -A;
continue;
case BPF_S_JMP_JA:
- pc += fentry->k;
+ fentry += K;
continue;
case BPF_S_JMP_JGT_K:
- pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+ fentry += (A > K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_K:
- pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+ fentry += (A >= K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_K:
- pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+ fentry += (A == K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_K:
- pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+ fentry += (A & K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGT_X:
- pc += (A > X) ? fentry->jt : fentry->jf;
+ fentry += (A > X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_X:
- pc += (A >= X) ? fentry->jt : fentry->jf;
+ fentry += (A >= X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_X:
- pc += (A == X) ? fentry->jt : fentry->jf;
+ fentry += (A == X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_X:
- pc += (A & X) ? fentry->jt : fentry->jf;
+ fentry += (A & X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_LD_W_ABS:
- k = fentry->k;
+ k = K;
load_w:
ptr = load_pointer(skb, k, 4, &tmp);
if (ptr != NULL) {
@@ -218,7 +272,7 @@ load_w:
}
break;
case BPF_S_LD_H_ABS:
- k = fentry->k;
+ k = K;
load_h:
ptr = load_pointer(skb, k, 2, &tmp);
if (ptr != NULL) {
@@ -227,7 +281,7 @@ load_h:
}
break;
case BPF_S_LD_B_ABS:
- k = fentry->k;
+ k = K;
load_b:
ptr = load_pointer(skb, k, 1, &tmp);
if (ptr != NULL) {
@@ -242,32 +296,34 @@ load_b:
X = skb->len;
continue;
case BPF_S_LD_W_IND:
- k = X + fentry->k;
+ k = X + K;
goto load_w;
case BPF_S_LD_H_IND:
- k = X + fentry->k;
+ k = X + K;
goto load_h;
case BPF_S_LD_B_IND:
- k = X + fentry->k;
+ k = X + K;
goto load_b;
case BPF_S_LDX_B_MSH:
- ptr = load_pointer(skb, fentry->k, 1, &tmp);
+ ptr = load_pointer(skb, K, 1, &tmp);
if (ptr != NULL) {
X = (*(u8 *)ptr & 0xf) << 2;
continue;
}
return 0;
case BPF_S_LD_IMM:
- A = fentry->k;
+ A = K;
continue;
case BPF_S_LDX_IMM:
- X = fentry->k;
+ X = K;
continue;
case BPF_S_LD_MEM:
- A = mem[fentry->k];
+ A = (memvalid & (1UL << K)) ?
+ mem[K] : 0;
continue;
case BPF_S_LDX_MEM:
- X = mem[fentry->k];
+ X = (memvalid & (1UL << K)) ?
+ mem[K] : 0;
continue;
case BPF_S_MISC_TAX:
X = A;
@@ -276,14 +332,16 @@ load_b:
A = X;
continue;
case BPF_S_RET_K:
- return fentry->k;
+ return K;
case BPF_S_RET_A:
return A;
case BPF_S_ST:
- mem[fentry->k] = A;
+ memvalid |= 1UL << K;
+ mem[K] = A;
continue;
case BPF_S_STX:
- mem[fentry->k] = X;
+ memvalid |= 1UL << K;
+ mem[K] = X;
continue;
default:
WARN_ON(1);
@@ -377,7 +435,57 @@ EXPORT_SYMBOL(sk_run_filter);
*/
int sk_chk_filter(struct sock_filter *filter, int flen)
{
- struct sock_filter *ftest;
+ /*
+ * Valid instructions are initialized to non-0.
+ * Invalid instructions are initialized to 0.
+ */
+ static const u8 codes[] = {
+ [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K,
+ [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X,
+ [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K,
+ [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X,
+ [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
+ [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
+ [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
+ [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
+ [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
+ [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
+ [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
+ [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
+ [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
+ [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
+ [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X,
+ [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG,
+ [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS,
+ [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS,
+ [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS,
+ [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN,
+ [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND,
+ [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND,
+ [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND,
+ [BPF_LD|BPF_IMM] = BPF_S_LD_IMM,
+ [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN,
+ [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH,
+ [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM,
+ [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX,
+ [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA,
+ [BPF_RET|BPF_K] = BPF_S_RET_K,
+ [BPF_RET|BPF_A] = BPF_S_RET_A,
+ [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K,
+ [BPF_LD|BPF_MEM] = BPF_S_LD_MEM,
+ [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM,
+ [BPF_ST] = BPF_S_ST,
+ [BPF_STX] = BPF_S_STX,
+ [BPF_JMP|BPF_JA] = BPF_S_JMP_JA,
+ [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K,
+ [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X,
+ [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K,
+ [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X,
+ [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K,
+ [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X,
+ [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
+ [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
+ };
int pc;
if (flen == 0 || flen > BPF_MAXINSNS)
@@ -385,136 +493,31 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
/* check the filter code now */
for (pc = 0; pc < flen; pc++) {
- ftest = &filter[pc];
-
- /* Only allow valid instructions */
- switch (ftest->code) {
- case BPF_ALU|BPF_ADD|BPF_K:
- ftest->code = BPF_S_ALU_ADD_K;
- break;
- case BPF_ALU|BPF_ADD|BPF_X:
- ftest->code = BPF_S_ALU_ADD_X;
- break;
- case BPF_ALU|BPF_SUB|BPF_K:
- ftest->code = BPF_S_ALU_SUB_K;
- break;
- case BPF_ALU|BPF_SUB|BPF_X:
- ftest->code = BPF_S_ALU_SUB_X;
- break;
- case BPF_ALU|BPF_MUL|BPF_K:
- ftest->code = BPF_S_ALU_MUL_K;
- break;
- case BPF_ALU|BPF_MUL|BPF_X:
- ftest->code = BPF_S_ALU_MUL_X;
- break;
- case BPF_ALU|BPF_DIV|BPF_X:
- ftest->code = BPF_S_ALU_DIV_X;
- break;
- case BPF_ALU|BPF_AND|BPF_K:
- ftest->code = BPF_S_ALU_AND_K;
- break;
- case BPF_ALU|BPF_AND|BPF_X:
- ftest->code = BPF_S_ALU_AND_X;
- break;
- case BPF_ALU|BPF_OR|BPF_K:
- ftest->code = BPF_S_ALU_OR_K;
- break;
- case BPF_ALU|BPF_OR|BPF_X:
- ftest->code = BPF_S_ALU_OR_X;
- break;
- case BPF_ALU|BPF_LSH|BPF_K:
- ftest->code = BPF_S_ALU_LSH_K;
- break;
- case BPF_ALU|BPF_LSH|BPF_X:
- ftest->code = BPF_S_ALU_LSH_X;
- break;
- case BPF_ALU|BPF_RSH|BPF_K:
- ftest->code = BPF_S_ALU_RSH_K;
- break;
- case BPF_ALU|BPF_RSH|BPF_X:
- ftest->code = BPF_S_ALU_RSH_X;
- break;
- case BPF_ALU|BPF_NEG:
- ftest->code = BPF_S_ALU_NEG;
- break;
- case BPF_LD|BPF_W|BPF_ABS:
- ftest->code = BPF_S_LD_W_ABS;
- break;
- case BPF_LD|BPF_H|BPF_ABS:
- ftest->code = BPF_S_LD_H_ABS;
- break;
- case BPF_LD|BPF_B|BPF_ABS:
- ftest->code = BPF_S_LD_B_ABS;
- break;
- case BPF_LD|BPF_W|BPF_LEN:
- ftest->code = BPF_S_LD_W_LEN;
- break;
- case BPF_LD|BPF_W|BPF_IND:
- ftest->code = BPF_S_LD_W_IND;
- break;
- case BPF_LD|BPF_H|BPF_IND:
- ftest->code = BPF_S_LD_H_IND;
- break;
- case BPF_LD|BPF_B|BPF_IND:
- ftest->code = BPF_S_LD_B_IND;
- break;
- case BPF_LD|BPF_IMM:
- ftest->code = BPF_S_LD_IMM;
- break;
- case BPF_LDX|BPF_W|BPF_LEN:
- ftest->code = BPF_S_LDX_W_LEN;
- break;
- case BPF_LDX|BPF_B|BPF_MSH:
- ftest->code = BPF_S_LDX_B_MSH;
- break;
- case BPF_LDX|BPF_IMM:
- ftest->code = BPF_S_LDX_IMM;
- break;
- case BPF_MISC|BPF_TAX:
- ftest->code = BPF_S_MISC_TAX;
- break;
- case BPF_MISC|BPF_TXA:
- ftest->code = BPF_S_MISC_TXA;
- break;
- case BPF_RET|BPF_K:
- ftest->code = BPF_S_RET_K;
- break;
- case BPF_RET|BPF_A:
- ftest->code = BPF_S_RET_A;
- break;
+ struct sock_filter *ftest = &filter[pc];
+ u16 code = ftest->code;
+ if (code >= ARRAY_SIZE(codes))
+ return -EINVAL;
+ code = codes[code];
+ if (!code)
+ return -EINVAL;
/* Some instructions need special checks */
-
+ switch (code) {
+ case BPF_S_ALU_DIV_K:
/* check for division by zero */
- case BPF_ALU|BPF_DIV|BPF_K:
if (ftest->k == 0)
return -EINVAL;
- ftest->code = BPF_S_ALU_DIV_K;
- break;
-
- /* check for invalid memory addresses */
- case BPF_LD|BPF_MEM:
- if (ftest->k >= BPF_MEMWORDS)
- return -EINVAL;
- ftest->code = BPF_S_LD_MEM;
- break;
- case BPF_LDX|BPF_MEM:
- if (ftest->k >= BPF_MEMWORDS)
- return -EINVAL;
- ftest->code = BPF_S_LDX_MEM;
- break;
- case BPF_ST:
- if (ftest->k >= BPF_MEMWORDS)
- return -EINVAL;
- ftest->code = BPF_S_ST;
+ ftest->k = reciprocal_value(ftest->k);
break;
- case BPF_STX:
+ case BPF_S_LD_MEM:
+ case BPF_S_LDX_MEM:
+ case BPF_S_ST:
+ case BPF_S_STX:
+ /* check for invalid memory addresses */
if (ftest->k >= BPF_MEMWORDS)
return -EINVAL;
- ftest->code = BPF_S_STX;
break;
-
- case BPF_JMP|BPF_JA:
+ case BPF_S_JMP_JA:
/*
* Note, the large ftest->k might cause loops.
* Compare this with conditional jumps below,
@@ -522,40 +525,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
*/
if (ftest->k >= (unsigned)(flen-pc-1))
return -EINVAL;
- ftest->code = BPF_S_JMP_JA;
- break;
-
- case BPF_JMP|BPF_JEQ|BPF_K:
- ftest->code = BPF_S_JMP_JEQ_K;
break;
- case BPF_JMP|BPF_JEQ|BPF_X:
- ftest->code = BPF_S_JMP_JEQ_X;
- break;
- case BPF_JMP|BPF_JGE|BPF_K:
- ftest->code = BPF_S_JMP_JGE_K;
- break;
- case BPF_JMP|BPF_JGE|BPF_X:
- ftest->code = BPF_S_JMP_JGE_X;
- break;
- case BPF_JMP|BPF_JGT|BPF_K:
- ftest->code = BPF_S_JMP_JGT_K;
- break;
- case BPF_JMP|BPF_JGT|BPF_X:
- ftest->code = BPF_S_JMP_JGT_X;
- break;
- case BPF_JMP|BPF_JSET|BPF_K:
- ftest->code = BPF_S_JMP_JSET_K;
- break;
- case BPF_JMP|BPF_JSET|BPF_X:
- ftest->code = BPF_S_JMP_JSET_X;
- break;
-
- default:
- return -EINVAL;
- }
-
- /* for conditionals both must be safe */
- switch (ftest->code) {
case BPF_S_JMP_JEQ_K:
case BPF_S_JMP_JEQ_X:
case BPF_S_JMP_JGE_K:
@@ -564,10 +534,13 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
case BPF_S_JMP_JGT_X:
case BPF_S_JMP_JSET_X:
case BPF_S_JMP_JSET_K:
+ /* for conditionals both must be safe */
if (pc + ftest->jt + 1 >= flen ||
pc + ftest->jf + 1 >= flen)
return -EINVAL;
+ break;
}
+ ftest->code = code;
}
/* last instruction must be a RET code */
@@ -575,15 +548,13 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
case BPF_S_RET_K:
case BPF_S_RET_A:
return 0;
- break;
- default:
- return -EINVAL;
- }
+ }
+ return -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
/**
- * sk_filter_rcu_release: Release a socket filter by rcu_head
+ * sk_filter_rcu_release - Release a socket filter by rcu_head
* @rcu: rcu_head that contains the sk_filter to free
*/
static void sk_filter_rcu_release(struct rcu_head *rcu)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a5ff5a89f376..85e8b5326dd6 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -706,21 +706,24 @@ static struct attribute *rx_queue_default_attrs[] = {
static void rx_queue_release(struct kobject *kobj)
{
struct netdev_rx_queue *queue = to_rx_queue(kobj);
- struct netdev_rx_queue *first = queue->first;
struct rps_map *map;
struct rps_dev_flow_table *flow_table;
map = rcu_dereference_raw(queue->rps_map);
- if (map)
+ if (map) {
+ RCU_INIT_POINTER(queue->rps_map, NULL);
call_rcu(&map->rcu, rps_map_release);
+ }
flow_table = rcu_dereference_raw(queue->rps_flow_table);
- if (flow_table)
+ if (flow_table) {
+ RCU_INIT_POINTER(queue->rps_flow_table, NULL);
call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
+ }
- if (atomic_dec_and_test(&first->count))
- kfree(first);
+ memset(kobj, 0, sizeof(*kobj));
+ dev_put(queue->dev);
}
static struct kobj_type rx_queue_ktype = {
@@ -732,7 +735,6 @@ static struct kobj_type rx_queue_ktype = {
static int rx_queue_add_kobject(struct net_device *net, int index)
{
struct netdev_rx_queue *queue = net->_rx + index;
- struct netdev_rx_queue *first = queue->first;
struct kobject *kobj = &queue->kobj;
int error = 0;
@@ -745,14 +747,16 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
}
kobject_uevent(kobj, KOBJ_ADD);
- atomic_inc(&first->count);
+ dev_hold(queue->dev);
return error;
}
+#endif /* CONFIG_RPS */
int
net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
{
+#ifdef CONFIG_RPS
int i;
int error = 0;
@@ -768,23 +772,422 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
kobject_put(&net->_rx[i].kobj);
return error;
+#else
+ return 0;
+#endif
+}
+
+#ifdef CONFIG_XPS
+/*
+ * netdev_queue sysfs structures and functions.
+ */
+struct netdev_queue_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attr, char *buf);
+ ssize_t (*store)(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_netdev_queue_attr(_attr) container_of(_attr, \
+ struct netdev_queue_attribute, attr)
+
+#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
+
+static ssize_t netdev_queue_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+ struct netdev_queue *queue = to_netdev_queue(kobj);
+
+ if (!attribute->show)
+ return -EIO;
+
+ return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t netdev_queue_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+ struct netdev_queue *queue = to_netdev_queue(kobj);
+
+ if (!attribute->store)
+ return -EIO;
+
+ return attribute->store(queue, attribute, buf, count);
+}
+
+static const struct sysfs_ops netdev_queue_sysfs_ops = {
+ .show = netdev_queue_attr_show,
+ .store = netdev_queue_attr_store,
+};
+
+static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+{
+ struct net_device *dev = queue->dev;
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++)
+ if (queue == &dev->_tx[i])
+ break;
+
+ BUG_ON(i >= dev->num_tx_queues);
+
+ return i;
}
-static int rx_queue_register_kobjects(struct net_device *net)
+
+static ssize_t show_xps_map(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute, char *buf)
{
+ struct net_device *dev = queue->dev;
+ struct xps_dev_maps *dev_maps;
+ cpumask_var_t mask;
+ unsigned long index;
+ size_t len = 0;
+ int i;
+
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ index = get_netdev_queue_index(queue);
+
+ rcu_read_lock();
+ dev_maps = rcu_dereference(dev->xps_maps);
+ if (dev_maps) {
+ for_each_possible_cpu(i) {
+ struct xps_map *map =
+ rcu_dereference(dev_maps->cpu_map[i]);
+ if (map) {
+ int j;
+ for (j = 0; j < map->len; j++) {
+ if (map->queues[j] == index) {
+ cpumask_set_cpu(i, mask);
+ break;
+ }
+ }
+ }
+ }
+ }
+ rcu_read_unlock();
+
+ len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+ if (PAGE_SIZE - len < 3) {
+ free_cpumask_var(mask);
+ return -EINVAL;
+ }
+
+ free_cpumask_var(mask);
+ len += sprintf(buf + len, "\n");
+ return len;
+}
+
+static void xps_map_release(struct rcu_head *rcu)
+{
+ struct xps_map *map = container_of(rcu, struct xps_map, rcu);
+
+ kfree(map);
+}
+
+static void xps_dev_maps_release(struct rcu_head *rcu)
+{
+ struct xps_dev_maps *dev_maps =
+ container_of(rcu, struct xps_dev_maps, rcu);
+
+ kfree(dev_maps);
+}
+
+static DEFINE_MUTEX(xps_map_mutex);
+#define xmap_dereference(P) \
+ rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
+
+static ssize_t store_xps_map(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ const char *buf, size_t len)
+{
+ struct net_device *dev = queue->dev;
+ cpumask_var_t mask;
+ int err, i, cpu, pos, map_len, alloc_len, need_set;
+ unsigned long index;
+ struct xps_map *map, *new_map;
+ struct xps_dev_maps *dev_maps, *new_dev_maps;
+ int nonempty = 0;
+ int numa_node = -2;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ index = get_netdev_queue_index(queue);
+
+ err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+ if (err) {
+ free_cpumask_var(mask);
+ return err;
+ }
+
+ new_dev_maps = kzalloc(max_t(unsigned,
+ XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
+ if (!new_dev_maps) {
+ free_cpumask_var(mask);
+ return -ENOMEM;
+ }
+
+ mutex_lock(&xps_map_mutex);
+
+ dev_maps = xmap_dereference(dev->xps_maps);
+
+ for_each_possible_cpu(cpu) {
+ map = dev_maps ?
+ xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+ new_map = map;
+ if (map) {
+ for (pos = 0; pos < map->len; pos++)
+ if (map->queues[pos] == index)
+ break;
+ map_len = map->len;
+ alloc_len = map->alloc_len;
+ } else
+ pos = map_len = alloc_len = 0;
+
+ need_set = cpu_isset(cpu, *mask) && cpu_online(cpu);
+#ifdef CONFIG_NUMA
+ if (need_set) {
+ if (numa_node == -2)
+ numa_node = cpu_to_node(cpu);
+ else if (numa_node != cpu_to_node(cpu))
+ numa_node = -1;
+ }
+#endif
+ if (need_set && pos >= map_len) {
+ /* Need to add queue to this CPU's map */
+ if (map_len >= alloc_len) {
+ alloc_len = alloc_len ?
+ 2 * alloc_len : XPS_MIN_MAP_ALLOC;
+ new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!new_map)
+ goto error;
+ new_map->alloc_len = alloc_len;
+ for (i = 0; i < map_len; i++)
+ new_map->queues[i] = map->queues[i];
+ new_map->len = map_len;
+ }
+ new_map->queues[new_map->len++] = index;
+ } else if (!need_set && pos < map_len) {
+ /* Need to remove queue from this CPU's map */
+ if (map_len > 1)
+ new_map->queues[pos] =
+ new_map->queues[--new_map->len];
+ else
+ new_map = NULL;
+ }
+ RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
+ }
+
+ /* Cleanup old maps */
+ for_each_possible_cpu(cpu) {
+ map = dev_maps ?
+ xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+ if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
+ call_rcu(&map->rcu, xps_map_release);
+ if (new_dev_maps->cpu_map[cpu])
+ nonempty = 1;
+ }
+
+ if (nonempty)
+ rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+ else {
+ kfree(new_dev_maps);
+ rcu_assign_pointer(dev->xps_maps, NULL);
+ }
+
+ if (dev_maps)
+ call_rcu(&dev_maps->rcu, xps_dev_maps_release);
+
+ netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : -1);
+
+ mutex_unlock(&xps_map_mutex);
+
+ free_cpumask_var(mask);
+ return len;
+
+error:
+ mutex_unlock(&xps_map_mutex);
+
+ if (new_dev_maps)
+ for_each_possible_cpu(i)
+ kfree(rcu_dereference_protected(
+ new_dev_maps->cpu_map[i],
+ 1));
+ kfree(new_dev_maps);
+ free_cpumask_var(mask);
+ return -ENOMEM;
+}
+
+static struct netdev_queue_attribute xps_cpus_attribute =
+ __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+
+static struct attribute *netdev_queue_default_attrs[] = {
+ &xps_cpus_attribute.attr,
+ NULL
+};
+
+static void netdev_queue_release(struct kobject *kobj)
+{
+ struct netdev_queue *queue = to_netdev_queue(kobj);
+ struct net_device *dev = queue->dev;
+ struct xps_dev_maps *dev_maps;
+ struct xps_map *map;
+ unsigned long index;
+ int i, pos, nonempty = 0;
+
+ index = get_netdev_queue_index(queue);
+
+ mutex_lock(&xps_map_mutex);
+ dev_maps = xmap_dereference(dev->xps_maps);
+
+ if (dev_maps) {
+ for_each_possible_cpu(i) {
+ map = xmap_dereference(dev_maps->cpu_map[i]);
+ if (!map)
+ continue;
+
+ for (pos = 0; pos < map->len; pos++)
+ if (map->queues[pos] == index)
+ break;
+
+ if (pos < map->len) {
+ if (map->len > 1)
+ map->queues[pos] =
+ map->queues[--map->len];
+ else {
+ RCU_INIT_POINTER(dev_maps->cpu_map[i],
+ NULL);
+ call_rcu(&map->rcu, xps_map_release);
+ map = NULL;
+ }
+ }
+ if (map)
+ nonempty = 1;
+ }
+
+ if (!nonempty) {
+ RCU_INIT_POINTER(dev->xps_maps, NULL);
+ call_rcu(&dev_maps->rcu, xps_dev_maps_release);
+ }
+ }
+
+ mutex_unlock(&xps_map_mutex);
+
+ memset(kobj, 0, sizeof(*kobj));
+ dev_put(queue->dev);
+}
+
+static struct kobj_type netdev_queue_ktype = {
+ .sysfs_ops = &netdev_queue_sysfs_ops,
+ .release = netdev_queue_release,
+ .default_attrs = netdev_queue_default_attrs,
+};
+
+static int netdev_queue_add_kobject(struct net_device *net, int index)
+{
+ struct netdev_queue *queue = net->_tx + index;
+ struct kobject *kobj = &queue->kobj;
+ int error = 0;
+
+ kobj->kset = net->queues_kset;
+ error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
+ "tx-%u", index);
+ if (error) {
+ kobject_put(kobj);
+ return error;
+ }
+
+ kobject_uevent(kobj, KOBJ_ADD);
+ dev_hold(queue->dev);
+
+ return error;
+}
+#endif /* CONFIG_XPS */
+
+int
+netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
+{
+#ifdef CONFIG_XPS
+ int i;
+ int error = 0;
+
+ for (i = old_num; i < new_num; i++) {
+ error = netdev_queue_add_kobject(net, i);
+ if (error) {
+ new_num = old_num;
+ break;
+ }
+ }
+
+ while (--i >= new_num)
+ kobject_put(&net->_tx[i].kobj);
+
+ return error;
+#else
+ return 0;
+#endif
+}
+
+static int register_queue_kobjects(struct net_device *net)
+{
+ int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
+
+#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
net->queues_kset = kset_create_and_add("queues",
NULL, &net->dev.kobj);
if (!net->queues_kset)
return -ENOMEM;
- return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
+#endif
+
+#ifdef CONFIG_RPS
+ real_rx = net->real_num_rx_queues;
+#endif
+ real_tx = net->real_num_tx_queues;
+
+ error = net_rx_queue_update_kobjects(net, 0, real_rx);
+ if (error)
+ goto error;
+ rxq = real_rx;
+
+ error = netdev_queue_update_kobjects(net, 0, real_tx);
+ if (error)
+ goto error;
+ txq = real_tx;
+
+ return 0;
+
+error:
+ netdev_queue_update_kobjects(net, txq, 0);
+ net_rx_queue_update_kobjects(net, rxq, 0);
+ return error;
}
-static void rx_queue_remove_kobjects(struct net_device *net)
+static void remove_queue_kobjects(struct net_device *net)
{
- net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0);
+ int real_rx = 0, real_tx = 0;
+
+#ifdef CONFIG_RPS
+ real_rx = net->real_num_rx_queues;
+#endif
+ real_tx = net->real_num_tx_queues;
+
+ net_rx_queue_update_kobjects(net, real_rx, 0);
+ netdev_queue_update_kobjects(net, real_tx, 0);
+#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
kset_unregister(net->queues_kset);
+#endif
}
-#endif /* CONFIG_RPS */
static const void *net_current_ns(void)
{
@@ -883,9 +1286,7 @@ void netdev_unregister_kobject(struct net_device * net)
kobject_get(&dev->kobj);
-#ifdef CONFIG_RPS
- rx_queue_remove_kobjects(net);
-#endif
+ remove_queue_kobjects(net);
device_del(dev);
}
@@ -924,13 +1325,11 @@ int netdev_register_kobject(struct net_device *net)
if (error)
return error;
-#ifdef CONFIG_RPS
- error = rx_queue_register_kobjects(net);
+ error = register_queue_kobjects(net);
if (error) {
device_del(dev);
return error;
}
-#endif
return error;
}
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 778e1571548d..bd7751ec1c4d 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,8 +4,8 @@
int netdev_kobject_init(void);
int netdev_register_kobject(struct net_device *);
void netdev_unregister_kobject(struct net_device *);
-#ifdef CONFIG_RPS
int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
-#endif
+int netdev_queue_update_kobjects(struct net_device *net,
+ int old_num, int new_num);
#endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4e98ffac3af0..ee38acb6d463 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -76,8 +76,7 @@ static void queue_process(struct work_struct *work)
local_irq_save(flags);
__netif_tx_lock(txq, smp_processor_id());
- if (netif_tx_queue_stopped(txq) ||
- netif_tx_queue_frozen(txq) ||
+ if (netif_tx_queue_frozen_or_stopped(txq) ||
ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
skb_queue_head(&npinfo->txq, skb);
__netif_tx_unlock(txq);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fbce4b05a53e..2953b2abc971 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -378,6 +378,7 @@ struct pktgen_dev {
u16 queue_map_min;
u16 queue_map_max;
+ __u32 skb_priority; /* skb priority field */
int node; /* Memory node */
#ifdef CONFIG_XFRM
@@ -394,6 +395,8 @@ struct pktgen_hdr {
__be32 tv_usec;
};
+static bool pktgen_exiting __read_mostly;
+
struct pktgen_thread {
spinlock_t if_lock; /* for list of devices */
struct list_head if_list; /* All device here */
@@ -547,6 +550,10 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
pkt_dev->queue_map_min,
pkt_dev->queue_map_max);
+ if (pkt_dev->skb_priority)
+ seq_printf(seq, " skb_priority: %u\n",
+ pkt_dev->skb_priority);
+
if (pkt_dev->flags & F_IPV6) {
char b1[128], b2[128], b3[128];
fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr);
@@ -887,7 +894,7 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
if (debug) {
- size_t copy = min(count, 1023);
+ size_t copy = min_t(size_t, count, 1023);
char tb[copy + 1];
if (copy_from_user(tb, user_buffer, copy))
return -EFAULT;
@@ -1711,6 +1718,18 @@ static ssize_t pktgen_if_write(struct file *file,
return count;
}
+ if (!strcmp(name, "skb_priority")) {
+ len = num_arg(&user_buffer[i], 9, &value);
+ if (len < 0)
+ return len;
+
+ i += len;
+ pkt_dev->skb_priority = value;
+ sprintf(pg_result, "OK: skb_priority=%i",
+ pkt_dev->skb_priority);
+ return count;
+ }
+
sprintf(pkt_dev->result, "No such parameter \"%s\"", name);
return -EINVAL;
}
@@ -2612,8 +2631,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
- queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
+ queue_map = pkt_dev->cur_queue_map;
datalen = (odev->hard_header_len + 16) & ~0xf;
@@ -2671,6 +2690,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
skb->transport_header = skb->network_header + sizeof(struct iphdr);
skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
skb_set_queue_mapping(skb, queue_map);
+ skb->priority = pkt_dev->skb_priority;
+
iph = ip_hdr(skb);
udph = udp_hdr(skb);
@@ -2976,8 +2997,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
- queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
+ queue_map = pkt_dev->cur_queue_map;
skb = __netdev_alloc_skb(odev,
pkt_dev->cur_pkt_size + 64
@@ -3016,6 +3037,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
skb_set_queue_mapping(skb, queue_map);
+ skb->priority = pkt_dev->skb_priority;
iph = ipv6_hdr(skb);
udph = udp_hdr(skb);
@@ -3431,11 +3453,6 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
remove_proc_entry(t->tsk->comm, pg_proc_dir);
- mutex_lock(&pktgen_thread_lock);
-
- list_del(&t->th_list);
-
- mutex_unlock(&pktgen_thread_lock);
}
static void pktgen_resched(struct pktgen_dev *pkt_dev)
@@ -3510,7 +3527,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
__netif_tx_lock_bh(txq);
- if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) {
+ if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) {
ret = NETDEV_TX_BUSY;
pkt_dev->last_ok = 0;
goto unlock;
@@ -3582,6 +3599,8 @@ static int pktgen_thread_worker(void *arg)
pkt_dev = next_to_run(t);
if (unlikely(!pkt_dev && t->control == 0)) {
+ if (pktgen_exiting)
+ break;
wait_event_interruptible_timeout(t->queue,
t->control != 0,
HZ/10);
@@ -3634,6 +3653,13 @@ static int pktgen_thread_worker(void *arg)
pr_debug("%s removing thread\n", t->tsk->comm);
pktgen_rem_thread(t);
+ /* Wait for kthread_stop */
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+
return 0;
}
@@ -3908,6 +3934,7 @@ static void __exit pg_cleanup(void)
struct list_head *q, *n;
/* Stop all interfaces & threads */
+ pktgen_exiting = true;
list_for_each_safe(q, n, &pktgen_threads) {
t = list_entry(q, struct pktgen_thread, th_list);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 7552495aff7a..41d99435f62d 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -33,6 +33,7 @@
* Note : Dont forget somaxconn that may limit backlog too.
*/
int sysctl_max_syn_backlog = 256;
+EXPORT_SYMBOL(sysctl_max_syn_backlog);
int reqsk_queue_alloc(struct request_sock_queue *queue,
unsigned int nr_table_entries)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8121268ddbdd..750db57f3bb3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -347,16 +347,106 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
if (!ops)
return 0;
- size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
- nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */
+ size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
+ nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */
if (ops->get_size)
/* IFLA_INFO_DATA + nested data */
- size += nlmsg_total_size(sizeof(struct nlattr)) +
+ size += nla_total_size(sizeof(struct nlattr)) +
ops->get_size(dev);
if (ops->get_xstats_size)
- size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */
+ /* IFLA_INFO_XSTATS */
+ size += nla_total_size(ops->get_xstats_size(dev));
+
+ return size;
+}
+
+static LIST_HEAD(rtnl_af_ops);
+
+static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
+{
+ const struct rtnl_af_ops *ops;
+
+ list_for_each_entry(ops, &rtnl_af_ops, list) {
+ if (ops->family == family)
+ return ops;
+ }
+
+ return NULL;
+}
+
+/**
+ * __rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * The caller must hold the rtnl_mutex.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_af_register(struct rtnl_af_ops *ops)
+{
+ list_add_tail(&ops->list, &rtnl_af_ops);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_register);
+
+/**
+ * rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_af_register(struct rtnl_af_ops *ops)
+{
+ int err;
+
+ rtnl_lock();
+ err = __rtnl_af_register(ops);
+ rtnl_unlock();
+ return err;
+}
+EXPORT_SYMBOL_GPL(rtnl_af_register);
+
+/**
+ * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+void __rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+ list_del(&ops->list);
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
+
+/**
+ * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ */
+void rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+ rtnl_lock();
+ __rtnl_af_unregister(ops);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(rtnl_af_unregister);
+
+static size_t rtnl_link_get_af_size(const struct net_device *dev)
+{
+ struct rtnl_af_ops *af_ops;
+ size_t size;
+
+ /* IFLA_AF_SPEC */
+ size = nla_total_size(sizeof(struct nlattr));
+
+ list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ if (af_ops->get_link_af_size) {
+ /* AF_* + nested data */
+ size += nla_total_size(sizeof(struct nlattr)) +
+ af_ops->get_link_af_size(dev);
+ }
+ }
return size;
}
@@ -670,7 +760,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
+ nla_total_size(4) /* IFLA_NUM_VF */
+ rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
+ rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
- + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
+ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -756,7 +847,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct nlmsghdr *nlh;
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats;
- struct nlattr *attr;
+ struct nlattr *attr, *af_spec;
+ struct rtnl_af_ops *af_ops;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
if (nlh == NULL)
@@ -865,6 +957,36 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
}
+ if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+ goto nla_put_failure;
+
+ list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ if (af_ops->fill_link_af) {
+ struct nlattr *af;
+ int err;
+
+ if (!(af = nla_nest_start(skb, af_ops->family)))
+ goto nla_put_failure;
+
+ err = af_ops->fill_link_af(skb, dev);
+
+ /*
+ * Caller may return ENODATA to indicate that there
+ * was no data to be dumped. This is not an error, it
+ * means we should trim the attribute header and
+ * continue.
+ */
+ if (err == -ENODATA)
+ nla_nest_cancel(skb, af);
+ else if (err < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, af);
+ }
+ }
+
+ nla_nest_end(skb, af_spec);
+
return nlmsg_end(skb, nlh);
nla_put_failure:
@@ -923,6 +1045,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
[IFLA_PORT_SELF] = { .type = NLA_NESTED },
+ [IFLA_AF_SPEC] = { .type = NLA_NESTED },
};
EXPORT_SYMBOL(ifla_policy);
@@ -984,6 +1107,28 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
return -EINVAL;
}
+ if (tb[IFLA_AF_SPEC]) {
+ struct nlattr *af;
+ int rem, err;
+
+ nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
+ const struct rtnl_af_ops *af_ops;
+
+ if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+ return -EAFNOSUPPORT;
+
+ if (!af_ops->set_link_af)
+ return -EOPNOTSUPP;
+
+ if (af_ops->validate_link_af) {
+ err = af_ops->validate_link_af(dev,
+ tb[IFLA_AF_SPEC]);
+ if (err < 0)
+ return err;
+ }
+ }
+ }
+
return 0;
}
@@ -1224,6 +1369,24 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
goto errout;
modified = 1;
}
+
+ if (tb[IFLA_AF_SPEC]) {
+ struct nlattr *af;
+ int rem;
+
+ nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
+ const struct rtnl_af_ops *af_ops;
+
+ if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+ BUG();
+
+ err = af_ops->set_link_af(dev, af);
+ if (err < 0)
+ goto errout;
+
+ modified = 1;
+ }
+ }
err = 0;
errout:
diff --git a/net/core/scm.c b/net/core/scm.c
index 413cab89017d..bbe454450801 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -79,10 +79,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
return -ENOMEM;
*fplp = fpl;
fpl->count = 0;
+ fpl->max = SCM_MAX_FD;
}
fpp = &fpl->fp[fpl->count];
- if (fpl->count + num > SCM_MAX_FD)
+ if (fpl->count + num > fpl->max)
return -EINVAL;
/*
@@ -331,11 +332,12 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
if (!fpl)
return NULL;
- new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+ new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
+ GFP_KERNEL);
if (new_fpl) {
- for (i=fpl->count-1; i>=0; i--)
+ for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
- memcpy(new_fpl, fpl, sizeof(*fpl));
+ new_fpl->max = new_fpl->count;
}
return new_fpl;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 104f8444754a..8814a9a52f47 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -778,6 +778,28 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
size = SKB_DATA_ALIGN(size);
+ /* Check if we can avoid taking references on fragments if we own
+ * the last reference on skb->head. (see skb_release_data())
+ */
+ if (!skb->cloned)
+ fastpath = true;
+ else {
+ int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
+
+ fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
+ }
+
+ if (fastpath &&
+ size + sizeof(struct skb_shared_info) <= ksize(skb->head)) {
+ memmove(skb->head + size, skb_shinfo(skb),
+ offsetof(struct skb_shared_info,
+ frags[skb_shinfo(skb)->nr_frags]));
+ memmove(skb->head + nhead, skb->head,
+ skb_tail_pointer(skb) - skb->head);
+ off = nhead;
+ goto adjust_others;
+ }
+
data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
if (!data)
goto nodata;
@@ -791,17 +813,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb_shinfo(skb),
offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
- /* Check if we can avoid taking references on fragments if we own
- * the last reference on skb->head. (see skb_release_data())
- */
- if (!skb->cloned)
- fastpath = true;
- else {
- int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
-
- fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
- }
-
if (fastpath) {
kfree(skb->head);
} else {
@@ -816,6 +827,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
off = (data + nhead) - skb->head;
skb->head = data;
+adjust_others:
skb->data += off;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
skb->end = size;
diff --git a/net/core/sock.c b/net/core/sock.c
index 3eed5424e659..fb6080111461 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1653,10 +1653,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
struct proto *prot = sk->sk_prot;
int amt = sk_mem_pages(size);
- int allocated;
+ long allocated;
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
- allocated = atomic_add_return(amt, prot->memory_allocated);
+ allocated = atomic_long_add_return(amt, prot->memory_allocated);
/* Under limit. */
if (allocated <= prot->sysctl_mem[0]) {
@@ -1714,7 +1714,7 @@ suppress_allocation:
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
- atomic_sub(amt, prot->memory_allocated);
+ atomic_long_sub(amt, prot->memory_allocated);
return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1727,12 +1727,12 @@ void __sk_mem_reclaim(struct sock *sk)
{
struct proto *prot = sk->sk_prot;
- atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
+ atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
prot->memory_allocated);
sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
if (prot->memory_pressure && *prot->memory_pressure &&
- (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+ (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
*prot->memory_pressure = 0;
}
EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -2452,12 +2452,12 @@ static char proto_method_implemented(const void *method)
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
- seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
+ seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
proto->name,
proto->obj_size,
sock_prot_inuse_get(seq_file_net(seq), proto),
- proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
+ proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
proto->max_header,
proto->slab == NULL ? "no" : "yes",
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 0ae6c22da85b..dac7ed687f60 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -31,7 +31,7 @@ static unsigned int classify(struct sk_buff *skb)
if (likely(skb->dev &&
skb->dev->phydev &&
skb->dev->phydev->drv))
- return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
+ return sk_run_filter(skb, ptp_filter);
else
return PTP_CLASS_NONE;
}