aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/protocol.c33
-rw-r--r--net/bridge/br_device.c2
-rw-r--r--net/bridge/br_netfilter.c2
-rw-r--r--net/ceph/Makefile22
-rw-r--r--net/ceph/buffer.c2
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c65
-rw-r--r--net/core/ethtool.c4
-rw-r--r--net/core/filter.c108
-rw-r--r--net/core/request_sock.c4
-rw-r--r--net/core/sock.c11
-rw-r--r--net/core/timestamping.c2
-rw-r--r--net/dccp/Makefile4
-rw-r--r--net/dccp/dccp.h13
-rw-r--r--net/dccp/input.c3
-rw-r--r--net/dccp/output.c7
-rw-r--r--net/dccp/proto.c71
-rw-r--r--net/dccp/qpolicy.c137
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/decnet/dn_route.c13
-rw-r--r--net/econet/af_econet.c93
-rw-r--r--net/ieee802154/af_ieee802154.c6
-rw-r--r--net/ipv4/arp.c19
-rw-r--r--net/ipv4/devinet.c8
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv4/inet_connection_sock.c7
-rw-r--r--net/ipv4/inet_hashtables.c3
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/route.c55
-rw-r--r--net/ipv4/sysctl_net_ipv4.c6
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_input.c22
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c42
-rw-r--r--net/ipv6/ip6_tunnel.c7
-rw-r--r--net/ipv6/ndisc.c5
-rw-r--r--net/ipv6/route.c66
-rw-r--r--net/ipv6/sit.c3
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/l2tp/l2tp_ip.c6
-rw-r--r--net/llc/af_llc.c11
-rw-r--r--net/packet/af_packet.c90
-rw-r--r--net/sctp/socket.c2
-rw-r--r--net/unix/af_unix.c37
-rw-r--r--net/unix/garbage.c9
-rw-r--r--net/x25/x25_link.c1
-rw-r--r--net/xfrm/xfrm_hash.c2
-rw-r--r--net/xfrm/xfrm_policy.c6
50 files changed, 660 insertions, 370 deletions
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 45c15f491401..798beac7f100 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -27,31 +27,16 @@
#include <linux/module.h>
#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/stddef.h>
#include <linux/types.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include "protocol.h"
-#ifndef MIN
-#define MIN(a, b) (((a) < (b)) ? (a) : (b))
-#endif
-
-#ifndef MAX
-#define MAX(a, b) (((a) > (b)) ? (a) : (b))
-#endif
-
-#ifndef offset_of
-#define offset_of(type, memb) \
- ((unsigned long)(&((type *)0)->memb))
-#endif
-#ifndef container_of
-#define container_of(obj, type, memb) \
- ((type *)(((char *)obj) - offset_of(type, memb)))
-#endif
-
static int
p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
@@ -104,7 +89,7 @@ EXPORT_SYMBOL(p9stat_free);
static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
{
- size_t len = MIN(pdu->size - pdu->offset, size);
+ size_t len = min(pdu->size - pdu->offset, size);
memcpy(data, &pdu->sdata[pdu->offset], len);
pdu->offset += len;
return size - len;
@@ -112,7 +97,7 @@ static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
{
- size_t len = MIN(pdu->capacity - pdu->size, size);
+ size_t len = min(pdu->capacity - pdu->size, size);
memcpy(&pdu->sdata[pdu->size], data, len);
pdu->size += len;
return size - len;
@@ -121,7 +106,7 @@ static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
static size_t
pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
{
- size_t len = MIN(pdu->capacity - pdu->size, size);
+ size_t len = min(pdu->capacity - pdu->size, size);
if (copy_from_user(&pdu->sdata[pdu->size], udata, len))
len = 0;
@@ -201,7 +186,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
if (errcode)
break;
- size = MAX(len, 0);
+ size = max_t(int16_t, len, 0);
*sptr = kmalloc(size + 1, GFP_KERNEL);
if (*sptr == NULL) {
@@ -256,8 +241,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
p9pdu_readf(pdu, proto_version, "d", count);
if (!errcode) {
*count =
- MIN(*count,
- pdu->size - pdu->offset);
+ min_t(int32_t, *count,
+ pdu->size - pdu->offset);
*data = &pdu->sdata[pdu->offset];
}
}
@@ -421,7 +406,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
const char *sptr = va_arg(ap, const char *);
int16_t len = 0;
if (sptr)
- len = MIN(strlen(sptr), USHRT_MAX);
+ len = min_t(int16_t, strlen(sptr), USHRT_MAX);
errcode = p9pdu_writef(pdu, proto_version,
"w", len);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 17cb0b633576..556443566e9c 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -141,7 +141,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
#ifdef CONFIG_BRIDGE_NETFILTER
/* remember the MTU in the rtable for PMTU */
- br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu;
+ dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
#endif
return 0;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 6e1392093911..16f5c333596a 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -124,7 +124,7 @@ void br_netfilter_rtable_init(struct net_bridge *br)
atomic_set(&rt->dst.__refcnt, 1);
rt->dst.dev = br->dev;
rt->dst.path = &rt->dst;
- rt->dst.metrics[RTAX_MTU - 1] = 1500;
+ dst_metric_set(&rt->dst, RTAX_MTU, 1500);
rt->dst.flags = DST_NOXFRM;
rt->dst.ops = &fake_dst_ops;
}
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index 153bdec40835..e87ef435e11b 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -1,9 +1,6 @@
#
# Makefile for CEPH filesystem.
#
-
-ifneq ($(KERNELRELEASE),)
-
obj-$(CONFIG_CEPH_LIB) += libceph.o
libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
@@ -16,22 +13,3 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
ceph_fs.o ceph_strings.o ceph_hash.o \
pagevec.o
-else
-#Otherwise we were called directly from the command
-# line; invoke the kernel build system.
-
-KERNELDIR ?= /lib/modules/$(shell uname -r)/build
-PWD := $(shell pwd)
-
-default: all
-
-all:
- $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules
-
-modules_install:
- $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install
-
-clean:
- $(MAKE) -C $(KERNELDIR) M=$(PWD) clean
-
-endif
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index 53d8abfa25d5..bf3e6a13c215 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -19,7 +19,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
if (b->vec.iov_base) {
b->is_vmalloc = false;
} else {
- b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL);
+ b->vec.iov_base = __vmalloc(len, gfp | __GFP_HIGHMEM, PAGE_KERNEL);
if (!b->vec.iov_base) {
kfree(b);
return NULL;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index cd1e039c8755..18ac112ea7ae 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -177,7 +177,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
* interrupt level will suddenly eat the receive_queue.
*
* Look at current nfs client by the way...
- * However, this function was corrent in any case. 8)
+ * However, this function was correct in any case. 8)
*/
unsigned long cpu_flags;
diff --git a/net/core/dev.c b/net/core/dev.c
index cd2437495428..d28b3a023bb2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -743,34 +743,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
EXPORT_SYMBOL(dev_get_by_index);
/**
- * dev_getbyhwaddr - find a device by its hardware address
+ * dev_getbyhwaddr_rcu - find a device by its hardware address
* @net: the applicable net namespace
* @type: media type of device
* @ha: hardware address
*
* Search for an interface by MAC address. Returns NULL if the device
- * is not found or a pointer to the device. The caller must hold the
- * rtnl semaphore. The returned device has not had its ref count increased
+ * is not found or a pointer to the device. The caller must hold RCU
+ * The returned device has not had its ref count increased
* and the caller must therefore be careful about locking
*
- * BUGS:
- * If the API was consistent this would be __dev_get_by_hwaddr
*/
-struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
+struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+ const char *ha)
{
struct net_device *dev;
- ASSERT_RTNL();
-
- for_each_netdev(net, dev)
+ for_each_netdev_rcu(net, dev)
if (dev->type == type &&
!memcmp(dev->dev_addr, ha, dev->addr_len))
return dev;
return NULL;
}
-EXPORT_SYMBOL(dev_getbyhwaddr);
+EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
@@ -2025,9 +2022,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
int rc = NETDEV_TX_OK;
if (likely(!skb->next)) {
- if (!list_empty(&ptype_all))
- dev_queue_xmit_nit(skb, dev);
-
/*
* If device doesnt need skb->dst, release it right now while
* its hot in this cpu cache
@@ -2035,6 +2029,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(skb);
+ if (!list_empty(&ptype_all))
+ dev_queue_xmit_nit(skb, dev);
+
skb_orphan_try(skb);
if (vlan_tx_tag_present(skb) &&
@@ -5041,10 +5038,13 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
}
if (features & NETIF_F_UFO) {
- if (!(features & NETIF_F_GEN_CSUM)) {
+ /* maybe split UFO into V4 and V6? */
+ if (!((features & NETIF_F_GEN_CSUM) ||
+ (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+ == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
if (name)
printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
- "since no NETIF_F_HW_CSUM feature.\n",
+ "since no checksum offload features.\n",
name);
features &= ~NETIF_F_UFO;
}
@@ -5109,11 +5109,21 @@ static int netif_alloc_rx_queues(struct net_device *dev)
}
#endif
+static void netdev_init_one_queue(struct net_device *dev,
+ struct netdev_queue *queue, void *_unused)
+{
+ /* Initialize queue lock */
+ spin_lock_init(&queue->_xmit_lock);
+ netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+ queue->xmit_lock_owner = -1;
+ netdev_queue_numa_node_write(queue, -1);
+ queue->dev = dev;
+}
+
static int netif_alloc_netdev_queues(struct net_device *dev)
{
unsigned int count = dev->num_tx_queues;
struct netdev_queue *tx;
- int i;
BUG_ON(count < 1);
@@ -5125,27 +5135,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
}
dev->_tx = tx;
- for (i = 0; i < count; i++) {
- netdev_queue_numa_node_write(&tx[i], -1);
- tx[i].dev = dev;
- }
- return 0;
-}
-
-static void netdev_init_one_queue(struct net_device *dev,
- struct netdev_queue *queue,
- void *_unused)
-{
- /* Initialize queue lock */
- spin_lock_init(&queue->_xmit_lock);
- netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
- queue->xmit_lock_owner = -1;
-}
-
-static void netdev_init_queues(struct net_device *dev)
-{
netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
spin_lock_init(&dev->tx_global_lock);
+
+ return 0;
}
/**
@@ -5184,8 +5177,6 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
- netdev_init_queues(dev);
-
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 956a9f4971cb..d5bc28818883 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1171,7 +1171,9 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
return -EFAULT;
if (edata.data && !(dev->features & NETIF_F_SG))
return -EINVAL;
- if (edata.data && !(dev->features & NETIF_F_HW_CSUM))
+ if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) ||
+ (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+ == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
return -EINVAL;
return dev->ethtool_ops->set_ufo(dev, edata.data);
}
diff --git a/net/core/filter.c b/net/core/filter.c
index a44d27f9f0f0..e8a6ac411ffb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -88,7 +88,7 @@ enum {
};
/* No hurry in this branch */
-static void *__load_pointer(struct sk_buff *skb, int k)
+static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
{
u8 *ptr = NULL;
@@ -97,12 +97,12 @@ static void *__load_pointer(struct sk_buff *skb, int k)
else if (k >= SKF_LL_OFF)
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
- if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
+ if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
return ptr;
return NULL;
}
-static inline void *load_pointer(struct sk_buff *skb, int k,
+static inline void *load_pointer(const struct sk_buff *skb, int k,
unsigned int size, void *buffer)
{
if (k >= 0)
@@ -110,7 +110,7 @@ static inline void *load_pointer(struct sk_buff *skb, int k,
else {
if (k >= SKF_AD_OFF)
return NULL;
- return __load_pointer(skb, k);
+ return __load_pointer(skb, k, size);
}
}
@@ -160,17 +160,16 @@ EXPORT_SYMBOL(sk_filter);
* and last instruction guaranteed to be a RET, we dont need to check
* flen. (We used to pass to this function the length of filter)
*/
-unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
+unsigned int sk_run_filter(const struct sk_buff *skb,
+ const struct sock_filter *fentry)
{
void *ptr;
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
- unsigned long memvalid = 0;
u32 tmp;
int k;
- BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
/*
* Process array of filter instructions.
*/
@@ -318,12 +317,10 @@ load_b:
X = K;
continue;
case BPF_S_LD_MEM:
- A = (memvalid & (1UL << K)) ?
- mem[K] : 0;
+ A = mem[K];
continue;
case BPF_S_LDX_MEM:
- X = (memvalid & (1UL << K)) ?
- mem[K] : 0;
+ X = mem[K];
continue;
case BPF_S_MISC_TAX:
X = A;
@@ -336,11 +333,9 @@ load_b:
case BPF_S_RET_A:
return A;
case BPF_S_ST:
- memvalid |= 1UL << K;
mem[K] = A;
continue;
case BPF_S_STX:
- memvalid |= 1UL << K;
mem[K] = X;
continue;
default:
@@ -375,6 +370,12 @@ load_b:
return 0;
A = skb->dev->type;
continue;
+ case SKF_AD_RXHASH:
+ A = skb->rxhash;
+ continue;
+ case SKF_AD_CPU:
+ A = raw_smp_processor_id();
+ continue;
case SKF_AD_NLATTR: {
struct nlattr *nla;
@@ -419,6 +420,66 @@ load_b:
}
EXPORT_SYMBOL(sk_run_filter);
+/*
+ * Security :
+ * A BPF program is able to use 16 cells of memory to store intermediate
+ * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter())
+ * As we dont want to clear mem[] array for each packet going through
+ * sk_run_filter(), we check that filter loaded by user never try to read
+ * a cell if not previously written, and we check all branches to be sure
+ * a malicious user doesnt try to abuse us.
+ */
+static int check_load_and_stores(struct sock_filter *filter, int flen)
+{
+ u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
+ int pc, ret = 0;
+
+ BUILD_BUG_ON(BPF_MEMWORDS > 16);
+ masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
+ if (!masks)
+ return -ENOMEM;
+ memset(masks, 0xff, flen * sizeof(*masks));
+
+ for (pc = 0; pc < flen; pc++) {
+ memvalid &= masks[pc];
+
+ switch (filter[pc].code) {
+ case BPF_S_ST:
+ case BPF_S_STX:
+ memvalid |= (1 << filter[pc].k);
+ break;
+ case BPF_S_LD_MEM:
+ case BPF_S_LDX_MEM:
+ if (!(memvalid & (1 << filter[pc].k))) {
+ ret = -EINVAL;
+ goto error;
+ }
+ break;
+ case BPF_S_JMP_JA:
+ /* a jump must set masks on target */
+ masks[pc + 1 + filter[pc].k] &= memvalid;
+ memvalid = ~0;
+ break;
+ case BPF_S_JMP_JEQ_K:
+ case BPF_S_JMP_JEQ_X:
+ case BPF_S_JMP_JGE_K:
+ case BPF_S_JMP_JGE_X:
+ case BPF_S_JMP_JGT_K:
+ case BPF_S_JMP_JGT_X:
+ case BPF_S_JMP_JSET_X:
+ case BPF_S_JMP_JSET_K:
+ /* a jump must set masks on targets */
+ masks[pc + 1 + filter[pc].jt] &= memvalid;
+ masks[pc + 1 + filter[pc].jf] &= memvalid;
+ memvalid = ~0;
+ break;
+ }
+ }
+error:
+ kfree(masks);
+ return ret;
+}
+
/**
* sk_chk_filter - verify socket filter code
* @filter: filter to verify
@@ -547,30 +608,23 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
switch (filter[flen - 1].code) {
case BPF_S_RET_K:
case BPF_S_RET_A:
- return 0;
+ return check_load_and_stores(filter, flen);
}
return -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
/**
- * sk_filter_rcu_release - Release a socket filter by rcu_head
+ * sk_filter_release_rcu - Release a socket filter by rcu_head
* @rcu: rcu_head that contains the sk_filter to free
*/
-static void sk_filter_rcu_release(struct rcu_head *rcu)
+void sk_filter_release_rcu(struct rcu_head *rcu)
{
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
- sk_filter_release(fp);
-}
-
-static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
-{
- unsigned int size = sk_filter_len(fp);
-
- atomic_sub(size, &sk->sk_omem_alloc);
- call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
+ kfree(fp);
}
+EXPORT_SYMBOL(sk_filter_release_rcu);
/**
* sk_attach_filter - attach a socket filter
@@ -614,7 +668,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
rcu_assign_pointer(sk->sk_filter, fp);
if (old_fp)
- sk_filter_delayed_uncharge(sk, old_fp);
+ sk_filter_uncharge(sk, old_fp);
return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
@@ -628,7 +682,7 @@ int sk_detach_filter(struct sock *sk)
sock_owned_by_user(sk));
if (filter) {
rcu_assign_pointer(sk->sk_filter, NULL);
- sk_filter_delayed_uncharge(sk, filter);
+ sk_filter_uncharge(sk, filter);
ret = 0;
}
return ret;
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 41d99435f62d..182236b2510a 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -46,9 +46,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
lopt_size += nr_table_entries * sizeof(struct request_sock *);
if (lopt_size > PAGE_SIZE)
- lopt = __vmalloc(lopt_size,
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
- PAGE_KERNEL);
+ lopt = vzalloc(lopt_size);
else
lopt = kzalloc(lopt_size, GFP_KERNEL);
if (lopt == NULL)
diff --git a/net/core/sock.c b/net/core/sock.c
index fb6080111461..bcdb6ff6e621 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -992,17 +992,18 @@ static inline void sock_lock_init(struct sock *sk)
/*
* Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
* even temporarly, because of RCU lookups. sk_node should also be left as is.
+ * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
*/
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
void *sptr = nsk->sk_security;
#endif
- BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) !=
- sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) +
- sizeof(osk->sk_tx_queue_mapping));
- memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
- osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start));
+ memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
+
+ memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
+ osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
+
#ifdef CONFIG_SECURITY_NETWORK
nsk->sk_security = sptr;
security_sk_clone(osk, nsk);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index dac7ed687f60..b124d28ff1c8 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -26,7 +26,7 @@ static struct sock_filter ptp_filter[] = {
PTP_FILTER
};
-static unsigned int classify(struct sk_buff *skb)
+static unsigned int classify(const struct sk_buff *skb)
{
if (likely(skb->dev &&
skb->dev->phydev &&
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 2991efcc8dea..5c8362b037ed 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,7 +1,7 @@
obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
-dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
-
+dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
+ qpolicy.o
#
# CCID algorithms to be used by dccp.ko
#
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 19fafd597465..48ad5d9da7cb 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -243,6 +243,19 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
extern void dccp_send_sync(struct sock *sk, const u64 seq,
const enum dccp_pkt_type pkt_type);
+/*
+ * TX Packet Dequeueing Interface
+ */
+extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
+extern bool dccp_qpolicy_full(struct sock *sk);
+extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
+extern struct sk_buff *dccp_qpolicy_top(struct sock *sk);
+extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk);
+extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param);
+
+/*
+ * TX Packet Output and TX Timers
+ */
extern void dccp_write_xmit(struct sock *sk);
extern void dccp_write_space(struct sock *sk);
extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 7d230d14ce22..15af247ea007 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -241,7 +241,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
dccp_update_gsr(sk, seqno);
if (dh->dccph_type != DCCP_PKT_SYNC &&
- (ackno != DCCP_PKT_WITHOUT_ACK_SEQ))
+ ackno != DCCP_PKT_WITHOUT_ACK_SEQ &&
+ after48(ackno, dp->dccps_gar))
dp->dccps_gar = ackno;
} else {
unsigned long now = jiffies;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d96dd9d362ae..784d30210543 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk)
{
int err, len;
struct dccp_sock *dp = dccp_sk(sk);
- struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue);
+ struct sk_buff *skb = dccp_qpolicy_pop(sk);
if (unlikely(skb == NULL))
return;
@@ -345,7 +345,7 @@ void dccp_write_xmit(struct sock *sk)
struct dccp_sock *dp = dccp_sk(sk);
struct sk_buff *skb;
- while ((skb = skb_peek(&sk->sk_write_queue))) {
+ while ((skb = dccp_qpolicy_top(sk))) {
int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
switch (ccid_packet_dequeue_eval(rc)) {
@@ -359,8 +359,7 @@ void dccp_write_xmit(struct sock *sk)
dccp_xmit_packet(sk);
break;
case CCID_PACKET_ERR:
- skb_dequeue(&sk->sk_write_queue);
- kfree_skb(skb);
+ dccp_qpolicy_drop(sk, skb);
dccp_pr_debug("packet discarded due to err=%d\n", rc);
}
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ef343d53fcea..152975d942d9 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
dp->dccps_role = DCCP_ROLE_UNDEFINED;
dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
+ dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
dccp_init_xmit_timers(sk);
@@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
case DCCP_SOCKOPT_RECV_CSCOV:
err = dccp_setsockopt_cscov(sk, val, true);
break;
+ case DCCP_SOCKOPT_QPOLICY_ID:
+ if (sk->sk_state != DCCP_CLOSED)
+ err = -EISCONN;
+ else if (val < 0 || val >= DCCPQ_POLICY_MAX)
+ err = -EINVAL;
+ else
+ dp->dccps_qpolicy = val;
+ break;
+ case DCCP_SOCKOPT_QPOLICY_TXQLEN:
+ if (val < 0)
+ err = -EINVAL;
+ else
+ dp->dccps_tx_qlen = val;
+ break;
default:
err = -ENOPROTOOPT;
break;
@@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
case DCCP_SOCKOPT_RECV_CSCOV:
val = dp->dccps_pcrlen;
break;
+ case DCCP_SOCKOPT_QPOLICY_ID:
+ val = dp->dccps_qpolicy;
+ break;
+ case DCCP_SOCKOPT_QPOLICY_TXQLEN:
+ val = dp->dccps_tx_qlen;
+ break;
case 128 ... 191:
return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
len, (u32 __user *)optval, optlen);
@@ -681,6 +702,47 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
+static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
+{
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
+
+ /*
+ * Assign an (opaque) qpolicy priority value to skb->priority.
+ *
+ * We are overloading this skb field for use with the qpolicy subystem.
+ * The skb->priority is normally used for the SO_PRIORITY option, which
+ * is initialised from sk_priority. Since the assignment of sk_priority
+ * to skb->priority happens later (on layer 3), we overload this field
+ * for use with queueing priorities as long as the skb is on layer 4.
+ * The default priority value (if nothing is set) is 0.
+ */
+ skb->priority = 0;
+
+ for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+
+ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+
+ if (cmsg->cmsg_level != SOL_DCCP)
+ continue;
+
+ if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
+ !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
+ return -EINVAL;
+
+ switch (cmsg->cmsg_type) {
+ case DCCP_SCM_PRIORITY:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
+ return -EINVAL;
+ skb->priority = *(__u32 *)CMSG_DATA(cmsg);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len)
{
@@ -696,8 +758,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
lock_sock(sk);
- if (sysctl_dccp_tx_qlen &&
- (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
+ if (dccp_qpolicy_full(sk)) {
rc = -EAGAIN;
goto out_release;
}
@@ -725,7 +786,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (rc != 0)
goto out_discard;
- skb_queue_tail(&sk->sk_write_queue, skb);
+ rc = dccp_msghdr_parse(msg, skb);
+ if (rc != 0)
+ goto out_discard;
+
+ dccp_qpolicy_push(sk, skb);
/*
* The xmit_timer is set if the TX CCID is rate-based and will expire
* when congestion control permits to release further packets into the
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
new file mode 100644
index 000000000000..63c30bfa4703
--- /dev/null
+++ b/net/dccp/qpolicy.c
@@ -0,0 +1,137 @@
+/*
+ * net/dccp/qpolicy.c
+ *
+ * Policy-based packet dequeueing interface for DCCP.
+ *
+ * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License v2
+ * as published by the Free Software Foundation.
+ */
+#include "dccp.h"
+
+/*
+ * Simple Dequeueing Policy:
+ * If tx_qlen is different from 0, enqueue up to tx_qlen elements.
+ */
+static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb)
+{
+ skb_queue_tail(&sk->sk_write_queue, skb);
+}
+
+static bool qpolicy_simple_full(struct sock *sk)
+{
+ return dccp_sk(sk)->dccps_tx_qlen &&
+ sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen;
+}
+
+static struct sk_buff *qpolicy_simple_top(struct sock *sk)
+{
+ return skb_peek(&sk->sk_write_queue);
+}
+
+/*
+ * Priority-based Dequeueing Policy:
+ * If tx_qlen is different from 0 and the queue has reached its upper bound
+ * of tx_qlen elements, replace older packets lowest-priority-first.
+ */
+static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk)
+{
+ struct sk_buff *skb, *best = NULL;
+
+ skb_queue_walk(&sk->sk_write_queue, skb)
+ if (best == NULL || skb->priority > best->priority)
+ best = skb;
+ return best;
+}
+
+static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk)
+{
+ struct sk_buff *skb, *worst = NULL;
+
+ skb_queue_walk(&sk->sk_write_queue, skb)
+ if (worst == NULL || skb->priority < worst->priority)
+ worst = skb;
+ return worst;
+}
+
+static bool qpolicy_prio_full(struct sock *sk)
+{
+ if (qpolicy_simple_full(sk))
+ dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk));
+ return false;
+}
+
+/**
+ * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface
+ * @push: add a new @skb to the write queue
+ * @full: indicates that no more packets will be admitted
+ * @top: peeks at whatever the queueing policy defines as its `top'
+ */
+static struct dccp_qpolicy_operations {
+ void (*push) (struct sock *sk, struct sk_buff *skb);
+ bool (*full) (struct sock *sk);
+ struct sk_buff* (*top) (struct sock *sk);
+ __be32 params;
+
+} qpol_table[DCCPQ_POLICY_MAX] = {
+ [DCCPQ_POLICY_SIMPLE] = {
+ .push = qpolicy_simple_push,
+ .full = qpolicy_simple_full,
+ .top = qpolicy_simple_top,
+ .params = 0,
+ },
+ [DCCPQ_POLICY_PRIO] = {
+ .push = qpolicy_simple_push,
+ .full = qpolicy_prio_full,
+ .top = qpolicy_prio_best_skb,
+ .params = DCCP_SCM_PRIORITY,
+ },
+};
+
+/*
+ * Externally visible interface
+ */
+void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb)
+{
+ qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb);
+}
+
+bool dccp_qpolicy_full(struct sock *sk)
+{
+ return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk);
+}
+
+void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb)
+{
+ if (skb != NULL) {
+ skb_unlink(skb, &sk->sk_write_queue);
+ kfree_skb(skb);
+ }
+}
+
+struct sk_buff *dccp_qpolicy_top(struct sock *sk)
+{
+ return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk);
+}
+
+struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
+{
+ struct sk_buff *skb = dccp_qpolicy_top(sk);
+
+ if (skb != NULL) {
+ /* Clear any skb fields that we used internally */
+ skb->priority = 0;
+ skb_unlink(skb, &sk->sk_write_queue);
+ }
+ return skb;
+}
+
+bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param)
+{
+ /* check if exactly one bit is set */
+ if (!param || (param & (param - 1)))
+ return false;
+ return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param;
+}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 9ecef9968c39..0065e7e14af4 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1556,6 +1556,8 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
if (r_len > sizeof(struct linkinfo_dn))
r_len = sizeof(struct linkinfo_dn);
+ memset(&link, 0, sizeof(link));
+
switch(sock->state) {
case SS_CONNECTING:
link.idn_linkstate = LL_CONNECTING;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 8280e43c8861..e2e926841fe6 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -240,13 +240,13 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) {
if (!(dst_metric_locked(dst, RTAX_MTU))) {
- dst->metrics[RTAX_MTU-1] = mtu;
+ dst_metric_set(dst, RTAX_MTU, mtu);
dst_set_expires(dst, dn_rt_mtu_expires);
}
if (!(dst_metric_locked(dst, RTAX_ADVMSS))) {
u32 mss = mtu - DN_MAX_NSP_DATA_HEADER;
if (dst_metric(dst, RTAX_ADVMSS) > mss)
- dst->metrics[RTAX_ADVMSS-1] = mss;
+ dst_metric_set(dst, RTAX_ADVMSS, mss);
}
}
}
@@ -806,8 +806,7 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
if (DN_FIB_RES_GW(*res) &&
DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = DN_FIB_RES_GW(*res);
- memcpy(rt->dst.metrics, fi->fib_metrics,
- sizeof(rt->dst.metrics));
+ dst_import_metrics(&rt->dst, fi->fib_metrics);
}
rt->rt_type = res->type;
@@ -820,11 +819,11 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
if (dst_metric(&rt->dst, RTAX_MTU) == 0 ||
dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
- rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
+ dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 ||
dst_metric(&rt->dst, RTAX_ADVMSS) > mss)
- rt->dst.metrics[RTAX_ADVMSS-1] = mss;
+ dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
return 0;
}
@@ -1502,7 +1501,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src);
if (rt->rt_daddr != rt->rt_gateway)
RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway);
- if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
+ if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
goto rtattr_failure;
expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires,
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index f8c1ae4b41f0..f180371fa415 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -31,6 +31,7 @@
#include <linux/skbuff.h>
#include <linux/udp.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <linux/stat.h>
@@ -276,12 +277,12 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
#endif
#ifdef CONFIG_ECONET_AUNUDP
struct msghdr udpmsg;
- struct iovec iov[msg->msg_iovlen+1];
+ struct iovec iov[2];
struct aunhdr ah;
struct sockaddr_in udpdest;
__kernel_size_t size;
- int i;
mm_segment_t oldfs;
+ char *userbuf;
#endif
/*
@@ -297,23 +298,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
mutex_lock(&econet_mutex);
- if (saddr == NULL) {
- struct econet_sock *eo = ec_sk(sk);
-
- addr.station = eo->station;
- addr.net = eo->net;
- port = eo->port;
- cb = eo->cb;
- } else {
- if (msg->msg_namelen < sizeof(struct sockaddr_ec)) {
- mutex_unlock(&econet_mutex);
- return -EINVAL;
- }
- addr.station = saddr->addr.station;
- addr.net = saddr->addr.net;
- port = saddr->port;
- cb = saddr->cb;
- }
+ if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) {
+ mutex_unlock(&econet_mutex);
+ return -EINVAL;
+ }
+ addr.station = saddr->addr.station;
+ addr.net = saddr->addr.net;
+ port = saddr->port;
+ cb = saddr->cb;
/* Look for a device with the right network number. */
dev = net2dev_map[addr.net];
@@ -328,17 +320,17 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
}
}
- if (len + 15 > dev->mtu) {
- mutex_unlock(&econet_mutex);
- return -EMSGSIZE;
- }
-
if (dev->type == ARPHRD_ECONET) {
/* Real hardware Econet. We're not worthy etc. */
#ifdef CONFIG_ECONET_NATIVE
unsigned short proto = 0;
int res;
+ if (len + 15 > dev->mtu) {
+ mutex_unlock(&econet_mutex);
+ return -EMSGSIZE;
+ }
+
dev_hold(dev);
skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev),
@@ -351,7 +343,6 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
eb = (struct ec_cb *)&skb->cb;
- /* BUG: saddr may be NULL */
eb->cookie = saddr->cookie;
eb->sec = *saddr;
eb->sent = ec_tx_done;
@@ -415,6 +406,11 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
return -ENETDOWN; /* No socket - can't send */
}
+ if (len > 32768) {
+ err = -E2BIG;
+ goto error;
+ }
+
/* Make up a UDP datagram and hand it off to some higher intellect. */
memset(&udpdest, 0, sizeof(udpdest));
@@ -446,36 +442,26 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
/* tack our header on the front of the iovec */
size = sizeof(struct aunhdr);
- /*
- * XXX: that is b0rken. We can't mix userland and kernel pointers
- * in iovec, since on a lot of platforms copy_from_user() will
- * *not* work with the kernel and userland ones at the same time,
- * regardless of what we do with set_fs(). And we are talking about
- * econet-over-ethernet here, so "it's only ARM anyway" doesn't
- * apply. Any suggestions on fixing that code? -- AV
- */
iov[0].iov_base = (void *)&ah;
iov[0].iov_len = size;
- for (i = 0; i < msg->msg_iovlen; i++) {
- void __user *base = msg->msg_iov[i].iov_base;
- size_t iov_len = msg->msg_iov[i].iov_len;
- /* Check it now since we switch to KERNEL_DS later. */
- if (!access_ok(VERIFY_READ, base, iov_len)) {
- mutex_unlock(&econet_mutex);
- return -EFAULT;
- }
- iov[i+1].iov_base = base;
- iov[i+1].iov_len = iov_len;
- size += iov_len;
+
+ userbuf = vmalloc(len);
+ if (userbuf == NULL) {
+ err = -ENOMEM;
+ goto error;
}
+ iov[1].iov_base = userbuf;
+ iov[1].iov_len = len;
+ err = memcpy_fromiovec(userbuf, msg->msg_iov, len);
+ if (err)
+ goto error_free_buf;
+
/* Get a skbuff (no data, just holds our cb information) */
if ((skb = sock_alloc_send_skb(sk, 0,
msg->msg_flags & MSG_DONTWAIT,
- &err)) == NULL) {
- mutex_unlock(&econet_mutex);
- return err;
- }
+ &err)) == NULL)
+ goto error_free_buf;
eb = (struct ec_cb *)&skb->cb;
@@ -491,7 +477,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
udpmsg.msg_name = (void *)&udpdest;
udpmsg.msg_namelen = sizeof(udpdest);
udpmsg.msg_iov = &iov[0];
- udpmsg.msg_iovlen = msg->msg_iovlen + 1;
+ udpmsg.msg_iovlen = 2;
udpmsg.msg_control = NULL;
udpmsg.msg_controllen = 0;
udpmsg.msg_flags=0;
@@ -499,9 +485,13 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
oldfs = get_fs(); set_fs(KERNEL_DS); /* More privs :-) */
err = sock_sendmsg(udpsock, &udpmsg, size);
set_fs(oldfs);
+
+error_free_buf:
+ vfree(userbuf);
#else
err = -EPROTOTYPE;
#endif
+ error:
mutex_unlock(&econet_mutex);
return err;
@@ -671,6 +661,11 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg)
err = 0;
switch (cmd) {
case SIOCSIFADDR:
+ if (!capable(CAP_NET_ADMIN)) {
+ err = -EPERM;
+ break;
+ }
+
edev = dev->ec_ptr;
if (edev == NULL) {
/* Magic up a new one. */
diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c
index 93c91b633a56..6df6ecf49708 100644
--- a/net/ieee802154/af_ieee802154.c
+++ b/net/ieee802154/af_ieee802154.c
@@ -52,11 +52,11 @@ struct net_device *ieee802154_get_dev(struct net *net,
switch (addr->addr_type) {
case IEEE802154_ADDR_LONG:
- rtnl_lock();
- dev = dev_getbyhwaddr(net, ARPHRD_IEEE802154, addr->hwaddr);
+ rcu_read_lock();
+ dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, addr->hwaddr);
if (dev)
dev_hold(dev);
- rtnl_unlock();
+ rcu_read_unlock();
break;
case IEEE802154_ADDR_SHORT:
if (addr->pan_id == 0xffff ||
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7833f17b648a..a2fc7b961dbc 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -883,7 +883,7 @@ static int arp_process(struct sk_buff *skb)
dont_send = arp_ignore(in_dev, sip, tip);
if (!dont_send && IN_DEV_ARPFILTER(in_dev))
- dont_send |= arp_filter(sip, tip, dev);
+ dont_send = arp_filter(sip, tip, dev);
if (!dont_send) {
n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
if (n) {
@@ -1017,13 +1017,14 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
IPV4_DEVCONF_ALL(net, PROXY_ARP) = on;
return 0;
}
- if (__in_dev_get_rtnl(dev)) {
- IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on);
+ if (__in_dev_get_rcu(dev)) {
+ IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on);
return 0;
}
return -ENXIO;
}
+/* must be called with rcu_read_lock() */
static int arp_req_set_public(struct net *net, struct arpreq *r,
struct net_device *dev)
{
@@ -1033,7 +1034,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
if (mask && mask != htonl(0xFFFFFFFF))
return -EINVAL;
if (!dev && (r->arp_flags & ATF_COM)) {
- dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
+ dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family,
r->arp_ha.sa_data);
if (!dev)
return -ENODEV;
@@ -1225,10 +1226,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!(r.arp_flags & ATF_NETMASK))
((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
htonl(0xFFFFFFFFUL);
- rtnl_lock();
+ rcu_read_lock();
if (r.arp_dev[0]) {
err = -ENODEV;
- dev = __dev_get_by_name(net, r.arp_dev);
+ dev = dev_get_by_name_rcu(net, r.arp_dev);
if (dev == NULL)
goto out;
@@ -1252,12 +1253,12 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
break;
case SIOCGARP:
err = arp_req_get(&r, dev);
- if (!err && copy_to_user(arg, &r, sizeof(r)))
- err = -EFAULT;
break;
}
out:
- rtnl_unlock();
+ rcu_read_unlock();
+ if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r)))
+ err = -EFAULT;
return err;
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d9f71bae45c4..3b067704ab38 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1258,7 +1258,7 @@ errout:
static size_t inet_get_link_af_size(const struct net_device *dev)
{
- struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
if (!in_dev)
return 0;
@@ -1268,7 +1268,7 @@ static size_t inet_get_link_af_size(const struct net_device *dev)
static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
{
- struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
struct nlattr *nla;
int i;
@@ -1295,7 +1295,7 @@ static int inet_validate_link_af(const struct net_device *dev,
struct nlattr *a, *tb[IFLA_INET_MAX+1];
int err, rem;
- if (dev && !__in_dev_get_rcu(dev))
+ if (dev && !__in_dev_get_rtnl(dev))
return -EAFNOSUPPORT;
err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
@@ -1319,7 +1319,7 @@ static int inet_validate_link_af(const struct net_device *dev,
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
{
- struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
struct nlattr *a, *tb[IFLA_INET_MAX+1];
int rem;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 200eb538fbb3..0f280348e0fd 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -365,7 +365,7 @@ static struct tnode *tnode_alloc(size_t size)
if (size <= PAGE_SIZE)
return kzalloc(size, GFP_KERNEL);
else
- return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+ return vzalloc(size);
}
static void __tnode_vfree(struct work_struct *arg)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 06f5f8f482f0..25e318153f14 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -55,7 +55,6 @@ EXPORT_SYMBOL(inet_get_local_port_range);
int inet_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb)
{
- const __be32 sk_rcv_saddr = inet_rcv_saddr(sk);
struct sock *sk2;
struct hlist_node *node;
int reuse = sk->sk_reuse;
@@ -75,9 +74,9 @@ int inet_csk_bind_conflict(const struct sock *sk,
sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
if (!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) {
- const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
- if (!sk2_rcv_saddr || !sk_rcv_saddr ||
- sk2_rcv_saddr == sk_rcv_saddr)
+ const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
+ if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
+ sk2_rcv_saddr == sk_rcv_saddr(sk))
break;
}
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 1b344f30b463..3c0369a3a663 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -133,8 +133,7 @@ int __inet_inherit_port(struct sock *sk, struct sock *child)
}
}
}
- sk_add_bind_node(child, &tb->owners);
- inet_csk(child)->icsk_bind_hash = tb;
+ inet_bind_hash(child, tb, port);
spin_unlock(&head->lock);
return 0;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 258c98d5fa79..ff4e7a4e33ed 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -818,7 +818,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
!ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
rt6->rt6i_dst.plen == 128) {
rt6->rt6i_flags |= RTF_MODIFIED;
- skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
+ dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
}
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 1b48eb1ed453..b14ec7d03b6e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -253,6 +253,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
+ SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3843c2dfde82..26ac396eaa5e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1686,11 +1686,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
if (mtu < dst_mtu(&rth->dst)) {
dst_confirm(&rth->dst);
if (mtu < ip_rt_min_pmtu) {
+ u32 lock = dst_metric(&rth->dst,
+ RTAX_LOCK);
mtu = ip_rt_min_pmtu;
- rth->dst.metrics[RTAX_LOCK-1] |=
- (1 << RTAX_MTU);
+ lock |= (1 << RTAX_MTU);
+ dst_metric_set(&rth->dst, RTAX_LOCK,
+ lock);
}
- rth->dst.metrics[RTAX_MTU-1] = mtu;
+ dst_metric_set(&rth->dst, RTAX_MTU, mtu);
dst_set_expires(&rth->dst,
ip_rt_mtu_expires);
}
@@ -1708,10 +1711,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
if (dst_mtu(dst) > mtu && mtu >= 68 &&
!(dst_metric_locked(dst, RTAX_MTU))) {
if (mtu < ip_rt_min_pmtu) {
+ u32 lock = dst_metric(dst, RTAX_LOCK);
mtu = ip_rt_min_pmtu;
- dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU);
+ dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU));
}
- dst->metrics[RTAX_MTU-1] = mtu;
+ dst_metric_set(dst, RTAX_MTU, mtu);
dst_set_expires(dst, ip_rt_mtu_expires);
call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
}
@@ -1796,36 +1800,37 @@ static void set_class_tag(struct rtable *rt, u32 tag)
static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
{
+ struct dst_entry *dst = &rt->dst;
struct fib_info *fi = res->fi;
if (fi) {
if (FIB_RES_GW(*res) &&
FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = FIB_RES_GW(*res);
- memcpy(rt->dst.metrics, fi->fib_metrics,
- sizeof(rt->dst.metrics));
+ dst_import_metrics(dst, fi->fib_metrics);
if (fi->fib_mtu == 0) {
- rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
- if (dst_metric_locked(&rt->dst, RTAX_MTU) &&
+ dst_metric_set(dst, RTAX_MTU, dst->dev->mtu);
+ if (dst_metric_locked(dst, RTAX_MTU) &&
rt->rt_gateway != rt->rt_dst &&
- rt->dst.dev->mtu > 576)
- rt->dst.metrics[RTAX_MTU-1] = 576;
+ dst->dev->mtu > 576)
+ dst_metric_set(dst, RTAX_MTU, 576);
}
#ifdef CONFIG_NET_CLS_ROUTE
- rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
+ dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
#endif
} else
- rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu;
-
- if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
- rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
- if (dst_mtu(&rt->dst) > IP_MAX_MTU)
- rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
- if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0)
- rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40,
- ip_rt_min_advmss);
- if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40)
- rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
+ dst_metric_set(dst, RTAX_MTU, dst->dev->mtu);
+
+ if (dst_metric(dst, RTAX_HOPLIMIT) == 0)
+ dst_metric_set(dst, RTAX_HOPLIMIT, sysctl_ip_default_ttl);
+ if (dst_mtu(dst) > IP_MAX_MTU)
+ dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
+ if (dst_metric(dst, RTAX_ADVMSS) == 0)
+ dst_metric_set(dst, RTAX_ADVMSS,
+ max_t(unsigned int, dst->dev->mtu - 40,
+ ip_rt_min_advmss));
+ if (dst_metric(dst, RTAX_ADVMSS) > 65535 - 40)
+ dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -2720,7 +2725,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
new->__use = 1;
new->input = dst_discard;
new->output = dst_discard;
- memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+ dst_copy_metrics(new, &ort->dst);
new->dev = ort->dst.dev;
if (new->dev)
@@ -2827,7 +2832,7 @@ static int rt_fill_info(struct net *net,
if (rt->rt_dst != rt->rt_gateway)
NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
- if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
+ if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
goto nla_put_failure;
if (rt->fl.mark)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e91911d7aae2..1b4ec21497a4 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -26,6 +26,8 @@ static int zero;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
+static int tcp_adv_win_scale_min = -31;
+static int tcp_adv_win_scale_max = 31;
/* Update system visible IP port range */
static void set_local_port_range(int range[2])
@@ -426,7 +428,9 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_tcp_adv_win_scale,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &tcp_adv_win_scale_min,
+ .extra2 = &tcp_adv_win_scale_max,
},
{
.procname = "tcp_tw_reuse",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2bb46d55f40c..6c11eece262c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2244,7 +2244,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Values greater than interface MTU won't take effect. However
* at the point when this call is done we typically don't yet
* know which interface is going to be used */
- if (val < 64 || val > MAX_TCP_WINDOW) {
+ if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
err = -EINVAL;
break;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6d8ab1c4efc3..824e8c8a17ad 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -734,7 +734,7 @@ void tcp_update_metrics(struct sock *sk)
* Reset our results.
*/
if (!(dst_metric_locked(dst, RTAX_RTT)))
- dst->metrics[RTAX_RTT - 1] = 0;
+ dst_metric_set(dst, RTAX_RTT, 0);
return;
}
@@ -776,34 +776,38 @@ void tcp_update_metrics(struct sock *sk)
if (dst_metric(dst, RTAX_SSTHRESH) &&
!dst_metric_locked(dst, RTAX_SSTHRESH) &&
(tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
- dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
+ dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1);
if (!dst_metric_locked(dst, RTAX_CWND) &&
tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
- dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd;
+ dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd);
} else if (tp->snd_cwnd > tp->snd_ssthresh &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
if (!dst_metric_locked(dst, RTAX_SSTHRESH))
- dst->metrics[RTAX_SSTHRESH-1] =
- max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
+ dst_metric_set(dst, RTAX_SSTHRESH,
+ max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
if (!dst_metric_locked(dst, RTAX_CWND))
- dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1;
+ dst_metric_set(dst, RTAX_CWND,
+ (dst_metric(dst, RTAX_CWND) +
+ tp->snd_cwnd) >> 1);
} else {
/* Else slow start did not finish, cwnd is non-sense,
ssthresh may be also invalid.
*/
if (!dst_metric_locked(dst, RTAX_CWND))
- dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1;
+ dst_metric_set(dst, RTAX_CWND,
+ (dst_metric(dst, RTAX_CWND) +
+ tp->snd_ssthresh) >> 1);
if (dst_metric(dst, RTAX_SSTHRESH) &&
!dst_metric_locked(dst, RTAX_SSTHRESH) &&
tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
- dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
+ dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh);
}
if (!dst_metric_locked(dst, RTAX_REORDERING)) {
if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
tp->reordering != sysctl_tcp_reordering)
- dst->metrics[RTAX_REORDERING-1] = tp->reordering;
+ dst_metric_set(dst, RTAX_REORDERING, tp->reordering);
}
}
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dd555051ec8b..4fc3387aa994 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2013,7 +2013,9 @@ get_req:
}
get_sk:
sk_nulls_for_each_from(sk, node) {
- if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (sk->sk_family == st->family) {
cur = sk;
goto out;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 3527b51d6159..80b1f80759ab 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -392,7 +392,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
* socket up. We've got bigger problems than
* non-graceful socket closings.
*/
- LIMIT_NETDEBUG(KERN_INFO "TCP: time wait bucket table overflow\n");
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
}
tcp_update_metrics(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 749b6498588e..97041f24cd27 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -231,11 +231,10 @@ void tcp_select_initial_window(int __space, __u32 mss,
/* when initializing use the value from init_rcv_wnd
* rather than the default from above
*/
- if (init_rcv_wnd &&
- (*rcv_wnd > init_rcv_wnd * mss))
- *rcv_wnd = init_rcv_wnd * mss;
- else if (*rcv_wnd > init_cwnd * mss)
- *rcv_wnd = init_cwnd * mss;
+ if (init_rcv_wnd)
+ *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
+ else
+ *rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
}
/* Set the clamp no higher than max representable value */
@@ -386,27 +385,30 @@ struct tcp_out_options {
*/
static u8 tcp_cookie_size_check(u8 desired)
{
- if (desired > 0) {
+ int cookie_size;
+
+ if (desired > 0)
/* previously specified */
return desired;
- }
- if (sysctl_tcp_cookie_size <= 0) {
+
+ cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
+ if (cookie_size <= 0)
/* no default specified */
return 0;
- }
- if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) {
+
+ if (cookie_size <= TCP_COOKIE_MIN)
/* value too small, specify minimum */
return TCP_COOKIE_MIN;
- }
- if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) {
+
+ if (cookie_size >= TCP_COOKIE_MAX)
/* value too large, specify maximum */
return TCP_COOKIE_MAX;
- }
- if (0x1 & sysctl_tcp_cookie_size) {
+
+ if (cookie_size & 1)
/* 8-bit multiple, illegal, fix it */
- return (u8)(sysctl_tcp_cookie_size + 0x1);
- }
- return (u8)sysctl_tcp_cookie_size;
+ cookie_size++;
+
+ return (u8)cookie_size;
}
/* Write previously computed TCP options to the packet.
@@ -1516,6 +1518,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 send_win, cong_win, limit, in_flight;
+ int win_divisor;
if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
goto send_now;
@@ -1547,13 +1550,14 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
goto send_now;
- if (sysctl_tcp_tso_win_divisor) {
+ win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
+ if (win_divisor) {
u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
/* If at least some fraction of a window is available,
* just use it.
*/
- chunk /= sysctl_tcp_tso_win_divisor;
+ chunk /= win_divisor;
if (limit >= chunk)
goto send_now;
} else {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b1155554bb18..4f4483e697bd 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1173,6 +1173,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
sizeof (struct ipv6hdr);
dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu-=8;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
@@ -1361,12 +1363,17 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
static void ip6_tnl_dev_setup(struct net_device *dev)
{
+ struct ip6_tnl *t;
+
dev->netdev_ops = &ip6_tnl_netdev_ops;
dev->destructor = ip6_dev_free;
dev->type = ARPHRD_TUNNEL6;
dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
+ t = netdev_priv(dev);
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu-=8;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
dev->features |= NETIF_F_NETNS_LOCAL;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index e18f84130203..2342545a5ee9 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1259,7 +1259,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (ra_msg->icmph.icmp6_hop_limit) {
in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
if (rt)
- rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
+ ra_msg->icmph.icmp6_hop_limit);
}
skip_defrtr:
@@ -1377,7 +1378,7 @@ skip_linkparms:
in6_dev->cnf.mtu6 = mtu;
if (rt)
- rt->dst.metrics[RTAX_MTU-1] = mtu;
+ dst_metric_set(&rt->dst, RTAX_MTU, mtu);
rt6_mtu_change(skb->dev, mtu);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 026caef0326c..4aed0812b512 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -129,7 +129,6 @@ static struct rt6_info ip6_null_entry_template = {
.__use = 1,
.obsolete = -1,
.error = -ENETUNREACH,
- .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = ip6_pkt_discard,
.output = ip6_pkt_discard_out,
},
@@ -150,7 +149,6 @@ static struct rt6_info ip6_prohibit_entry_template = {
.__use = 1,
.obsolete = -1,
.error = -EACCES,
- .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = ip6_pkt_prohibit,
.output = ip6_pkt_prohibit_out,
},
@@ -166,7 +164,6 @@ static struct rt6_info ip6_blk_hole_entry_template = {
.__use = 1,
.obsolete = -1,
.error = -EINVAL,
- .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = dst_discard,
.output = dst_discard,
},
@@ -844,7 +841,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
new->input = dst_discard;
new->output = dst_discard;
- memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+ dst_copy_metrics(new, &ort->dst);
new->dev = ort->dst.dev;
if (new->dev)
dev_hold(new->dev);
@@ -928,10 +925,12 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
rt6->rt6i_flags |= RTF_MODIFIED;
if (mtu < IPV6_MIN_MTU) {
+ u32 features = dst_metric(dst, RTAX_FEATURES);
mtu = IPV6_MIN_MTU;
- dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+ features |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(dst, RTAX_FEATURES, features);
}
- dst->metrics[RTAX_MTU-1] = mtu;
+ dst_metric_set(dst, RTAX_MTU, mtu);
call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
}
}
@@ -989,9 +988,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->rt6i_idev = idev;
rt->rt6i_nexthop = neigh;
atomic_set(&rt->dst.__refcnt, 1);
- rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
- rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
- rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
+ dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
+ dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
rt->dst.output = ip6_output;
#if 0 /* there's no chance to use these for ndisc */
@@ -1305,17 +1304,17 @@ install_route:
goto out;
}
- rt->dst.metrics[type - 1] = nla_get_u32(nla);
+ dst_metric_set(&rt->dst, type, nla_get_u32(nla));
}
}
}
if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
- rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
if (!dst_mtu(&rt->dst))
- rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
+ dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev));
if (!dst_metric(&rt->dst, RTAX_ADVMSS))
- rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+ dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
rt->dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
@@ -1541,9 +1540,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
nrt->rt6i_nexthop = neigh_clone(neigh);
/* Reset pmtu, it may be better */
- nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
- nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
- dst_mtu(&nrt->dst));
+ dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev));
+ dst_metric_set(&nrt->dst, RTAX_ADVMSS, ipv6_advmss(dev_net(neigh->dev),
+ dst_mtu(&nrt->dst)));
if (ip6_ins_rt(nrt))
goto out;
@@ -1602,9 +1601,12 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
would return automatically.
*/
if (rt->rt6i_flags & RTF_CACHE) {
- rt->dst.metrics[RTAX_MTU-1] = pmtu;
- if (allfrag)
- rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
+ if (allfrag) {
+ u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
+ features |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(&rt->dst, RTAX_FEATURES, features);
+ }
dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
goto out;
@@ -1621,9 +1623,12 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
nrt = rt6_alloc_clone(rt, daddr);
if (nrt) {
- nrt->dst.metrics[RTAX_MTU-1] = pmtu;
- if (allfrag)
- nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
+ if (allfrag) {
+ u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
+ features |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
+ }
/* According to RFC 1981, detecting PMTU increase shouldn't be
* happened within 5 mins, the recommended timer is 10 mins.
@@ -1674,7 +1679,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
rt->dst.input = ort->dst.input;
rt->dst.output = ort->dst.output;
- memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+ dst_copy_metrics(&rt->dst, &ort->dst);
rt->dst.error = ort->dst.error;
rt->dst.dev = ort->dst.dev;
if (rt->dst.dev)
@@ -1966,9 +1971,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->dst.output = ip6_output;
rt->rt6i_dev = net->loopback_dev;
rt->rt6i_idev = idev;
- rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
- rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
- rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+ dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
+ dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
rt->dst.obsolete = -1;
rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
@@ -2068,8 +2073,8 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
(dst_mtu(&rt->dst) >= arg->mtu ||
(dst_mtu(&rt->dst) < arg->mtu &&
dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
- rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
- rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
+ dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
+ dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, arg->mtu));
}
return 0;
}
@@ -2295,7 +2300,7 @@ static int rt6_fill_node(struct net *net,
NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
}
- if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
+ if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
goto nla_put_failure;
if (rt->dst.neighbour)
@@ -2686,6 +2691,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_null_entry->dst.path =
(struct dst_entry *)net->ipv6.ip6_null_entry;
net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2696,6 +2702,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_prohibit_entry->dst.path =
(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -2705,6 +2712,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_blk_hole_entry->dst.path =
(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
#endif
net->ipv6.sysctl.flush_delay = 0;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 6e48a80d0f25..8ce38f10a547 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -606,8 +606,9 @@ static int ipip6_rcv(struct sk_buff *skb)
return 0;
}
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+ /* no tunnel matched, let upstream know, ipsec may handle it */
rcu_read_unlock();
+ return 1;
out:
kfree_skb(skb);
return 0;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b541a4e009fb..7aad12770867 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -54,8 +54,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr;
- __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
+ __be32 sk1_rcv_saddr = sk_rcv_saddr(sk);
+ __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
int sk_ipv6only = ipv6_only_sock(sk);
int sk2_ipv6only = inet_v6_ipv6only(sk2);
int addr_type = ipv6_addr_type(sk_rcv_saddr6);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 04635e88e8ed..110efb704c9b 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -672,4 +672,8 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
MODULE_DESCRIPTION("L2TP over IP");
MODULE_VERSION("1.0");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, SOCK_DGRAM, IPPROTO_L2TP);
+
+/* Use the value of SOCK_DGRAM (2) directory, because __stringify does't like
+ * enums
+ */
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 582612998211..dfd3a648a551 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -316,9 +316,9 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
if (unlikely(addr->sllc_family != AF_LLC))
goto out;
rc = -ENODEV;
- rtnl_lock();
+ rcu_read_lock();
if (sk->sk_bound_dev_if) {
- llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+ llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
if (llc->dev) {
if (!addr->sllc_arphrd)
addr->sllc_arphrd = llc->dev->type;
@@ -329,14 +329,15 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
!llc_mac_match(addr->sllc_mac,
llc->dev->dev_addr)) {
rc = -EINVAL;
- dev_put(llc->dev);
llc->dev = NULL;
}
}
} else
- llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd,
+ llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
addr->sllc_mac);
- rtnl_unlock();
+ if (llc->dev)
+ dev_hold(llc->dev);
+ rcu_read_unlock();
if (!llc->dev)
goto out;
if (!addr->sllc_sap) {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 422705d62b5b..246a04a13234 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -167,7 +167,6 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
#define PGV_FROM_VMALLOC 1
struct pgv {
char *buffer;
- unsigned char flags;
};
struct packet_ring_buffer {
@@ -224,6 +223,13 @@ struct packet_skb_cb {
#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
+static inline __pure struct page *pgv_to_page(void *addr)
+{
+ if (is_vmalloc_addr(addr))
+ return vmalloc_to_page(addr);
+ return virt_to_page(addr);
+}
+
static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
union {
@@ -236,11 +242,11 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
switch (po->tp_version) {
case TPACKET_V1:
h.h1->tp_status = status;
- flush_dcache_page(virt_to_page(&h.h1->tp_status));
+ flush_dcache_page(pgv_to_page(&h.h1->tp_status));
break;
case TPACKET_V2:
h.h2->tp_status = status;
- flush_dcache_page(virt_to_page(&h.h2->tp_status));
+ flush_dcache_page(pgv_to_page(&h.h2->tp_status));
break;
default:
pr_err("TPACKET version not supported\n");
@@ -263,10 +269,10 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
- flush_dcache_page(virt_to_page(&h.h1->tp_status));
+ flush_dcache_page(pgv_to_page(&h.h1->tp_status));
return h.h1->tp_status;
case TPACKET_V2:
- flush_dcache_page(virt_to_page(&h.h2->tp_status));
+ flush_dcache_page(pgv_to_page(&h.h2->tp_status));
return h.h2->tp_status;
default:
pr_err("TPACKET version not supported\n");
@@ -511,7 +517,8 @@ out_free:
return err;
}
-static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
+static inline unsigned int run_filter(const struct sk_buff *skb,
+ const struct sock *sk,
unsigned int res)
{
struct sk_filter *filter;
@@ -526,15 +533,15 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
}
/*
- This function makes lazy skb cloning in hope that most of packets
- are discarded by BPF.
-
- Note tricky part: we DO mangle shared skb! skb->data, skb->len
- and skb->cb are mangled. It works because (and until) packets
- falling here are owned by current CPU. Output packets are cloned
- by dev_queue_xmit_nit(), input packets are processed by net_bh
- sequencially, so that if we return skb to original state on exit,
- we will not harm anyone.
+ * This function makes lazy skb cloning in hope that most of packets
+ * are discarded by BPF.
+ *
+ * Note tricky part: we DO mangle shared skb! skb->data, skb->len
+ * and skb->cb are mangled. It works because (and until) packets
+ * falling here are owned by current CPU. Output packets are cloned
+ * by dev_queue_xmit_nit(), input packets are processed by net_bh
+ * sequencially, so that if we return skb to original state on exit,
+ * we will not harm anyone.
*/
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -560,11 +567,11 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
if (dev->header_ops) {
/* The device has an explicit notion of ll header,
- exported to higher levels.
-
- Otherwise, the device hides datails of it frame
- structure, so that corresponding packet head
- never delivered to user.
+ * exported to higher levels.
+ *
+ * Otherwise, the device hides details of its frame
+ * structure, so that corresponding packet head is
+ * never delivered to user.
*/
if (sk->sk_type != SOCK_DGRAM)
skb_push(skb, skb->data - skb_mac_header(skb));
@@ -799,17 +806,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
__packet_set_status(po, h.raw, status);
smp_mb();
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
{
- struct page *p_start, *p_end;
- u8 *h_end = h.raw + macoff + snaplen - 1;
-
- p_start = virt_to_page(h.raw);
- p_end = virt_to_page(h_end);
- while (p_start <= p_end) {
- flush_dcache_page(p_start);
- p_start++;
- }
+ u8 *start, *end;
+
+ end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
+ for (start = h.raw; start < end; start += PAGE_SIZE)
+ flush_dcache_page(pgv_to_page(start));
}
+#endif
sk->sk_data_ready(sk, 0);
@@ -915,7 +920,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
}
err = -EFAULT;
- page = virt_to_page(data);
offset = offset_in_page(data);
len_max = PAGE_SIZE - offset;
len = ((to_write > len_max) ? len_max : to_write);
@@ -934,11 +938,11 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
return -EFAULT;
}
+ page = pgv_to_page(data);
+ data += len;
flush_dcache_page(page);
get_page(page);
- skb_fill_page_desc(skb,
- nr_frags,
- page++, offset, len);
+ skb_fill_page_desc(skb, nr_frags, page, offset, len);
to_write -= len;
offset = 0;
len_max = PAGE_SIZE;
@@ -2340,7 +2344,7 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
for (i = 0; i < len; i++) {
if (likely(pg_vec[i].buffer)) {
- if (pg_vec[i].flags & PGV_FROM_VMALLOC)
+ if (is_vmalloc_addr(pg_vec[i].buffer))
vfree(pg_vec[i].buffer);
else
free_pages((unsigned long)pg_vec[i].buffer,
@@ -2351,8 +2355,7 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
kfree(pg_vec);
}
-static inline char *alloc_one_pg_vec_page(unsigned long order,
- unsigned char *flags)
+static inline char *alloc_one_pg_vec_page(unsigned long order)
{
char *buffer = NULL;
gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
@@ -2366,7 +2369,6 @@ static inline char *alloc_one_pg_vec_page(unsigned long order,
/*
* __get_free_pages failed, fall back to vmalloc
*/
- *flags |= PGV_FROM_VMALLOC;
buffer = vzalloc((1 << order) * PAGE_SIZE);
if (buffer)
@@ -2375,7 +2377,6 @@ static inline char *alloc_one_pg_vec_page(unsigned long order,
/*
* vmalloc failed, lets dig into swap here
*/
- *flags = 0;
gfp_flags &= ~__GFP_NORETRY;
buffer = (char *)__get_free_pages(gfp_flags, order);
if (buffer)
@@ -2398,8 +2399,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
goto out;
for (i = 0; i < block_nr; i++) {
- pg_vec[i].buffer = alloc_one_pg_vec_page(order,
- &pg_vec[i].flags);
+ pg_vec[i].buffer = alloc_one_pg_vec_page(order);
if (unlikely(!pg_vec[i].buffer))
goto out_free_pgvec;
}
@@ -2409,7 +2409,6 @@ out:
out_free_pgvec:
free_pg_vec(pg_vec, order, block_nr);
- kfree(pg_vec);
pg_vec = NULL;
goto out;
}
@@ -2583,13 +2582,8 @@ static int packet_mmap(struct file *file, struct socket *sock,
void *kaddr = rb->pg_vec[i].buffer;
int pg_num;
- for (pg_num = 0; pg_num < rb->pg_vec_pages;
- pg_num++) {
- if (rb->pg_vec[i].flags & PGV_FROM_VMALLOC)
- page = vmalloc_to_page(kaddr);
- else
- page = virt_to_page(kaddr);
-
+ for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
+ page = pgv_to_page(kaddr);
err = vm_insert_page(vma, start, page);
if (unlikely(err))
goto out;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6bd554323a34..842c7f3650b9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6047,7 +6047,7 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
* will suddenly eat the receive_queue.
*
* Look at current nfs client by the way...
- * However, this function was corrent in any case. 8)
+ * However, this function was correct in any case. 8)
*/
if (flags & MSG_PEEK) {
spin_lock_bh(&sk->sk_receive_queue.lock);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7ff31c60186a..417d7a6c36cf 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1344,9 +1344,25 @@ static void unix_destruct_scm(struct sk_buff *skb)
sock_wfree(skb);
}
+#define MAX_RECURSION_LEVEL 4
+
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
int i;
+ unsigned char max_level = 0;
+ int unix_sock_count = 0;
+
+ for (i = scm->fp->count - 1; i >= 0; i--) {
+ struct sock *sk = unix_get_socket(scm->fp->fp[i]);
+
+ if (sk) {
+ unix_sock_count++;
+ max_level = max(max_level,
+ unix_sk(sk)->recursion_level);
+ }
+ }
+ if (unlikely(max_level > MAX_RECURSION_LEVEL))
+ return -ETOOMANYREFS;
/*
* Need to duplicate file references for the sake of garbage
@@ -1357,9 +1373,11 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
if (!UNIXCB(skb).fp)
return -ENOMEM;
- for (i = scm->fp->count-1; i >= 0; i--)
- unix_inflight(scm->fp->fp[i]);
- return 0;
+ if (unix_sock_count) {
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_inflight(scm->fp->fp[i]);
+ }
+ return max_level;
}
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1394,6 +1412,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct sk_buff *skb;
long timeo;
struct scm_cookie tmp_scm;
+ int max_level;
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
@@ -1432,8 +1451,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
goto out;
err = unix_scm_to_skb(siocb->scm, skb, true);
- if (err)
+ if (err < 0)
goto out_free;
+ max_level = err + 1;
unix_get_secdata(siocb->scm, skb);
skb_reset_transport_header(skb);
@@ -1515,6 +1535,8 @@ restart:
if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb);
skb_queue_tail(&other->sk_receive_queue, skb);
+ if (max_level > unix_sk(other)->recursion_level)
+ unix_sk(other)->recursion_level = max_level;
unix_state_unlock(other);
other->sk_data_ready(other, len);
sock_put(other);
@@ -1545,6 +1567,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
int sent = 0;
struct scm_cookie tmp_scm;
bool fds_sent = false;
+ int max_level;
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
@@ -1608,10 +1631,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
/* Only send the fds in the first buffer */
err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
- if (err) {
+ if (err < 0) {
kfree_skb(skb);
goto out_err;
}
+ max_level = err + 1;
fds_sent = true;
err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
@@ -1627,6 +1651,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
goto pipe_err_free;
skb_queue_tail(&other->sk_receive_queue, skb);
+ if (max_level > unix_sk(other)->recursion_level)
+ unix_sk(other)->recursion_level = max_level;
unix_state_unlock(other);
other->sk_data_ready(other, size);
sent += size;
@@ -1847,6 +1873,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
unix_state_lock(sk);
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb == NULL) {
+ unix_sk(sk)->recursion_level = 0;
if (copied >= target)
goto unlock;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index c8df6fda0b1f..f89f83bf828e 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -96,7 +96,7 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
unsigned int unix_tot_inflight;
-static struct sock *unix_get_socket(struct file *filp)
+struct sock *unix_get_socket(struct file *filp)
{
struct sock *u_sock = NULL;
struct inode *inode = filp->f_path.dentry->d_inode;
@@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u)
}
static bool gc_in_progress = false;
+#define UNIX_INFLIGHT_TRIGGER_GC 16000
void wait_for_unix_gc(void)
{
+ /*
+ * If number of inflight sockets is insane,
+ * force a garbage collect right now.
+ */
+ if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
+ unix_gc();
wait_event(unix_gc_wait, gc_in_progress == false);
}
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 4c81f6abb65b..4cbc942f762a 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -398,6 +398,7 @@ void __exit x25_link_free(void)
list_for_each_safe(entry, tmp, &x25_neigh_list) {
nb = list_entry(entry, struct x25_neigh, node);
__x25_remove_neigh(nb);
+ dev_put(nb->dev);
}
write_unlock_bh(&x25_neigh_list_lock);
}
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index a2023ec52329..1e98bc0fe0a5 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -19,7 +19,7 @@ struct hlist_head *xfrm_hash_alloc(unsigned int sz)
if (sz <= PAGE_SIZE)
n = kzalloc(sz, GFP_KERNEL);
else if (hashdist)
- n = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+ n = vzalloc(sz);
else
n = (struct hlist_head *)
__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 044e77898512..6e50ccd8c532 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1433,7 +1433,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
}
xdst->route = dst;
- memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));
+ dst_copy_metrics(dst1, dst);
if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
family = xfrm[i]->props.family;
@@ -2271,7 +2271,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst)
if (pmtu > route_mtu_cached)
pmtu = route_mtu_cached;
- dst->metrics[RTAX_MTU-1] = pmtu;
+ dst_metric_set(dst, RTAX_MTU, pmtu);
} while ((dst = dst->next));
}
@@ -2349,7 +2349,7 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
mtu = xfrm_state_mtu(dst->xfrm, mtu);
if (mtu > last->route_mtu_cached)
mtu = last->route_mtu_cached;
- dst->metrics[RTAX_MTU-1] = mtu;
+ dst_metric_set(dst, RTAX_MTU, mtu);
if (last == first)
break;