diff options
Diffstat (limited to 'net')
50 files changed, 660 insertions, 370 deletions
diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 45c15f491401..798beac7f100 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -27,31 +27,16 @@ #include <linux/module.h> #include <linux/errno.h> +#include <linux/kernel.h> #include <linux/uaccess.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/stddef.h> #include <linux/types.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include "protocol.h" -#ifndef MIN -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) -#endif - -#ifndef MAX -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) -#endif - -#ifndef offset_of -#define offset_of(type, memb) \ - ((unsigned long)(&((type *)0)->memb)) -#endif -#ifndef container_of -#define container_of(obj, type, memb) \ - ((type *)(((char *)obj) - offset_of(type, memb))) -#endif - static int p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); @@ -104,7 +89,7 @@ EXPORT_SYMBOL(p9stat_free); static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) { - size_t len = MIN(pdu->size - pdu->offset, size); + size_t len = min(pdu->size - pdu->offset, size); memcpy(data, &pdu->sdata[pdu->offset], len); pdu->offset += len; return size - len; @@ -112,7 +97,7 @@ static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size) { - size_t len = MIN(pdu->capacity - pdu->size, size); + size_t len = min(pdu->capacity - pdu->size, size); memcpy(&pdu->sdata[pdu->size], data, len); pdu->size += len; return size - len; @@ -121,7 +106,7 @@ static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size) static size_t pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) { - size_t len = MIN(pdu->capacity - pdu->size, size); + size_t len = min(pdu->capacity - pdu->size, size); if (copy_from_user(&pdu->sdata[pdu->size], udata, len)) len = 0; @@ -201,7 +186,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, if (errcode) break; - size = MAX(len, 0); + size = max_t(int16_t, len, 0); *sptr = kmalloc(size + 1, GFP_KERNEL); if (*sptr == NULL) { @@ -256,8 +241,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, p9pdu_readf(pdu, proto_version, "d", count); if (!errcode) { *count = - MIN(*count, - pdu->size - pdu->offset); + min_t(int32_t, *count, + pdu->size - pdu->offset); *data = &pdu->sdata[pdu->offset]; } } @@ -421,7 +406,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, const char *sptr = va_arg(ap, const char *); int16_t len = 0; if (sptr) - len = MIN(strlen(sptr), USHRT_MAX); + len = min_t(int16_t, strlen(sptr), USHRT_MAX); errcode = p9pdu_writef(pdu, proto_version, "w", len); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 17cb0b633576..556443566e9c 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -141,7 +141,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu) #ifdef CONFIG_BRIDGE_NETFILTER /* remember the MTU in the rtable for PMTU */ - br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu; + dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu); #endif return 0; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 6e1392093911..16f5c333596a 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -124,7 +124,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) atomic_set(&rt->dst.__refcnt, 1); rt->dst.dev = br->dev; rt->dst.path = &rt->dst; - rt->dst.metrics[RTAX_MTU - 1] = 1500; + dst_metric_set(&rt->dst, RTAX_MTU, 1500); rt->dst.flags = DST_NOXFRM; rt->dst.ops = &fake_dst_ops; } diff --git a/net/ceph/Makefile b/net/ceph/Makefile index 153bdec40835..e87ef435e11b 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ -1,9 +1,6 @@ # # Makefile for CEPH filesystem. # - -ifneq ($(KERNELRELEASE),) - obj-$(CONFIG_CEPH_LIB) += libceph.o libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ @@ -16,22 +13,3 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ ceph_fs.o ceph_strings.o ceph_hash.o \ pagevec.o -else -#Otherwise we were called directly from the command -# line; invoke the kernel build system. - -KERNELDIR ?= /lib/modules/$(shell uname -r)/build -PWD := $(shell pwd) - -default: all - -all: - $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules - -modules_install: - $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install - -clean: - $(MAKE) -C $(KERNELDIR) M=$(PWD) clean - -endif diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c index 53d8abfa25d5..bf3e6a13c215 100644 --- a/net/ceph/buffer.c +++ b/net/ceph/buffer.c @@ -19,7 +19,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) if (b->vec.iov_base) { b->is_vmalloc = false; } else { - b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL); + b->vec.iov_base = __vmalloc(len, gfp | __GFP_HIGHMEM, PAGE_KERNEL); if (!b->vec.iov_base) { kfree(b); return NULL; diff --git a/net/core/datagram.c b/net/core/datagram.c index cd1e039c8755..18ac112ea7ae 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -177,7 +177,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, * interrupt level will suddenly eat the receive_queue. * * Look at current nfs client by the way... - * However, this function was corrent in any case. 8) + * However, this function was correct in any case. 8) */ unsigned long cpu_flags; diff --git a/net/core/dev.c b/net/core/dev.c index cd2437495428..d28b3a023bb2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -743,34 +743,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) EXPORT_SYMBOL(dev_get_by_index); /** - * dev_getbyhwaddr - find a device by its hardware address + * dev_getbyhwaddr_rcu - find a device by its hardware address * @net: the applicable net namespace * @type: media type of device * @ha: hardware address * * Search for an interface by MAC address. Returns NULL if the device - * is not found or a pointer to the device. The caller must hold the - * rtnl semaphore. The returned device has not had its ref count increased + * is not found or a pointer to the device. The caller must hold RCU + * The returned device has not had its ref count increased * and the caller must therefore be careful about locking * - * BUGS: - * If the API was consistent this would be __dev_get_by_hwaddr */ -struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) +struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, + const char *ha) { struct net_device *dev; - ASSERT_RTNL(); - - for_each_netdev(net, dev) + for_each_netdev_rcu(net, dev) if (dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len)) return dev; return NULL; } -EXPORT_SYMBOL(dev_getbyhwaddr); +EXPORT_SYMBOL(dev_getbyhwaddr_rcu); struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) { @@ -2025,9 +2022,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int rc = NETDEV_TX_OK; if (likely(!skb->next)) { - if (!list_empty(&ptype_all)) - dev_queue_xmit_nit(skb, dev); - /* * If device doesnt need skb->dst, release it right now while * its hot in this cpu cache @@ -2035,6 +2029,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(skb); + if (!list_empty(&ptype_all)) + dev_queue_xmit_nit(skb, dev); + skb_orphan_try(skb); if (vlan_tx_tag_present(skb) && @@ -5041,10 +5038,13 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) } if (features & NETIF_F_UFO) { - if (!(features & NETIF_F_GEN_CSUM)) { + /* maybe split UFO into V4 and V6? */ + if (!((features & NETIF_F_GEN_CSUM) || + (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) + == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { if (name) printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no NETIF_F_HW_CSUM feature.\n", + "since no checksum offload features.\n", name); features &= ~NETIF_F_UFO; } @@ -5109,11 +5109,21 @@ static int netif_alloc_rx_queues(struct net_device *dev) } #endif +static void netdev_init_one_queue(struct net_device *dev, + struct netdev_queue *queue, void *_unused) +{ + /* Initialize queue lock */ + spin_lock_init(&queue->_xmit_lock); + netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); + queue->xmit_lock_owner = -1; + netdev_queue_numa_node_write(queue, -1); + queue->dev = dev; +} + static int netif_alloc_netdev_queues(struct net_device *dev) { unsigned int count = dev->num_tx_queues; struct netdev_queue *tx; - int i; BUG_ON(count < 1); @@ -5125,27 +5135,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev) } dev->_tx = tx; - for (i = 0; i < count; i++) { - netdev_queue_numa_node_write(&tx[i], -1); - tx[i].dev = dev; - } - return 0; -} - -static void netdev_init_one_queue(struct net_device *dev, - struct netdev_queue *queue, - void *_unused) -{ - /* Initialize queue lock */ - spin_lock_init(&queue->_xmit_lock); - netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); - queue->xmit_lock_owner = -1; -} - -static void netdev_init_queues(struct net_device *dev) -{ netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); spin_lock_init(&dev->tx_global_lock); + + return 0; } /** @@ -5184,8 +5177,6 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; - netdev_init_queues(dev); - /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 956a9f4971cb..d5bc28818883 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1171,7 +1171,9 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) return -EFAULT; if (edata.data && !(dev->features & NETIF_F_SG)) return -EINVAL; - if (edata.data && !(dev->features & NETIF_F_HW_CSUM)) + if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) || + (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) + == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) return -EINVAL; return dev->ethtool_ops->set_ufo(dev, edata.data); } diff --git a/net/core/filter.c b/net/core/filter.c index a44d27f9f0f0..e8a6ac411ffb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -88,7 +88,7 @@ enum { }; /* No hurry in this branch */ -static void *__load_pointer(struct sk_buff *skb, int k) +static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size) { u8 *ptr = NULL; @@ -97,12 +97,12 @@ static void *__load_pointer(struct sk_buff *skb, int k) else if (k >= SKF_LL_OFF) ptr = skb_mac_header(skb) + k - SKF_LL_OFF; - if (ptr >= skb->head && ptr < skb_tail_pointer(skb)) + if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) return ptr; return NULL; } -static inline void *load_pointer(struct sk_buff *skb, int k, +static inline void *load_pointer(const struct sk_buff *skb, int k, unsigned int size, void *buffer) { if (k >= 0) @@ -110,7 +110,7 @@ static inline void *load_pointer(struct sk_buff *skb, int k, else { if (k >= SKF_AD_OFF) return NULL; - return __load_pointer(skb, k); + return __load_pointer(skb, k, size); } } @@ -160,17 +160,16 @@ EXPORT_SYMBOL(sk_filter); * and last instruction guaranteed to be a RET, we dont need to check * flen. (We used to pass to this function the length of filter) */ -unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry) +unsigned int sk_run_filter(const struct sk_buff *skb, + const struct sock_filter *fentry) { void *ptr; u32 A = 0; /* Accumulator */ u32 X = 0; /* Index Register */ u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ - unsigned long memvalid = 0; u32 tmp; int k; - BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG); /* * Process array of filter instructions. */ @@ -318,12 +317,10 @@ load_b: X = K; continue; case BPF_S_LD_MEM: - A = (memvalid & (1UL << K)) ? - mem[K] : 0; + A = mem[K]; continue; case BPF_S_LDX_MEM: - X = (memvalid & (1UL << K)) ? - mem[K] : 0; + X = mem[K]; continue; case BPF_S_MISC_TAX: X = A; @@ -336,11 +333,9 @@ load_b: case BPF_S_RET_A: return A; case BPF_S_ST: - memvalid |= 1UL << K; mem[K] = A; continue; case BPF_S_STX: - memvalid |= 1UL << K; mem[K] = X; continue; default: @@ -375,6 +370,12 @@ load_b: return 0; A = skb->dev->type; continue; + case SKF_AD_RXHASH: + A = skb->rxhash; + continue; + case SKF_AD_CPU: + A = raw_smp_processor_id(); + continue; case SKF_AD_NLATTR: { struct nlattr *nla; @@ -419,6 +420,66 @@ load_b: } EXPORT_SYMBOL(sk_run_filter); +/* + * Security : + * A BPF program is able to use 16 cells of memory to store intermediate + * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()) + * As we dont want to clear mem[] array for each packet going through + * sk_run_filter(), we check that filter loaded by user never try to read + * a cell if not previously written, and we check all branches to be sure + * a malicious user doesnt try to abuse us. + */ +static int check_load_and_stores(struct sock_filter *filter, int flen) +{ + u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */ + int pc, ret = 0; + + BUILD_BUG_ON(BPF_MEMWORDS > 16); + masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL); + if (!masks) + return -ENOMEM; + memset(masks, 0xff, flen * sizeof(*masks)); + + for (pc = 0; pc < flen; pc++) { + memvalid &= masks[pc]; + + switch (filter[pc].code) { + case BPF_S_ST: + case BPF_S_STX: + memvalid |= (1 << filter[pc].k); + break; + case BPF_S_LD_MEM: + case BPF_S_LDX_MEM: + if (!(memvalid & (1 << filter[pc].k))) { + ret = -EINVAL; + goto error; + } + break; + case BPF_S_JMP_JA: + /* a jump must set masks on target */ + masks[pc + 1 + filter[pc].k] &= memvalid; + memvalid = ~0; + break; + case BPF_S_JMP_JEQ_K: + case BPF_S_JMP_JEQ_X: + case BPF_S_JMP_JGE_K: + case BPF_S_JMP_JGE_X: + case BPF_S_JMP_JGT_K: + case BPF_S_JMP_JGT_X: + case BPF_S_JMP_JSET_X: + case BPF_S_JMP_JSET_K: + /* a jump must set masks on targets */ + masks[pc + 1 + filter[pc].jt] &= memvalid; + masks[pc + 1 + filter[pc].jf] &= memvalid; + memvalid = ~0; + break; + } + } +error: + kfree(masks); + return ret; +} + /** * sk_chk_filter - verify socket filter code * @filter: filter to verify @@ -547,30 +608,23 @@ int sk_chk_filter(struct sock_filter *filter, int flen) switch (filter[flen - 1].code) { case BPF_S_RET_K: case BPF_S_RET_A: - return 0; + return check_load_and_stores(filter, flen); } return -EINVAL; } EXPORT_SYMBOL(sk_chk_filter); /** - * sk_filter_rcu_release - Release a socket filter by rcu_head + * sk_filter_release_rcu - Release a socket filter by rcu_head * @rcu: rcu_head that contains the sk_filter to free */ -static void sk_filter_rcu_release(struct rcu_head *rcu) +void sk_filter_release_rcu(struct rcu_head *rcu) { struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); - sk_filter_release(fp); -} - -static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp) -{ - unsigned int size = sk_filter_len(fp); - - atomic_sub(size, &sk->sk_omem_alloc); - call_rcu_bh(&fp->rcu, sk_filter_rcu_release); + kfree(fp); } +EXPORT_SYMBOL(sk_filter_release_rcu); /** * sk_attach_filter - attach a socket filter @@ -614,7 +668,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) rcu_assign_pointer(sk->sk_filter, fp); if (old_fp) - sk_filter_delayed_uncharge(sk, old_fp); + sk_filter_uncharge(sk, old_fp); return 0; } EXPORT_SYMBOL_GPL(sk_attach_filter); @@ -628,7 +682,7 @@ int sk_detach_filter(struct sock *sk) sock_owned_by_user(sk)); if (filter) { rcu_assign_pointer(sk->sk_filter, NULL); - sk_filter_delayed_uncharge(sk, filter); + sk_filter_uncharge(sk, filter); ret = 0; } return ret; diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 41d99435f62d..182236b2510a 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -46,9 +46,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); lopt_size += nr_table_entries * sizeof(struct request_sock *); if (lopt_size > PAGE_SIZE) - lopt = __vmalloc(lopt_size, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, - PAGE_KERNEL); + lopt = vzalloc(lopt_size); else lopt = kzalloc(lopt_size, GFP_KERNEL); if (lopt == NULL) diff --git a/net/core/sock.c b/net/core/sock.c index fb6080111461..bcdb6ff6e621 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -992,17 +992,18 @@ static inline void sock_lock_init(struct sock *sk) /* * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, * even temporarly, because of RCU lookups. sk_node should also be left as is. + * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end */ static void sock_copy(struct sock *nsk, const struct sock *osk) { #ifdef CONFIG_SECURITY_NETWORK void *sptr = nsk->sk_security; #endif - BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != - sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) + - sizeof(osk->sk_tx_queue_mapping)); - memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, - osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); + memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); + + memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, + osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); + #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; security_sk_clone(osk, nsk); diff --git a/net/core/timestamping.c b/net/core/timestamping.c index dac7ed687f60..b124d28ff1c8 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -26,7 +26,7 @@ static struct sock_filter ptp_filter[] = { PTP_FILTER }; -static unsigned int classify(struct sk_buff *skb) +static unsigned int classify(const struct sk_buff *skb) { if (likely(skb->dev && skb->dev->phydev && diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 2991efcc8dea..5c8362b037ed 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o -dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o - +dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \ + qpolicy.o # # CCID algorithms to be used by dccp.ko # diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 19fafd597465..48ad5d9da7cb 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -243,6 +243,19 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); +/* + * TX Packet Dequeueing Interface + */ +extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); +extern bool dccp_qpolicy_full(struct sock *sk); +extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); +extern struct sk_buff *dccp_qpolicy_top(struct sock *sk); +extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk); +extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param); + +/* + * TX Packet Output and TX Timers + */ extern void dccp_write_xmit(struct sock *sk); extern void dccp_write_space(struct sock *sk); extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); diff --git a/net/dccp/input.c b/net/dccp/input.c index 7d230d14ce22..15af247ea007 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -241,7 +241,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) dccp_update_gsr(sk, seqno); if (dh->dccph_type != DCCP_PKT_SYNC && - (ackno != DCCP_PKT_WITHOUT_ACK_SEQ)) + ackno != DCCP_PKT_WITHOUT_ACK_SEQ && + after48(ackno, dp->dccps_gar)) dp->dccps_gar = ackno; } else { unsigned long now = jiffies; diff --git a/net/dccp/output.c b/net/dccp/output.c index d96dd9d362ae..784d30210543 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk) { int err, len; struct dccp_sock *dp = dccp_sk(sk); - struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); + struct sk_buff *skb = dccp_qpolicy_pop(sk); if (unlikely(skb == NULL)) return; @@ -345,7 +345,7 @@ void dccp_write_xmit(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; - while ((skb = skb_peek(&sk->sk_write_queue))) { + while ((skb = dccp_qpolicy_top(sk))) { int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); switch (ccid_packet_dequeue_eval(rc)) { @@ -359,8 +359,7 @@ void dccp_write_xmit(struct sock *sk) dccp_xmit_packet(sk); break; case CCID_PACKET_ERR: - skb_dequeue(&sk->sk_write_queue); - kfree_skb(skb); + dccp_qpolicy_drop(sk, skb); dccp_pr_debug("packet discarded due to err=%d\n", rc); } } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ef343d53fcea..152975d942d9 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; + dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; dccp_init_xmit_timers(sk); @@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_RECV_CSCOV: err = dccp_setsockopt_cscov(sk, val, true); break; + case DCCP_SOCKOPT_QPOLICY_ID: + if (sk->sk_state != DCCP_CLOSED) + err = -EISCONN; + else if (val < 0 || val >= DCCPQ_POLICY_MAX) + err = -EINVAL; + else + dp->dccps_qpolicy = val; + break; + case DCCP_SOCKOPT_QPOLICY_TXQLEN: + if (val < 0) + err = -EINVAL; + else + dp->dccps_tx_qlen = val; + break; default: err = -ENOPROTOOPT; break; @@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_RECV_CSCOV: val = dp->dccps_pcrlen; break; + case DCCP_SOCKOPT_QPOLICY_ID: + val = dp->dccps_qpolicy; + break; + case DCCP_SOCKOPT_QPOLICY_TXQLEN: + val = dp->dccps_tx_qlen; + break; case 128 ... 191: return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, len, (u32 __user *)optval, optlen); @@ -681,6 +702,47 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); #endif +static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) +{ + struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); + + /* + * Assign an (opaque) qpolicy priority value to skb->priority. + * + * We are overloading this skb field for use with the qpolicy subystem. + * The skb->priority is normally used for the SO_PRIORITY option, which + * is initialised from sk_priority. Since the assignment of sk_priority + * to skb->priority happens later (on layer 3), we overload this field + * for use with queueing priorities as long as the skb is on layer 4. + * The default priority value (if nothing is set) is 0. + */ + skb->priority = 0; + + for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) { + + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + + if (cmsg->cmsg_level != SOL_DCCP) + continue; + + if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX && + !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type)) + return -EINVAL; + + switch (cmsg->cmsg_type) { + case DCCP_SCM_PRIORITY: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) + return -EINVAL; + skb->priority = *(__u32 *)CMSG_DATA(cmsg); + break; + default: + return -EINVAL; + } + } + return 0; +} + int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { @@ -696,8 +758,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, lock_sock(sk); - if (sysctl_dccp_tx_qlen && - (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { + if (dccp_qpolicy_full(sk)) { rc = -EAGAIN; goto out_release; } @@ -725,7 +786,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rc != 0) goto out_discard; - skb_queue_tail(&sk->sk_write_queue, skb); + rc = dccp_msghdr_parse(msg, skb); + if (rc != 0) + goto out_discard; + + dccp_qpolicy_push(sk, skb); /* * The xmit_timer is set if the TX CCID is rate-based and will expire * when congestion control permits to release further packets into the diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c new file mode 100644 index 000000000000..63c30bfa4703 --- /dev/null +++ b/net/dccp/qpolicy.c @@ -0,0 +1,137 @@ +/* + * net/dccp/qpolicy.c + * + * Policy-based packet dequeueing interface for DCCP. + * + * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License v2 + * as published by the Free Software Foundation. + */ +#include "dccp.h" + +/* + * Simple Dequeueing Policy: + * If tx_qlen is different from 0, enqueue up to tx_qlen elements. + */ +static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb) +{ + skb_queue_tail(&sk->sk_write_queue, skb); +} + +static bool qpolicy_simple_full(struct sock *sk) +{ + return dccp_sk(sk)->dccps_tx_qlen && + sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen; +} + +static struct sk_buff *qpolicy_simple_top(struct sock *sk) +{ + return skb_peek(&sk->sk_write_queue); +} + +/* + * Priority-based Dequeueing Policy: + * If tx_qlen is different from 0 and the queue has reached its upper bound + * of tx_qlen elements, replace older packets lowest-priority-first. + */ +static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk) +{ + struct sk_buff *skb, *best = NULL; + + skb_queue_walk(&sk->sk_write_queue, skb) + if (best == NULL || skb->priority > best->priority) + best = skb; + return best; +} + +static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk) +{ + struct sk_buff *skb, *worst = NULL; + + skb_queue_walk(&sk->sk_write_queue, skb) + if (worst == NULL || skb->priority < worst->priority) + worst = skb; + return worst; +} + +static bool qpolicy_prio_full(struct sock *sk) +{ + if (qpolicy_simple_full(sk)) + dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk)); + return false; +} + +/** + * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface + * @push: add a new @skb to the write queue + * @full: indicates that no more packets will be admitted + * @top: peeks at whatever the queueing policy defines as its `top' + */ +static struct dccp_qpolicy_operations { + void (*push) (struct sock *sk, struct sk_buff *skb); + bool (*full) (struct sock *sk); + struct sk_buff* (*top) (struct sock *sk); + __be32 params; + +} qpol_table[DCCPQ_POLICY_MAX] = { + [DCCPQ_POLICY_SIMPLE] = { + .push = qpolicy_simple_push, + .full = qpolicy_simple_full, + .top = qpolicy_simple_top, + .params = 0, + }, + [DCCPQ_POLICY_PRIO] = { + .push = qpolicy_simple_push, + .full = qpolicy_prio_full, + .top = qpolicy_prio_best_skb, + .params = DCCP_SCM_PRIORITY, + }, +}; + +/* + * Externally visible interface + */ +void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb) +{ + qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb); +} + +bool dccp_qpolicy_full(struct sock *sk) +{ + return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk); +} + +void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb) +{ + if (skb != NULL) { + skb_unlink(skb, &sk->sk_write_queue); + kfree_skb(skb); + } +} + +struct sk_buff *dccp_qpolicy_top(struct sock *sk) +{ + return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk); +} + +struct sk_buff *dccp_qpolicy_pop(struct sock *sk) +{ + struct sk_buff *skb = dccp_qpolicy_top(sk); + + if (skb != NULL) { + /* Clear any skb fields that we used internally */ + skb->priority = 0; + skb_unlink(skb, &sk->sk_write_queue); + } + return skb; +} + +bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param) +{ + /* check if exactly one bit is set */ + if (!param || (param & (param - 1))) + return false; + return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param; +} diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 9ecef9968c39..0065e7e14af4 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1556,6 +1556,8 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us if (r_len > sizeof(struct linkinfo_dn)) r_len = sizeof(struct linkinfo_dn); + memset(&link, 0, sizeof(link)); + switch(sock->state) { case SS_CONNECTING: link.idn_linkstate = LL_CONNECTING; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 8280e43c8861..e2e926841fe6 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -240,13 +240,13 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) { if (!(dst_metric_locked(dst, RTAX_MTU))) { - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); dst_set_expires(dst, dn_rt_mtu_expires); } if (!(dst_metric_locked(dst, RTAX_ADVMSS))) { u32 mss = mtu - DN_MAX_NSP_DATA_HEADER; if (dst_metric(dst, RTAX_ADVMSS) > mss) - dst->metrics[RTAX_ADVMSS-1] = mss; + dst_metric_set(dst, RTAX_ADVMSS, mss); } } } @@ -806,8 +806,7 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) if (DN_FIB_RES_GW(*res) && DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = DN_FIB_RES_GW(*res); - memcpy(rt->dst.metrics, fi->fib_metrics, - sizeof(rt->dst.metrics)); + dst_import_metrics(&rt->dst, fi->fib_metrics); } rt->rt_type = res->type; @@ -820,11 +819,11 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) if (dst_metric(&rt->dst, RTAX_MTU) == 0 || dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) - rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; + dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 || dst_metric(&rt->dst, RTAX_ADVMSS) > mss) - rt->dst.metrics[RTAX_ADVMSS-1] = mss; + dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); return 0; } @@ -1502,7 +1501,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src); if (rt->rt_daddr != rt->rt_gateway) RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto rtattr_failure; expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires, diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index f8c1ae4b41f0..f180371fa415 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -31,6 +31,7 @@ #include <linux/skbuff.h> #include <linux/udp.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <net/sock.h> #include <net/inet_common.h> #include <linux/stat.h> @@ -276,12 +277,12 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, #endif #ifdef CONFIG_ECONET_AUNUDP struct msghdr udpmsg; - struct iovec iov[msg->msg_iovlen+1]; + struct iovec iov[2]; struct aunhdr ah; struct sockaddr_in udpdest; __kernel_size_t size; - int i; mm_segment_t oldfs; + char *userbuf; #endif /* @@ -297,23 +298,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, mutex_lock(&econet_mutex); - if (saddr == NULL) { - struct econet_sock *eo = ec_sk(sk); - - addr.station = eo->station; - addr.net = eo->net; - port = eo->port; - cb = eo->cb; - } else { - if (msg->msg_namelen < sizeof(struct sockaddr_ec)) { - mutex_unlock(&econet_mutex); - return -EINVAL; - } - addr.station = saddr->addr.station; - addr.net = saddr->addr.net; - port = saddr->port; - cb = saddr->cb; - } + if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) { + mutex_unlock(&econet_mutex); + return -EINVAL; + } + addr.station = saddr->addr.station; + addr.net = saddr->addr.net; + port = saddr->port; + cb = saddr->cb; /* Look for a device with the right network number. */ dev = net2dev_map[addr.net]; @@ -328,17 +320,17 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, } } - if (len + 15 > dev->mtu) { - mutex_unlock(&econet_mutex); - return -EMSGSIZE; - } - if (dev->type == ARPHRD_ECONET) { /* Real hardware Econet. We're not worthy etc. */ #ifdef CONFIG_ECONET_NATIVE unsigned short proto = 0; int res; + if (len + 15 > dev->mtu) { + mutex_unlock(&econet_mutex); + return -EMSGSIZE; + } + dev_hold(dev); skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev), @@ -351,7 +343,6 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, eb = (struct ec_cb *)&skb->cb; - /* BUG: saddr may be NULL */ eb->cookie = saddr->cookie; eb->sec = *saddr; eb->sent = ec_tx_done; @@ -415,6 +406,11 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, return -ENETDOWN; /* No socket - can't send */ } + if (len > 32768) { + err = -E2BIG; + goto error; + } + /* Make up a UDP datagram and hand it off to some higher intellect. */ memset(&udpdest, 0, sizeof(udpdest)); @@ -446,36 +442,26 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, /* tack our header on the front of the iovec */ size = sizeof(struct aunhdr); - /* - * XXX: that is b0rken. We can't mix userland and kernel pointers - * in iovec, since on a lot of platforms copy_from_user() will - * *not* work with the kernel and userland ones at the same time, - * regardless of what we do with set_fs(). And we are talking about - * econet-over-ethernet here, so "it's only ARM anyway" doesn't - * apply. Any suggestions on fixing that code? -- AV - */ iov[0].iov_base = (void *)&ah; iov[0].iov_len = size; - for (i = 0; i < msg->msg_iovlen; i++) { - void __user *base = msg->msg_iov[i].iov_base; - size_t iov_len = msg->msg_iov[i].iov_len; - /* Check it now since we switch to KERNEL_DS later. */ - if (!access_ok(VERIFY_READ, base, iov_len)) { - mutex_unlock(&econet_mutex); - return -EFAULT; - } - iov[i+1].iov_base = base; - iov[i+1].iov_len = iov_len; - size += iov_len; + + userbuf = vmalloc(len); + if (userbuf == NULL) { + err = -ENOMEM; + goto error; } + iov[1].iov_base = userbuf; + iov[1].iov_len = len; + err = memcpy_fromiovec(userbuf, msg->msg_iov, len); + if (err) + goto error_free_buf; + /* Get a skbuff (no data, just holds our cb information) */ if ((skb = sock_alloc_send_skb(sk, 0, msg->msg_flags & MSG_DONTWAIT, - &err)) == NULL) { - mutex_unlock(&econet_mutex); - return err; - } + &err)) == NULL) + goto error_free_buf; eb = (struct ec_cb *)&skb->cb; @@ -491,7 +477,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, udpmsg.msg_name = (void *)&udpdest; udpmsg.msg_namelen = sizeof(udpdest); udpmsg.msg_iov = &iov[0]; - udpmsg.msg_iovlen = msg->msg_iovlen + 1; + udpmsg.msg_iovlen = 2; udpmsg.msg_control = NULL; udpmsg.msg_controllen = 0; udpmsg.msg_flags=0; @@ -499,9 +485,13 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, oldfs = get_fs(); set_fs(KERNEL_DS); /* More privs :-) */ err = sock_sendmsg(udpsock, &udpmsg, size); set_fs(oldfs); + +error_free_buf: + vfree(userbuf); #else err = -EPROTOTYPE; #endif + error: mutex_unlock(&econet_mutex); return err; @@ -671,6 +661,11 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg) err = 0; switch (cmd) { case SIOCSIFADDR: + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + break; + } + edev = dev->ec_ptr; if (edev == NULL) { /* Magic up a new one. */ diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 93c91b633a56..6df6ecf49708 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -52,11 +52,11 @@ struct net_device *ieee802154_get_dev(struct net *net, switch (addr->addr_type) { case IEEE802154_ADDR_LONG: - rtnl_lock(); - dev = dev_getbyhwaddr(net, ARPHRD_IEEE802154, addr->hwaddr); + rcu_read_lock(); + dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, addr->hwaddr); if (dev) dev_hold(dev); - rtnl_unlock(); + rcu_read_unlock(); break; case IEEE802154_ADDR_SHORT: if (addr->pan_id == 0xffff || diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 7833f17b648a..a2fc7b961dbc 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -883,7 +883,7 @@ static int arp_process(struct sk_buff *skb) dont_send = arp_ignore(in_dev, sip, tip); if (!dont_send && IN_DEV_ARPFILTER(in_dev)) - dont_send |= arp_filter(sip, tip, dev); + dont_send = arp_filter(sip, tip, dev); if (!dont_send) { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) { @@ -1017,13 +1017,14 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on) IPV4_DEVCONF_ALL(net, PROXY_ARP) = on; return 0; } - if (__in_dev_get_rtnl(dev)) { - IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on); + if (__in_dev_get_rcu(dev)) { + IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on); return 0; } return -ENXIO; } +/* must be called with rcu_read_lock() */ static int arp_req_set_public(struct net *net, struct arpreq *r, struct net_device *dev) { @@ -1033,7 +1034,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r, if (mask && mask != htonl(0xFFFFFFFF)) return -EINVAL; if (!dev && (r->arp_flags & ATF_COM)) { - dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, + dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family, r->arp_ha.sa_data); if (!dev) return -ENODEV; @@ -1225,10 +1226,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!(r.arp_flags & ATF_NETMASK)) ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = htonl(0xFFFFFFFFUL); - rtnl_lock(); + rcu_read_lock(); if (r.arp_dev[0]) { err = -ENODEV; - dev = __dev_get_by_name(net, r.arp_dev); + dev = dev_get_by_name_rcu(net, r.arp_dev); if (dev == NULL) goto out; @@ -1252,12 +1253,12 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; case SIOCGARP: err = arp_req_get(&r, dev); - if (!err && copy_to_user(arg, &r, sizeof(r))) - err = -EFAULT; break; } out: - rtnl_unlock(); + rcu_read_unlock(); + if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r))) + err = -EFAULT; return err; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index d9f71bae45c4..3b067704ab38 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1258,7 +1258,7 @@ errout: static size_t inet_get_link_af_size(const struct net_device *dev) { - struct in_device *in_dev = __in_dev_get_rcu(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); if (!in_dev) return 0; @@ -1268,7 +1268,7 @@ static size_t inet_get_link_af_size(const struct net_device *dev) static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) { - struct in_device *in_dev = __in_dev_get_rcu(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); struct nlattr *nla; int i; @@ -1295,7 +1295,7 @@ static int inet_validate_link_af(const struct net_device *dev, struct nlattr *a, *tb[IFLA_INET_MAX+1]; int err, rem; - if (dev && !__in_dev_get_rcu(dev)) + if (dev && !__in_dev_get_rtnl(dev)) return -EAFNOSUPPORT; err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); @@ -1319,7 +1319,7 @@ static int inet_validate_link_af(const struct net_device *dev, static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) { - struct in_device *in_dev = __in_dev_get_rcu(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); struct nlattr *a, *tb[IFLA_INET_MAX+1]; int rem; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 200eb538fbb3..0f280348e0fd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -365,7 +365,7 @@ static struct tnode *tnode_alloc(size_t size) if (size <= PAGE_SIZE) return kzalloc(size, GFP_KERNEL); else - return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); + return vzalloc(size); } static void __tnode_vfree(struct work_struct *arg) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 06f5f8f482f0..25e318153f14 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -55,7 +55,6 @@ EXPORT_SYMBOL(inet_get_local_port_range); int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) { - const __be32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -75,9 +74,9 @@ int inet_csk_bind_conflict(const struct sock *sk, sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr || - sk2_rcv_saddr == sk_rcv_saddr) + const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); + if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || + sk2_rcv_saddr == sk_rcv_saddr(sk)) break; } } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 1b344f30b463..3c0369a3a663 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -133,8 +133,7 @@ int __inet_inherit_port(struct sock *sk, struct sock *child) } } } - sk_add_bind_node(child, &tb->owners); - inet_csk(child)->icsk_bind_hash = tb; + inet_bind_hash(child, tb, port); spin_unlock(&head->lock); return 0; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 258c98d5fa79..ff4e7a4e33ed 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -818,7 +818,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev !ipv4_is_multicast(tunnel->parms.iph.daddr)) || rt6->rt6i_dst.plen == 128) { rt6->rt6i_flags |= RTF_MODIFIED; - skb_dst(skb)->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); } } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 1b48eb1ed453..b14ec7d03b6e 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -253,6 +253,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), + SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3843c2dfde82..26ac396eaa5e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1686,11 +1686,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, if (mtu < dst_mtu(&rth->dst)) { dst_confirm(&rth->dst); if (mtu < ip_rt_min_pmtu) { + u32 lock = dst_metric(&rth->dst, + RTAX_LOCK); mtu = ip_rt_min_pmtu; - rth->dst.metrics[RTAX_LOCK-1] |= - (1 << RTAX_MTU); + lock |= (1 << RTAX_MTU); + dst_metric_set(&rth->dst, RTAX_LOCK, + lock); } - rth->dst.metrics[RTAX_MTU-1] = mtu; + dst_metric_set(&rth->dst, RTAX_MTU, mtu); dst_set_expires(&rth->dst, ip_rt_mtu_expires); } @@ -1708,10 +1711,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) if (dst_mtu(dst) > mtu && mtu >= 68 && !(dst_metric_locked(dst, RTAX_MTU))) { if (mtu < ip_rt_min_pmtu) { + u32 lock = dst_metric(dst, RTAX_LOCK); mtu = ip_rt_min_pmtu; - dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU); + dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU)); } - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); dst_set_expires(dst, ip_rt_mtu_expires); call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } @@ -1796,36 +1800,37 @@ static void set_class_tag(struct rtable *rt, u32 tag) static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) { + struct dst_entry *dst = &rt->dst; struct fib_info *fi = res->fi; if (fi) { if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - memcpy(rt->dst.metrics, fi->fib_metrics, - sizeof(rt->dst.metrics)); + dst_import_metrics(dst, fi->fib_metrics); if (fi->fib_mtu == 0) { - rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; - if (dst_metric_locked(&rt->dst, RTAX_MTU) && + dst_metric_set(dst, RTAX_MTU, dst->dev->mtu); + if (dst_metric_locked(dst, RTAX_MTU) && rt->rt_gateway != rt->rt_dst && - rt->dst.dev->mtu > 576) - rt->dst.metrics[RTAX_MTU-1] = 576; + dst->dev->mtu > 576) + dst_metric_set(dst, RTAX_MTU, 576); } #ifdef CONFIG_NET_CLS_ROUTE - rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; + dst->tclassid = FIB_RES_NH(*res).nh_tclassid; #endif } else - rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu; - - if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) - rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; - if (dst_mtu(&rt->dst) > IP_MAX_MTU) - rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; - if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0) - rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40, - ip_rt_min_advmss); - if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40) - rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; + dst_metric_set(dst, RTAX_MTU, dst->dev->mtu); + + if (dst_metric(dst, RTAX_HOPLIMIT) == 0) + dst_metric_set(dst, RTAX_HOPLIMIT, sysctl_ip_default_ttl); + if (dst_mtu(dst) > IP_MAX_MTU) + dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); + if (dst_metric(dst, RTAX_ADVMSS) == 0) + dst_metric_set(dst, RTAX_ADVMSS, + max_t(unsigned int, dst->dev->mtu - 40, + ip_rt_min_advmss)); + if (dst_metric(dst, RTAX_ADVMSS) > 65535 - 40) + dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); #ifdef CONFIG_NET_CLS_ROUTE #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -2720,7 +2725,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi new->__use = 1; new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + dst_copy_metrics(new, &ort->dst); new->dev = ort->dst.dev; if (new->dev) @@ -2827,7 +2832,7 @@ static int rt_fill_info(struct net *net, if (rt->rt_dst != rt->rt_gateway) NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; if (rt->fl.mark) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e91911d7aae2..1b4ec21497a4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -26,6 +26,8 @@ static int zero; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; +static int tcp_adv_win_scale_min = -31; +static int tcp_adv_win_scale_max = 31; /* Update system visible IP port range */ static void set_local_port_range(int range[2]) @@ -426,7 +428,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_adv_win_scale, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_adv_win_scale_min, + .extra2 = &tcp_adv_win_scale_max, }, { .procname = "tcp_tw_reuse", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2bb46d55f40c..6c11eece262c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2244,7 +2244,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, /* Values greater than interface MTU won't take effect. However * at the point when this call is done we typically don't yet * know which interface is going to be used */ - if (val < 64 || val > MAX_TCP_WINDOW) { + if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) { err = -EINVAL; break; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6d8ab1c4efc3..824e8c8a17ad 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -734,7 +734,7 @@ void tcp_update_metrics(struct sock *sk) * Reset our results. */ if (!(dst_metric_locked(dst, RTAX_RTT))) - dst->metrics[RTAX_RTT - 1] = 0; + dst_metric_set(dst, RTAX_RTT, 0); return; } @@ -776,34 +776,38 @@ void tcp_update_metrics(struct sock *sk) if (dst_metric(dst, RTAX_SSTHRESH) && !dst_metric_locked(dst, RTAX_SSTHRESH) && (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH)) - dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1; + dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1); if (!dst_metric_locked(dst, RTAX_CWND) && tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd; + dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd); } else if (tp->snd_cwnd > tp->snd_ssthresh && icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ if (!dst_metric_locked(dst, RTAX_SSTHRESH)) - dst->metrics[RTAX_SSTHRESH-1] = - max(tp->snd_cwnd >> 1, tp->snd_ssthresh); + dst_metric_set(dst, RTAX_SSTHRESH, + max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); if (!dst_metric_locked(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1; + dst_metric_set(dst, RTAX_CWND, + (dst_metric(dst, RTAX_CWND) + + tp->snd_cwnd) >> 1); } else { /* Else slow start did not finish, cwnd is non-sense, ssthresh may be also invalid. */ if (!dst_metric_locked(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1; + dst_metric_set(dst, RTAX_CWND, + (dst_metric(dst, RTAX_CWND) + + tp->snd_ssthresh) >> 1); if (dst_metric(dst, RTAX_SSTHRESH) && !dst_metric_locked(dst, RTAX_SSTHRESH) && tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) - dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh; + dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh); } if (!dst_metric_locked(dst, RTAX_REORDERING)) { if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && tp->reordering != sysctl_tcp_reordering) - dst->metrics[RTAX_REORDERING-1] = tp->reordering; + dst_metric_set(dst, RTAX_REORDERING, tp->reordering); } } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index dd555051ec8b..4fc3387aa994 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2013,7 +2013,9 @@ get_req: } get_sk: sk_nulls_for_each_from(sk, node) { - if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { + if (!net_eq(sock_net(sk), net)) + continue; + if (sk->sk_family == st->family) { cur = sk; goto out; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3527b51d6159..80b1f80759ab 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -392,7 +392,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) * socket up. We've got bigger problems than * non-graceful socket closings. */ - LIMIT_NETDEBUG(KERN_INFO "TCP: time wait bucket table overflow\n"); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); } tcp_update_metrics(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 749b6498588e..97041f24cd27 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -231,11 +231,10 @@ void tcp_select_initial_window(int __space, __u32 mss, /* when initializing use the value from init_rcv_wnd * rather than the default from above */ - if (init_rcv_wnd && - (*rcv_wnd > init_rcv_wnd * mss)) - *rcv_wnd = init_rcv_wnd * mss; - else if (*rcv_wnd > init_cwnd * mss) - *rcv_wnd = init_cwnd * mss; + if (init_rcv_wnd) + *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); + else + *rcv_wnd = min(*rcv_wnd, init_cwnd * mss); } /* Set the clamp no higher than max representable value */ @@ -386,27 +385,30 @@ struct tcp_out_options { */ static u8 tcp_cookie_size_check(u8 desired) { - if (desired > 0) { + int cookie_size; + + if (desired > 0) /* previously specified */ return desired; - } - if (sysctl_tcp_cookie_size <= 0) { + + cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size); + if (cookie_size <= 0) /* no default specified */ return 0; - } - if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) { + + if (cookie_size <= TCP_COOKIE_MIN) /* value too small, specify minimum */ return TCP_COOKIE_MIN; - } - if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) { + + if (cookie_size >= TCP_COOKIE_MAX) /* value too large, specify maximum */ return TCP_COOKIE_MAX; - } - if (0x1 & sysctl_tcp_cookie_size) { + + if (cookie_size & 1) /* 8-bit multiple, illegal, fix it */ - return (u8)(sysctl_tcp_cookie_size + 0x1); - } - return (u8)sysctl_tcp_cookie_size; + cookie_size++; + + return (u8)cookie_size; } /* Write previously computed TCP options to the packet. @@ -1516,6 +1518,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 send_win, cong_win, limit, in_flight; + int win_divisor; if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) goto send_now; @@ -1547,13 +1550,14 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) goto send_now; - if (sysctl_tcp_tso_win_divisor) { + win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor); + if (win_divisor) { u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); /* If at least some fraction of a window is available, * just use it. */ - chunk /= sysctl_tcp_tso_win_divisor; + chunk /= win_divisor; if (limit >= chunk) goto send_now; } else { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b1155554bb18..4f4483e697bd 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1173,6 +1173,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) sizeof (struct ipv6hdr); dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu-=8; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1361,12 +1363,17 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { static void ip6_tnl_dev_setup(struct net_device *dev) { + struct ip6_tnl *t; + dev->netdev_ops = &ip6_tnl_netdev_ops; dev->destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr); + t = netdev_priv(dev); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu-=8; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); dev->features |= NETIF_F_NETNS_LOCAL; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index e18f84130203..2342545a5ee9 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1259,7 +1259,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; if (rt) - rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, + ra_msg->icmph.icmp6_hop_limit); } skip_defrtr: @@ -1377,7 +1378,7 @@ skip_linkparms: in6_dev->cnf.mtu6 = mtu; if (rt) - rt->dst.metrics[RTAX_MTU-1] = mtu; + dst_metric_set(&rt->dst, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 026caef0326c..4aed0812b512 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -129,7 +129,6 @@ static struct rt6_info ip6_null_entry_template = { .__use = 1, .obsolete = -1, .error = -ENETUNREACH, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, }, @@ -150,7 +149,6 @@ static struct rt6_info ip6_prohibit_entry_template = { .__use = 1, .obsolete = -1, .error = -EACCES, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, }, @@ -166,7 +164,6 @@ static struct rt6_info ip6_blk_hole_entry_template = { .__use = 1, .obsolete = -1, .error = -EINVAL, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = dst_discard, .output = dst_discard, }, @@ -844,7 +841,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + dst_copy_metrics(new, &ort->dst); new->dev = ort->dst.dev; if (new->dev) dev_hold(new->dev); @@ -928,10 +925,12 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { rt6->rt6i_flags |= RTF_MODIFIED; if (mtu < IPV6_MIN_MTU) { + u32 features = dst_metric(dst, RTAX_FEATURES); mtu = IPV6_MIN_MTU; - dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(dst, RTAX_FEATURES, features); } - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } @@ -989,9 +988,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->rt6i_idev = idev; rt->rt6i_nexthop = neigh; atomic_set(&rt->dst.__refcnt, 1); - rt->dst.metrics[RTAX_HOPLIMIT-1] = 255; - rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); + dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); + dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); rt->dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ @@ -1305,17 +1304,17 @@ install_route: goto out; } - rt->dst.metrics[type - 1] = nla_get_u32(nla); + dst_metric_set(&rt->dst, type, nla_get_u32(nla)); } } } if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) - rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); if (!dst_mtu(&rt->dst)) - rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); + dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev)); if (!dst_metric(&rt->dst, RTAX_ADVMSS)) - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -1541,9 +1540,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ - nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), - dst_mtu(&nrt->dst)); + dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev)); + dst_metric_set(&nrt->dst, RTAX_ADVMSS, ipv6_advmss(dev_net(neigh->dev), + dst_mtu(&nrt->dst))); if (ip6_ins_rt(nrt)) goto out; @@ -1602,9 +1601,12 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, would return automatically. */ if (rt->rt6i_flags & RTF_CACHE) { - rt->dst.metrics[RTAX_MTU-1] = pmtu; - if (allfrag) - rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&rt->dst, RTAX_MTU, pmtu); + if (allfrag) { + u32 features = dst_metric(&rt->dst, RTAX_FEATURES); + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&rt->dst, RTAX_FEATURES, features); + } dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; @@ -1621,9 +1623,12 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, nrt = rt6_alloc_clone(rt, daddr); if (nrt) { - nrt->dst.metrics[RTAX_MTU-1] = pmtu; - if (allfrag) - nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&nrt->dst, RTAX_MTU, pmtu); + if (allfrag) { + u32 features = dst_metric(&nrt->dst, RTAX_FEATURES); + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&nrt->dst, RTAX_FEATURES, features); + } /* According to RFC 1981, detecting PMTU increase shouldn't be * happened within 5 mins, the recommended timer is 10 mins. @@ -1674,7 +1679,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) rt->dst.input = ort->dst.input; rt->dst.output = ort->dst.output; - memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; rt->dst.dev = ort->dst.dev; if (rt->dst.dev) @@ -1966,9 +1971,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->dst.output = ip6_output; rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; - rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); - rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); + dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; @@ -2068,8 +2073,8 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) (dst_mtu(&rt->dst) >= arg->mtu || (dst_mtu(&rt->dst) < arg->mtu && dst_mtu(&rt->dst) == idev->cnf.mtu6))) { - rt->dst.metrics[RTAX_MTU-1] = arg->mtu; - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); + dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, arg->mtu)); } return 0; } @@ -2295,7 +2300,7 @@ static int rt6_fill_node(struct net *net, NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } - if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; if (rt->dst.neighbour) @@ -2686,6 +2691,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, @@ -2696,6 +2702,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_prohibit_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255); net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), @@ -2705,6 +2712,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_blk_hole_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255); #endif net->ipv6.sysctl.flush_delay = 0; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6e48a80d0f25..8ce38f10a547 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -606,8 +606,9 @@ static int ipip6_rcv(struct sk_buff *skb) return 0; } - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + /* no tunnel matched, let upstream know, ipsec may handle it */ rcu_read_unlock(); + return 1; out: kfree_skb(skb); return 0; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b541a4e009fb..7aad12770867 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -54,8 +54,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr; - __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + __be32 sk1_rcv_saddr = sk_rcv_saddr(sk); + __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 04635e88e8ed..110efb704c9b 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -672,4 +672,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); MODULE_DESCRIPTION("L2TP over IP"); MODULE_VERSION("1.0"); -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, SOCK_DGRAM, IPPROTO_L2TP); + +/* Use the value of SOCK_DGRAM (2) directory, because __stringify does't like + * enums + */ +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 582612998211..dfd3a648a551 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -316,9 +316,9 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) if (unlikely(addr->sllc_family != AF_LLC)) goto out; rc = -ENODEV; - rtnl_lock(); + rcu_read_lock(); if (sk->sk_bound_dev_if) { - llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); if (llc->dev) { if (!addr->sllc_arphrd) addr->sllc_arphrd = llc->dev->type; @@ -329,14 +329,15 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) !llc_mac_match(addr->sllc_mac, llc->dev->dev_addr)) { rc = -EINVAL; - dev_put(llc->dev); llc->dev = NULL; } } } else - llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, + llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, addr->sllc_mac); - rtnl_unlock(); + if (llc->dev) + dev_hold(llc->dev); + rcu_read_unlock(); if (!llc->dev) goto out; if (!addr->sllc_sap) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 422705d62b5b..246a04a13234 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -167,7 +167,6 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, #define PGV_FROM_VMALLOC 1 struct pgv { char *buffer; - unsigned char flags; }; struct packet_ring_buffer { @@ -224,6 +223,13 @@ struct packet_skb_cb { #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) +static inline __pure struct page *pgv_to_page(void *addr) +{ + if (is_vmalloc_addr(addr)) + return vmalloc_to_page(addr); + return virt_to_page(addr); +} + static void __packet_set_status(struct packet_sock *po, void *frame, int status) { union { @@ -236,11 +242,11 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) switch (po->tp_version) { case TPACKET_V1: h.h1->tp_status = status; - flush_dcache_page(virt_to_page(&h.h1->tp_status)); + flush_dcache_page(pgv_to_page(&h.h1->tp_status)); break; case TPACKET_V2: h.h2->tp_status = status; - flush_dcache_page(virt_to_page(&h.h2->tp_status)); + flush_dcache_page(pgv_to_page(&h.h2->tp_status)); break; default: pr_err("TPACKET version not supported\n"); @@ -263,10 +269,10 @@ static int __packet_get_status(struct packet_sock *po, void *frame) h.raw = frame; switch (po->tp_version) { case TPACKET_V1: - flush_dcache_page(virt_to_page(&h.h1->tp_status)); + flush_dcache_page(pgv_to_page(&h.h1->tp_status)); return h.h1->tp_status; case TPACKET_V2: - flush_dcache_page(virt_to_page(&h.h2->tp_status)); + flush_dcache_page(pgv_to_page(&h.h2->tp_status)); return h.h2->tp_status; default: pr_err("TPACKET version not supported\n"); @@ -511,7 +517,8 @@ out_free: return err; } -static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, +static inline unsigned int run_filter(const struct sk_buff *skb, + const struct sock *sk, unsigned int res) { struct sk_filter *filter; @@ -526,15 +533,15 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, } /* - This function makes lazy skb cloning in hope that most of packets - are discarded by BPF. - - Note tricky part: we DO mangle shared skb! skb->data, skb->len - and skb->cb are mangled. It works because (and until) packets - falling here are owned by current CPU. Output packets are cloned - by dev_queue_xmit_nit(), input packets are processed by net_bh - sequencially, so that if we return skb to original state on exit, - we will not harm anyone. + * This function makes lazy skb cloning in hope that most of packets + * are discarded by BPF. + * + * Note tricky part: we DO mangle shared skb! skb->data, skb->len + * and skb->cb are mangled. It works because (and until) packets + * falling here are owned by current CPU. Output packets are cloned + * by dev_queue_xmit_nit(), input packets are processed by net_bh + * sequencially, so that if we return skb to original state on exit, + * we will not harm anyone. */ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, @@ -560,11 +567,11 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, if (dev->header_ops) { /* The device has an explicit notion of ll header, - exported to higher levels. - - Otherwise, the device hides datails of it frame - structure, so that corresponding packet head - never delivered to user. + * exported to higher levels. + * + * Otherwise, the device hides details of its frame + * structure, so that corresponding packet head is + * never delivered to user. */ if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); @@ -799,17 +806,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, __packet_set_status(po, h.raw, status); smp_mb(); +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 { - struct page *p_start, *p_end; - u8 *h_end = h.raw + macoff + snaplen - 1; - - p_start = virt_to_page(h.raw); - p_end = virt_to_page(h_end); - while (p_start <= p_end) { - flush_dcache_page(p_start); - p_start++; - } + u8 *start, *end; + + end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen); + for (start = h.raw; start < end; start += PAGE_SIZE) + flush_dcache_page(pgv_to_page(start)); } +#endif sk->sk_data_ready(sk, 0); @@ -915,7 +920,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, } err = -EFAULT; - page = virt_to_page(data); offset = offset_in_page(data); len_max = PAGE_SIZE - offset; len = ((to_write > len_max) ? len_max : to_write); @@ -934,11 +938,11 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return -EFAULT; } + page = pgv_to_page(data); + data += len; flush_dcache_page(page); get_page(page); - skb_fill_page_desc(skb, - nr_frags, - page++, offset, len); + skb_fill_page_desc(skb, nr_frags, page, offset, len); to_write -= len; offset = 0; len_max = PAGE_SIZE; @@ -2340,7 +2344,7 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order, for (i = 0; i < len; i++) { if (likely(pg_vec[i].buffer)) { - if (pg_vec[i].flags & PGV_FROM_VMALLOC) + if (is_vmalloc_addr(pg_vec[i].buffer)) vfree(pg_vec[i].buffer); else free_pages((unsigned long)pg_vec[i].buffer, @@ -2351,8 +2355,7 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order, kfree(pg_vec); } -static inline char *alloc_one_pg_vec_page(unsigned long order, - unsigned char *flags) +static inline char *alloc_one_pg_vec_page(unsigned long order) { char *buffer = NULL; gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | @@ -2366,7 +2369,6 @@ static inline char *alloc_one_pg_vec_page(unsigned long order, /* * __get_free_pages failed, fall back to vmalloc */ - *flags |= PGV_FROM_VMALLOC; buffer = vzalloc((1 << order) * PAGE_SIZE); if (buffer) @@ -2375,7 +2377,6 @@ static inline char *alloc_one_pg_vec_page(unsigned long order, /* * vmalloc failed, lets dig into swap here */ - *flags = 0; gfp_flags &= ~__GFP_NORETRY; buffer = (char *)__get_free_pages(gfp_flags, order); if (buffer) @@ -2398,8 +2399,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) goto out; for (i = 0; i < block_nr; i++) { - pg_vec[i].buffer = alloc_one_pg_vec_page(order, - &pg_vec[i].flags); + pg_vec[i].buffer = alloc_one_pg_vec_page(order); if (unlikely(!pg_vec[i].buffer)) goto out_free_pgvec; } @@ -2409,7 +2409,6 @@ out: out_free_pgvec: free_pg_vec(pg_vec, order, block_nr); - kfree(pg_vec); pg_vec = NULL; goto out; } @@ -2583,13 +2582,8 @@ static int packet_mmap(struct file *file, struct socket *sock, void *kaddr = rb->pg_vec[i].buffer; int pg_num; - for (pg_num = 0; pg_num < rb->pg_vec_pages; - pg_num++) { - if (rb->pg_vec[i].flags & PGV_FROM_VMALLOC) - page = vmalloc_to_page(kaddr); - else - page = virt_to_page(kaddr); - + for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) { + page = pgv_to_page(kaddr); err = vm_insert_page(vma, start, page); if (unlikely(err)) goto out; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6bd554323a34..842c7f3650b9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6047,7 +6047,7 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, * will suddenly eat the receive_queue. * * Look at current nfs client by the way... - * However, this function was corrent in any case. 8) + * However, this function was correct in any case. 8) */ if (flags & MSG_PEEK) { spin_lock_bh(&sk->sk_receive_queue.lock); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7ff31c60186a..417d7a6c36cf 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1344,9 +1344,25 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +#define MAX_RECURSION_LEVEL 4 + static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; + unsigned char max_level = 0; + int unix_sock_count = 0; + + for (i = scm->fp->count - 1; i >= 0; i--) { + struct sock *sk = unix_get_socket(scm->fp->fp[i]); + + if (sk) { + unix_sock_count++; + max_level = max(max_level, + unix_sk(sk)->recursion_level); + } + } + if (unlikely(max_level > MAX_RECURSION_LEVEL)) + return -ETOOMANYREFS; /* * Need to duplicate file references for the sake of garbage @@ -1357,9 +1373,11 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - for (i = scm->fp->count-1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - return 0; + if (unix_sock_count) { + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->fp[i]); + } + return max_level; } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) @@ -1394,6 +1412,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sk_buff *skb; long timeo; struct scm_cookie tmp_scm; + int max_level; if (NULL == siocb->scm) siocb->scm = &tmp_scm; @@ -1432,8 +1451,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; err = unix_scm_to_skb(siocb->scm, skb, true); - if (err) + if (err < 0) goto out_free; + max_level = err + 1; unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); @@ -1515,6 +1535,8 @@ restart: if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); skb_queue_tail(&other->sk_receive_queue, skb); + if (max_level > unix_sk(other)->recursion_level) + unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other, len); sock_put(other); @@ -1545,6 +1567,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, int sent = 0; struct scm_cookie tmp_scm; bool fds_sent = false; + int max_level; if (NULL == siocb->scm) siocb->scm = &tmp_scm; @@ -1608,10 +1631,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, /* Only send the fds in the first buffer */ err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); - if (err) { + if (err < 0) { kfree_skb(skb); goto out_err; } + max_level = err + 1; fds_sent = true; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); @@ -1627,6 +1651,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, goto pipe_err_free; skb_queue_tail(&other->sk_receive_queue, skb); + if (max_level > unix_sk(other)->recursion_level) + unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other, size); sent += size; @@ -1847,6 +1873,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, unix_state_lock(sk); skb = skb_dequeue(&sk->sk_receive_queue); if (skb == NULL) { + unix_sk(sk)->recursion_level = 0; if (copied >= target) goto unlock; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index c8df6fda0b1f..f89f83bf828e 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -96,7 +96,7 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); unsigned int unix_tot_inflight; -static struct sock *unix_get_socket(struct file *filp) +struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; struct inode *inode = filp->f_path.dentry->d_inode; @@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u) } static bool gc_in_progress = false; +#define UNIX_INFLIGHT_TRIGGER_GC 16000 void wait_for_unix_gc(void) { + /* + * If number of inflight sockets is insane, + * force a garbage collect right now. + */ + if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) + unix_gc(); wait_event(unix_gc_wait, gc_in_progress == false); } diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 4c81f6abb65b..4cbc942f762a 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -398,6 +398,7 @@ void __exit x25_link_free(void) list_for_each_safe(entry, tmp, &x25_neigh_list) { nb = list_entry(entry, struct x25_neigh, node); __x25_remove_neigh(nb); + dev_put(nb->dev); } write_unlock_bh(&x25_neigh_list_lock); } diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c index a2023ec52329..1e98bc0fe0a5 100644 --- a/net/xfrm/xfrm_hash.c +++ b/net/xfrm/xfrm_hash.c @@ -19,7 +19,7 @@ struct hlist_head *xfrm_hash_alloc(unsigned int sz) if (sz <= PAGE_SIZE) n = kzalloc(sz, GFP_KERNEL); else if (hashdist) - n = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); + n = vzalloc(sz); else n = (struct hlist_head *) __get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 044e77898512..6e50ccd8c532 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1433,7 +1433,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, } xdst->route = dst; - memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics)); + dst_copy_metrics(dst1, dst); if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { family = xfrm[i]->props.family; @@ -2271,7 +2271,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst) if (pmtu > route_mtu_cached) pmtu = route_mtu_cached; - dst->metrics[RTAX_MTU-1] = pmtu; + dst_metric_set(dst, RTAX_MTU, pmtu); } while ((dst = dst->next)); } @@ -2349,7 +2349,7 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, mtu = xfrm_state_mtu(dst->xfrm, mtu); if (mtu > last->route_mtu_cached) mtu = last->route_mtu_cached; - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); if (last == first) break; |