diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 30 | ||||
-rw-r--r-- | net/core/ethtool.c | 2 | ||||
-rw-r--r-- | net/core/fib_rules.c | 25 | ||||
-rw-r--r-- | net/core/filter.c | 277 | ||||
-rw-r--r-- | net/core/neighbour.c | 97 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 115 | ||||
-rw-r--r-- | net/core/net_namespace.c | 11 | ||||
-rw-r--r-- | net/core/request_sock.c | 35 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 21 | ||||
-rw-r--r-- | net/core/skbuff.c | 54 | ||||
-rw-r--r-- | net/core/sock.c | 101 | ||||
-rw-r--r-- | net/core/sock_diag.c | 37 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 2 |
13 files changed, 487 insertions, 320 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 962ee9d71964..5d43e010ef87 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1385,7 +1385,7 @@ static int __dev_close(struct net_device *dev) return retval; } -static int dev_close_many(struct list_head *head) +int dev_close_many(struct list_head *head, bool unlink) { struct net_device *dev, *tmp; @@ -1399,11 +1399,13 @@ static int dev_close_many(struct list_head *head) list_for_each_entry_safe(dev, tmp, head, close_list) { rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); call_netdevice_notifiers(NETDEV_DOWN, dev); - list_del_init(&dev->close_list); + if (unlink) + list_del_init(&dev->close_list); } return 0; } +EXPORT_SYMBOL(dev_close_many); /** * dev_close - shutdown an interface. @@ -1420,7 +1422,7 @@ int dev_close(struct net_device *dev) LIST_HEAD(single); list_add(&dev->close_list, &single); - dev_close_many(&single); + dev_close_many(&single, true); list_del(&single); } return 0; @@ -5912,6 +5914,24 @@ int dev_get_phys_port_id(struct net_device *dev, EXPORT_SYMBOL(dev_get_phys_port_id); /** + * dev_get_phys_port_name - Get device physical port name + * @dev: device + * @name: port name + * + * Get device physical port name + */ +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_get_phys_port_name) + return -EOPNOTSUPP; + return ops->ndo_get_phys_port_name(dev, name, len); +} +EXPORT_SYMBOL(dev_get_phys_port_name); + +/** * dev_new_index - allocate an ifindex * @net: the applicable net namespace * @@ -5968,7 +5988,7 @@ static void rollback_registered_many(struct list_head *head) /* If device is running, close it first. */ list_for_each_entry(dev, head, unreg_list) list_add_tail(&dev->close_list, &close_head); - dev_close_many(&close_head); + dev_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ @@ -6841,8 +6861,6 @@ void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; - release_net(dev_net(dev)); - netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS kvfree(dev->_rx); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index aa378ecef186..1d00b8922902 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -790,7 +790,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (ops->get_rxfh_indir_size) dev_indir_size = ops->get_rxfh_indir_size(dev); if (ops->get_rxfh_key_size) - dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev); + dev_key_size = ops->get_rxfh_key_size(dev); if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) return -EFAULT; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 44706e81b2e0..68ea6950cad1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -31,7 +31,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; - r->fr_net = hold_net(ops->fro_net); + r->fr_net = ops->fro_net; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; @@ -116,7 +116,6 @@ static int __fib_rules_register(struct fib_rules_ops *ops) if (ops->family == o->family) goto errout; - hold_net(net); list_add_tail_rcu(&ops->list, &net->rules_ops); err = 0; errout: @@ -160,15 +159,6 @@ static void fib_rules_cleanup_ops(struct fib_rules_ops *ops) } } -static void fib_rules_put_rcu(struct rcu_head *head) -{ - struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu); - struct net *net = ops->fro_net; - - release_net(net); - kfree(ops); -} - void fib_rules_unregister(struct fib_rules_ops *ops) { struct net *net = ops->fro_net; @@ -178,7 +168,7 @@ void fib_rules_unregister(struct fib_rules_ops *ops) fib_rules_cleanup_ops(ops); spin_unlock(&net->rules_mod_lock); - call_rcu(&ops->rcu, fib_rules_put_rcu); + kfree_rcu(ops, rcu); } EXPORT_SYMBOL_GPL(fib_rules_unregister); @@ -303,7 +293,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) err = -ENOMEM; goto errout; } - rule->fr_net = hold_net(net); + rule->fr_net = net; if (tb[FRA_PRIORITY]) rule->pref = nla_get_u32(tb[FRA_PRIORITY]); @@ -423,7 +413,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) return 0; errout_free: - release_net(rule->fr_net); kfree(rule); errout: rules_ops_put(ops); @@ -492,6 +481,12 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) goto errout; } + if (ops->delete) { + err = ops->delete(rule); + if (err) + goto errout; + } + list_del_rcu(&rule->list); if (rule->action == FR_ACT_GOTO) { @@ -517,8 +512,6 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); - if (ops->delete) - ops->delete(rule); fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); diff --git a/net/core/filter.c b/net/core/filter.c index f6bdc2b1ba01..bdaac5895def 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -150,10 +150,62 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return prandom_u32(); } +static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, + struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + + switch (skb_field) { + case SKF_AD_MARK: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, mark)); + break; + + case SKF_AD_PKTTYPE: + *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET()); + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX); +#ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5); +#endif + break; + + case SKF_AD_QUEUE: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, queue_mapping)); + break; + + case SKF_AD_VLAN_TAG: + case SKF_AD_VLAN_TAG_PRESENT: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + + /* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */ + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, vlan_tci)); + if (skb_field == SKF_AD_VLAN_TAG) { + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, + ~VLAN_TAG_PRESENT); + } else { + /* dst_reg >>= 12 */ + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12); + /* dst_reg &= 1 */ + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1); + } + break; + } + + return insn - insn_buf; +} + static bool convert_bpf_extensions(struct sock_filter *fp, struct bpf_insn **insnp) { struct bpf_insn *insn = *insnp; + u32 cnt; switch (fp->k) { case SKF_AD_OFF + SKF_AD_PROTOCOL: @@ -167,13 +219,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_PKTTYPE: - *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, - PKT_TYPE_OFFSET()); - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); -#ifdef __BIG_ENDIAN_BITFIELD - insn++; - *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5); -#endif + cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_IFINDEX: @@ -197,10 +244,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_MARK: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - - *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, mark)); + cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_RXHASH: @@ -211,29 +256,20 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_QUEUE: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); - - *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, queue_mapping)); + cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_VLAN_TAG: - case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + cnt = convert_skb_access(SKF_AD_VLAN_TAG, + BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; + break; - /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */ - *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, vlan_tci)); - if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, - ~VLAN_TAG_PRESENT); - } else { - /* A >>= 12 */ - *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12); - /* A &= 1 */ - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1); - } + case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: + cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, + BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_PAY_OFFSET: @@ -814,7 +850,7 @@ static void bpf_release_orig_filter(struct bpf_prog *fp) static void __bpf_prog_release(struct bpf_prog *prog) { - if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { + if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) { bpf_prog_put(prog); } else { bpf_release_orig_filter(prog); @@ -1019,6 +1055,32 @@ void bpf_prog_destroy(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_destroy); +static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) +{ + struct sk_filter *fp, *old_fp; + + fp = kmalloc(sizeof(*fp), GFP_KERNEL); + if (!fp) + return -ENOMEM; + + fp->prog = prog; + atomic_set(&fp->refcnt, 0); + + if (!sk_filter_charge(sk, fp)) { + kfree(fp); + return -ENOMEM; + } + + old_fp = rcu_dereference_protected(sk->sk_filter, + sock_owned_by_user(sk)); + rcu_assign_pointer(sk->sk_filter, fp); + + if (old_fp) + sk_filter_uncharge(sk, old_fp); + + return 0; +} + /** * sk_attach_filter - attach a socket filter * @fprog: the filter program @@ -1031,7 +1093,6 @@ EXPORT_SYMBOL_GPL(bpf_prog_destroy); */ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { - struct sk_filter *fp, *old_fp; unsigned int fsize = bpf_classic_proglen(fprog); unsigned int bpf_fsize = bpf_prog_size(fprog->len); struct bpf_prog *prog; @@ -1068,36 +1129,20 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - fp = kmalloc(sizeof(*fp), GFP_KERNEL); - if (!fp) { + err = __sk_attach_prog(prog, sk); + if (err < 0) { __bpf_prog_release(prog); - return -ENOMEM; - } - fp->prog = prog; - - atomic_set(&fp->refcnt, 0); - - if (!sk_filter_charge(sk, fp)) { - __sk_filter_release(fp); - return -ENOMEM; + return err; } - old_fp = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); - rcu_assign_pointer(sk->sk_filter, fp); - - if (old_fp) - sk_filter_uncharge(sk, old_fp); - return 0; } EXPORT_SYMBOL_GPL(sk_attach_filter); -#ifdef CONFIG_BPF_SYSCALL int sk_attach_bpf(u32 ufd, struct sock *sk) { - struct sk_filter *fp, *old_fp; struct bpf_prog *prog; + int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; @@ -1106,40 +1151,22 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) { - /* valid fd, but invalid program type */ + if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { bpf_prog_put(prog); return -EINVAL; } - fp = kmalloc(sizeof(*fp), GFP_KERNEL); - if (!fp) { + err = __sk_attach_prog(prog, sk); + if (err < 0) { bpf_prog_put(prog); - return -ENOMEM; - } - fp->prog = prog; - - atomic_set(&fp->refcnt, 0); - - if (!sk_filter_charge(sk, fp)) { - __sk_filter_release(fp); - return -ENOMEM; + return err; } - old_fp = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); - rcu_assign_pointer(sk->sk_filter, fp); - - if (old_fp) - sk_filter_uncharge(sk, old_fp); - return 0; } -/* allow socket filters to call - * bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem() - */ -static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id) +static const struct bpf_func_proto * +sk_filter_func_proto(enum bpf_func_id func_id) { switch (func_id) { case BPF_FUNC_map_lookup_elem: @@ -1148,39 +1175,103 @@ static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func return &bpf_map_update_elem_proto; case BPF_FUNC_map_delete_elem: return &bpf_map_delete_elem_proto; + case BPF_FUNC_get_prandom_u32: + return &bpf_get_prandom_u32_proto; + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_smp_processor_id_proto; default: return NULL; } } -static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type) +static bool sk_filter_is_valid_access(int off, int size, + enum bpf_access_type type) { - /* skb fields cannot be accessed yet */ - return false; + /* only read is allowed */ + if (type != BPF_READ) + return false; + + /* check bounds */ + if (off < 0 || off >= sizeof(struct __sk_buff)) + return false; + + /* disallow misaligned access */ + if (off % size != 0) + return false; + + /* all __sk_buff fields are __u32 */ + if (size != 4) + return false; + + return true; } -static struct bpf_verifier_ops sock_filter_ops = { - .get_func_proto = sock_filter_func_proto, - .is_valid_access = sock_filter_is_valid_access, +static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, + struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + + switch (ctx_off) { + case offsetof(struct __sk_buff, len): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, len)); + break; + + case offsetof(struct __sk_buff, protocol): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, protocol)); + break; + + case offsetof(struct __sk_buff, mark): + return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, pkt_type): + return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, queue_mapping): + return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, vlan_present): + return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, + dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, vlan_tci): + return convert_skb_access(SKF_AD_VLAN_TAG, + dst_reg, src_reg, insn); + } + + return insn - insn_buf; +} + +static const struct bpf_verifier_ops sk_filter_ops = { + .get_func_proto = sk_filter_func_proto, + .is_valid_access = sk_filter_is_valid_access, + .convert_ctx_access = sk_filter_convert_ctx_access, }; -static struct bpf_prog_type_list tl = { - .ops = &sock_filter_ops, +static struct bpf_prog_type_list sk_filter_type __read_mostly = { + .ops = &sk_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, }; -static int __init register_sock_filter_ops(void) +static struct bpf_prog_type_list sched_cls_type __read_mostly = { + .ops = &sk_filter_ops, + .type = BPF_PROG_TYPE_SCHED_CLS, +}; + +static int __init register_sk_filter_ops(void) { - bpf_register_prog_type(&tl); + bpf_register_prog_type(&sk_filter_type); + bpf_register_prog_type(&sched_cls_type); + return 0; } -late_initcall(register_sock_filter_ops); -#else -int sk_attach_bpf(u32 ufd, struct sock *sk) -{ - return -EOPNOTSUPP; -} -#endif +late_initcall(register_sk_filter_ops); + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 70fe9e10ac86..0e8b32efc031 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -397,25 +397,15 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { struct neighbour *n; - int key_len = tbl->key_len; - u32 hash_val; - struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, lookups); rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); - hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); - - for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); - n != NULL; - n = rcu_dereference_bh(n->next)) { - if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { - if (!atomic_inc_not_zero(&n->refcnt)) - n = NULL; - NEIGH_CACHE_STAT_INC(tbl, hits); - break; - } + n = __neigh_lookup_noref(tbl, pkey, dev); + if (n) { + if (!atomic_inc_not_zero(&n->refcnt)) + n = NULL; + NEIGH_CACHE_STAT_INC(tbl, hits); } rcu_read_unlock_bh(); @@ -601,7 +591,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (!n) goto out; - write_pnet(&n->net, hold_net(net)); + write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) @@ -610,7 +600,6 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (tbl->pconstructor && tbl->pconstructor(n)) { if (dev) dev_put(dev); - release_net(net); kfree(n); n = NULL; goto out; @@ -644,7 +633,6 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, tbl->pdestructor(n); if (n->dev) dev_put(n->dev); - release_net(pneigh_net(n)); kfree(n); return 0; } @@ -667,7 +655,6 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); - release_net(pneigh_net(n)); kfree(n); continue; } @@ -1263,10 +1250,10 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, EXPORT_SYMBOL(neigh_event_ns); /* called with read_lock_bh(&n->lock); */ -static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst) +static void neigh_hh_init(struct neighbour *n) { - struct net_device *dev = dst->dev; - __be16 prot = dst->ops->protocol; + struct net_device *dev = n->dev; + __be16 prot = n->tbl->protocol; struct hh_cache *hh = &n->hh; write_lock_bh(&n->lock); @@ -1280,43 +1267,19 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst) write_unlock_bh(&n->lock); } -/* This function can be used in contexts, where only old dev_queue_xmit - * worked, f.e. if you want to override normal output path (eql, shaper), - * but resolution is not made yet. - */ - -int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - - __skb_pull(skb, skb_network_offset(skb)); - - if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, - skb->len) < 0 && - dev_rebuild_header(skb)) - return 0; - - return dev_queue_xmit(skb); -} -EXPORT_SYMBOL(neigh_compat_output); - /* Slow and careful. */ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); int rc = 0; - if (!dst) - goto discard; - if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; unsigned int seq; if (dev->header_ops->cache && !neigh->hh.hh_len) - neigh_hh_init(neigh, dst); + neigh_hh_init(neigh); do { __skb_pull(skb, skb_network_offset(skb)); @@ -1332,8 +1295,6 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) } out: return rc; -discard: - neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh); out_kfree_skb: rc = -EINVAL; kfree_skb(skb); @@ -1464,11 +1425,10 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); dev_hold(dev); p->dev = dev; - write_pnet(&p->net, hold_net(net)); + write_pnet(&p->net, net); p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { - release_net(net); dev_put(dev); kfree(p); return NULL; @@ -1508,7 +1468,6 @@ EXPORT_SYMBOL(neigh_parms_release); static void neigh_parms_destroy(struct neigh_parms *parms) { - release_net(neigh_parms_net(parms)); kfree(parms); } @@ -2427,6 +2386,40 @@ void __neigh_for_each_release(struct neigh_table *tbl, } EXPORT_SYMBOL(__neigh_for_each_release); +int neigh_xmit(int index, struct net_device *dev, + const void *addr, struct sk_buff *skb) +{ + int err = -EAFNOSUPPORT; + if (likely(index < NEIGH_NR_TABLES)) { + struct neigh_table *tbl; + struct neighbour *neigh; + + tbl = neigh_tables[index]; + if (!tbl) + goto out; + neigh = __neigh_lookup_noref(tbl, addr, dev); + if (!neigh) + neigh = __neigh_create(tbl, addr, dev, false); + err = PTR_ERR(neigh); + if (IS_ERR(neigh)) + goto out_kfree_skb; + err = neigh->output(neigh, skb); + } + else if (index == NEIGH_LINK_TABLE) { + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + addr, NULL, skb->len); + if (err < 0) + goto out_kfree_skb; + err = dev_queue_xmit(skb); + } +out: + return err; +out_kfree_skb: + kfree_skb(skb); + goto out; +} +EXPORT_SYMBOL(neigh_xmit); + #ifdef CONFIG_PROC_FS static struct neighbour *neigh_get_first(struct seq_file *seq) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f2aa73bfb0e4..cc5cf689809c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -23,6 +23,7 @@ #include <linux/export.h> #include <linux/jiffies.h> #include <linux/pm_runtime.h> +#include <linux/of.h> #include "net-sysfs.h" @@ -417,6 +418,28 @@ static ssize_t phys_port_id_show(struct device *dev, } static DEVICE_ATTR_RO(phys_port_id); +static ssize_t phys_port_name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + ssize_t ret = -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (dev_isalive(netdev)) { + char name[IFNAMSIZ]; + + ret = dev_get_phys_port_name(netdev, name, sizeof(name)); + if (!ret) + ret = sprintf(buf, "%s\n", name); + } + rtnl_unlock(); + + return ret; +} +static DEVICE_ATTR_RO(phys_port_name); + static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -464,6 +487,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_tx_queue_len.attr, &dev_attr_gro_flush_timeout.attr, &dev_attr_phys_port_id.attr, + &dev_attr_phys_port_name.attr, &dev_attr_phys_switch_id.attr, NULL, }; @@ -950,6 +974,60 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue, return sprintf(buf, "%lu", trans_timeout); } +#ifdef CONFIG_XPS +static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) +{ + struct net_device *dev = queue->dev; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) + if (queue == &dev->_tx[i]) + break; + + BUG_ON(i >= dev->num_tx_queues); + + return i; +} + +static ssize_t show_tx_maxrate(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + char *buf) +{ + return sprintf(buf, "%lu\n", queue->tx_maxrate); +} + +static ssize_t set_tx_maxrate(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct net_device *dev = queue->dev; + int err, index = get_netdev_queue_index(queue); + u32 rate = 0; + + err = kstrtou32(buf, 10, &rate); + if (err < 0) + return err; + + if (!rtnl_trylock()) + return restart_syscall(); + + err = -EOPNOTSUPP; + if (dev->netdev_ops->ndo_set_tx_maxrate) + err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate); + + rtnl_unlock(); + if (!err) { + queue->tx_maxrate = rate; + return len; + } + return err; +} + +static struct netdev_queue_attribute queue_tx_maxrate = + __ATTR(tx_maxrate, S_IRUGO | S_IWUSR, + show_tx_maxrate, set_tx_maxrate); +#endif + static struct netdev_queue_attribute queue_trans_timeout = __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); @@ -1064,18 +1142,6 @@ static struct attribute_group dql_group = { #endif /* CONFIG_BQL */ #ifdef CONFIG_XPS -static unsigned int get_netdev_queue_index(struct netdev_queue *queue) -{ - struct net_device *dev = queue->dev; - unsigned int i; - - i = queue - dev->_tx; - BUG_ON(i >= dev->num_tx_queues); - - return i; -} - - static ssize_t show_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, char *buf) { @@ -1152,6 +1218,7 @@ static struct attribute *netdev_queue_default_attrs[] = { &queue_trans_timeout.attr, #ifdef CONFIG_XPS &xps_cpus_attribute.attr, + &queue_tx_maxrate.attr, #endif NULL }; @@ -1374,6 +1441,30 @@ static struct class net_class = { .namespace = net_namespace, }; +#ifdef CONFIG_OF_NET +static int of_dev_node_match(struct device *dev, const void *data) +{ + int ret = 0; + + if (dev->parent) + ret = dev->parent->of_node == data; + + return ret == 0 ? dev->of_node == data : ret; +} + +struct net_device *of_find_net_device_by_node(struct device_node *np) +{ + struct device *dev; + + dev = class_find_device(&net_class, NULL, np, of_dev_node_match); + if (!dev) + return NULL; + + return to_net_dev(dev); +} +EXPORT_SYMBOL(of_find_net_device_by_node); +#endif + /* Delete sysfs entries but hold kobject reference until after all * netdev references are gone. */ diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index cb5290b8c428..e5e96b0f6717 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -236,10 +236,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) net->user_ns = user_ns; idr_init(&net->netns_ids); -#ifdef NETNS_REFCNT_DEBUG - atomic_set(&net->use_count, 0); -#endif - list_for_each_entry(ops, &pernet_list, list) { error = ops_init(ops, net); if (error < 0) @@ -294,13 +290,6 @@ out_free: static void net_free(struct net *net) { -#ifdef NETNS_REFCNT_DEBUG - if (unlikely(atomic_read(&net->use_count) != 0)) { - pr_emerg("network namespace not free! Usage: %d\n", - atomic_read(&net->use_count)); - return; - } -#endif kfree(rcu_access_pointer(net->gen)); kmem_cache_free(net_cachep, net); } diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 04db318e6218..cdc0ddd9ac9f 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -94,21 +94,26 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) /* make all the listen_opt local to us */ struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); - if (lopt->qlen != 0) { + if (listen_sock_qlen(lopt) != 0) { unsigned int i; for (i = 0; i < lopt->nr_table_entries; i++) { struct request_sock *req; + write_lock_bh(&queue->syn_wait_lock); while ((req = lopt->syn_table[i]) != NULL) { lopt->syn_table[i] = req->dl_next; - lopt->qlen--; - reqsk_free(req); + atomic_inc(&lopt->qlen_dec); + if (del_timer(&req->rsk_timer)) + reqsk_put(req); + reqsk_put(req); } + write_unlock_bh(&queue->syn_wait_lock); } } - WARN_ON(lopt->qlen != 0); + if (WARN_ON(listen_sock_qlen(lopt) != 0)) + pr_err("qlen %u\n", listen_sock_qlen(lopt)); kvfree(lopt); } @@ -153,24 +158,22 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) * case might also exist in tcp_v4_hnd_req() that will trigger this locking * order. * - * When a TFO req is created, it needs to sock_hold its listener to prevent - * the latter data structure from going away. - * - * This function also sets "treq->listener" to NULL and unreference listener - * socket. treq->listener is used by the listener so it is protected by the + * This function also sets "treq->tfo_listener" to false. + * treq->tfo_listener is used by the listener so it is protected by the * fastopenq->lock in this function. */ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, bool reset) { - struct sock *lsk = tcp_rsk(req)->listener; - struct fastopen_queue *fastopenq = - inet_csk(lsk)->icsk_accept_queue.fastopenq; + struct sock *lsk = req->rsk_listener; + struct fastopen_queue *fastopenq; + + fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq; tcp_sk(sk)->fastopen_rsk = NULL; spin_lock_bh(&fastopenq->lock); fastopenq->qlen--; - tcp_rsk(req)->listener = NULL; + tcp_rsk(req)->tfo_listener = false; if (req->sk) /* the child socket hasn't been accepted yet */ goto out; @@ -179,8 +182,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, * special RST handling below. */ spin_unlock_bh(&fastopenq->lock); - sock_put(lsk); - reqsk_free(req); + reqsk_put(req); return; } /* Wait for 60secs before removing a req that has triggered RST. @@ -190,7 +192,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, * * For more details see CoNext'11 "TCP Fast Open" paper. */ - req->expires = jiffies + 60*HZ; + req->rsk_timer.expires = jiffies + 60*HZ; if (fastopenq->rskq_rst_head == NULL) fastopenq->rskq_rst_head = req; else @@ -201,5 +203,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, fastopenq->qlen++; out: spin_unlock_bh(&fastopenq->lock); - sock_put(lsk); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ee0608bb3bc0..e7695104dbf0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -982,6 +982,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev) return 0; } +static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) +{ + char name[IFNAMSIZ]; + int err; + + err = dev_get_phys_port_name(dev, name, sizeof(name)); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name)) + return -EMSGSIZE; + + return 0; +} + static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; @@ -1072,6 +1090,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_phys_port_id_fill(skb, dev)) goto nla_put_failure; + if (rtnl_phys_port_name_fill(skb, dev)) + goto nla_put_failure; + if (rtnl_phys_switch_id_fill(skb, dev)) goto nla_put_failure; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8e4ac97c8477..cdb939b731aa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2865,7 +2865,6 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) * @from: search offset * @to: search limit * @config: textsearch configuration - * @state: uninitialized textsearch state variable * * Finds a pattern in the skb data according to the specified * textsearch configuration. Use textsearch_next() to retrieve @@ -2873,17 +2872,17 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) * to the first occurrence or UINT_MAX if no match was found. */ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, - unsigned int to, struct ts_config *config, - struct ts_state *state) + unsigned int to, struct ts_config *config) { + struct ts_state state; unsigned int ret; config->get_next_block = skb_ts_get_next_block; config->finish = skb_ts_finish; - skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); + skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state)); - ret = textsearch_find(config, state); + ret = textsearch_find(config, &state); return (ret <= to - from ? ret : UINT_MAX); } EXPORT_SYMBOL(skb_find_text); @@ -3207,10 +3206,9 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb); unsigned int offset = skb_gro_offset(skb); unsigned int headlen = skb_headlen(skb); - struct sk_buff *nskb, *lp, *p = *head; unsigned int len = skb_gro_len(skb); + struct sk_buff *lp, *p = *head; unsigned int delta_truesize; - unsigned int headroom; if (unlikely(p->len + len >= 65536)) return -E2BIG; @@ -3277,48 +3275,6 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; goto done; } - /* switch back to head shinfo */ - pinfo = skb_shinfo(p); - - if (pinfo->frag_list) - goto merge; - if (skb_gro_len(p) != pinfo->gso_size) - return -E2BIG; - - headroom = skb_headroom(p); - nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); - if (unlikely(!nskb)) - return -ENOMEM; - - __copy_skb_header(nskb, p); - nskb->mac_len = p->mac_len; - - skb_reserve(nskb, headroom); - __skb_put(nskb, skb_gro_offset(p)); - - skb_set_mac_header(nskb, skb_mac_header(p) - p->data); - skb_set_network_header(nskb, skb_network_offset(p)); - skb_set_transport_header(nskb, skb_transport_offset(p)); - - __skb_pull(p, skb_gro_offset(p)); - memcpy(skb_mac_header(nskb), skb_mac_header(p), - p->data - skb_mac_header(p)); - - skb_shinfo(nskb)->frag_list = p; - skb_shinfo(nskb)->gso_size = pinfo->gso_size; - pinfo->gso_size = 0; - __skb_header_release(p); - NAPI_GRO_CB(nskb)->last = p; - - nskb->data_len += p->len; - nskb->truesize += p->truesize; - nskb->len += p->len; - - *head = nskb; - nskb->next = p->next; - p->next = NULL; - - p = nskb; merge: delta_truesize = skb->truesize; diff --git a/net/core/sock.c b/net/core/sock.c index 78e89eb7eb70..841108b5649f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -466,7 +466,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) skb_dst_force(skb); spin_lock_irqsave(&list->lock, flags); - skb->dropcount = atomic_read(&sk->sk_drops); + sock_skb_set_dropcount(sk, skb); __skb_queue_tail(list, skb); spin_unlock_irqrestore(&list->lock, flags); @@ -1454,9 +1454,8 @@ void sk_release_kernel(struct sock *sk) return; sock_hold(sk); - sock_release(sk->sk_socket); - release_net(sock_net(sk)); sock_net_set(sk, get_net(&init_net)); + sock_release(sk->sk_socket); sock_put(sk); } EXPORT_SYMBOL(sk_release_kernel); @@ -1538,6 +1537,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_err = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); + atomic64_set(&newsk->sk_cookie, 0); /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) @@ -1665,19 +1665,6 @@ void sock_efree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_efree); -#ifdef CONFIG_INET -void sock_edemux(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - if (sk->sk_state == TCP_TIME_WAIT) - inet_twsk_put(inet_twsk(sk)); - else - sock_put(sk); -} -EXPORT_SYMBOL(sock_edemux); -#endif - kuid_t sock_i_uid(struct sock *sk) { kuid_t uid; @@ -2167,15 +2154,14 @@ int sock_no_getsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(sock_no_getsockopt); -int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t len) +int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_sendmsg); -int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t len, int flags) +int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len, + int flags) { return -EOPNOTSUPP; } @@ -2547,14 +2533,14 @@ int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, EXPORT_SYMBOL(compat_sock_common_getsockopt); #endif -int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; int addr_len = 0; int err; - err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, + err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; @@ -2731,6 +2717,42 @@ static inline void release_proto_idx(struct proto *prot) } #endif +static void req_prot_cleanup(struct request_sock_ops *rsk_prot) +{ + if (!rsk_prot) + return; + kfree(rsk_prot->slab_name); + rsk_prot->slab_name = NULL; + if (rsk_prot->slab) { + kmem_cache_destroy(rsk_prot->slab); + rsk_prot->slab = NULL; + } +} + +static int req_prot_init(const struct proto *prot) +{ + struct request_sock_ops *rsk_prot = prot->rsk_prot; + + if (!rsk_prot) + return 0; + + rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", + prot->name); + if (!rsk_prot->slab_name) + return -ENOMEM; + + rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, + rsk_prot->obj_size, 0, + 0, NULL); + + if (!rsk_prot->slab) { + pr_crit("%s: Can't create request sock SLAB cache!\n", + prot->name); + return -ENOMEM; + } + return 0; +} + int proto_register(struct proto *prot, int alloc_slab) { if (alloc_slab) { @@ -2744,21 +2766,8 @@ int proto_register(struct proto *prot, int alloc_slab) goto out; } - if (prot->rsk_prot != NULL) { - prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name); - if (prot->rsk_prot->slab_name == NULL) - goto out_free_sock_slab; - - prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name, - prot->rsk_prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL); - - if (prot->rsk_prot->slab == NULL) { - pr_crit("%s: Can't create request sock SLAB cache!\n", - prot->name); - goto out_free_request_sock_slab_name; - } - } + if (req_prot_init(prot)) + goto out_free_request_sock_slab; if (prot->twsk_prot != NULL) { prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name); @@ -2787,14 +2796,8 @@ int proto_register(struct proto *prot, int alloc_slab) out_free_timewait_sock_slab_name: kfree(prot->twsk_prot->twsk_slab_name); out_free_request_sock_slab: - if (prot->rsk_prot && prot->rsk_prot->slab) { - kmem_cache_destroy(prot->rsk_prot->slab); - prot->rsk_prot->slab = NULL; - } -out_free_request_sock_slab_name: - if (prot->rsk_prot) - kfree(prot->rsk_prot->slab_name); -out_free_sock_slab: + req_prot_cleanup(prot->rsk_prot); + kmem_cache_destroy(prot->slab); prot->slab = NULL; out: @@ -2814,11 +2817,7 @@ void proto_unregister(struct proto *prot) prot->slab = NULL; } - if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) { - kmem_cache_destroy(prot->rsk_prot->slab); - kfree(prot->rsk_prot->slab_name); - prot->rsk_prot->slab = NULL; - } + req_prot_cleanup(prot->rsk_prot); if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { kmem_cache_destroy(prot->twsk_prot->twsk_slab); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index ad704c757bb4..74dddf84adcd 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -13,22 +13,39 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); -int sock_diag_check_cookie(void *sk, __u32 *cookie) +static u64 sock_gen_cookie(struct sock *sk) { - if ((cookie[0] != INET_DIAG_NOCOOKIE || - cookie[1] != INET_DIAG_NOCOOKIE) && - ((u32)(unsigned long)sk != cookie[0] || - (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1])) - return -ESTALE; - else + while (1) { + u64 res = atomic64_read(&sk->sk_cookie); + + if (res) + return res; + res = atomic64_inc_return(&sock_net(sk)->cookie_gen); + atomic64_cmpxchg(&sk->sk_cookie, 0, res); + } +} + +int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie) +{ + u64 res; + + if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE) return 0; + + res = sock_gen_cookie(sk); + if ((u32)res != cookie[0] || (u32)(res >> 32) != cookie[1]) + return -ESTALE; + + return 0; } EXPORT_SYMBOL_GPL(sock_diag_check_cookie); -void sock_diag_save_cookie(void *sk, __u32 *cookie) +void sock_diag_save_cookie(struct sock *sk, __u32 *cookie) { - cookie[0] = (u32)(unsigned long)sk; - cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); + u64 res = sock_gen_cookie(sk); + + cookie[0] = (u32)res; + cookie[1] = (u32)(res >> 32); } EXPORT_SYMBOL_GPL(sock_diag_save_cookie); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8ce351ffceb1..95b6139d710c 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -24,7 +24,6 @@ static int zero = 0; static int one = 1; -static int ushort_max = USHRT_MAX; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; @@ -403,7 +402,6 @@ static struct ctl_table netns_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .extra1 = &zero, - .extra2 = &ushort_max, .proc_handler = proc_dointvec_minmax }, { } |