Diffstat (limited to 'net/core/net_namespace.c')
-rw-r--r-- | net/core/net_namespace.c | 100
1 file changed, 93 insertions, 7 deletions
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f4183c4c1ec8..4f7a61688d18 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -69,12 +69,15 @@ DEFINE_COOKIE(net_cookie);
 
 static struct net_generic *net_alloc_generic(void)
 {
+	unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs);
+	unsigned int generic_size;
 	struct net_generic *ng;
-	unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
+
+	generic_size = offsetof(struct net_generic, ptr[gen_ptrs]);
 
 	ng = kzalloc(generic_size, GFP_KERNEL);
 	if (ng)
-		ng->s.len = max_gen_ptrs;
+		ng->s.len = gen_ptrs;
 
 	return ng;
 }
@@ -318,8 +321,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 {
 	/* Must be called with pernet_ops_rwsem held */
 	const struct pernet_operations *ops, *saved_ops;
-	int error = 0;
 	LIST_HEAD(net_exit_list);
+	LIST_HEAD(dev_kill_list);
+	int error = 0;
 
 	refcount_set(&net->ns.count, 1);
 	ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
@@ -358,6 +362,15 @@ out_undo:
 	synchronize_rcu();
 
 	ops = saved_ops;
+	rtnl_lock();
+	list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
+		if (ops->exit_batch_rtnl)
+			ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
+	}
+	unregister_netdevice_many(&dev_kill_list);
+	rtnl_unlock();
+
+	ops = saved_ops;
 	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 		ops_exit_list(ops, &net_exit_list);
 
@@ -372,6 +385,10 @@ out_undo:
 static int __net_init net_defaults_init_net(struct net *net)
 {
 	net->core.sysctl_somaxconn = SOMAXCONN;
+	/* Limits per socket sk_omem_alloc usage.
+	 * TCP zerocopy regular usage needs 128 KB.
+	 */
+	net->core.sysctl_optmem_max = 128 * 1024;
 	net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
 
 	return 0;
@@ -569,6 +586,7 @@ static void cleanup_net(struct work_struct *work)
 	struct net *net, *tmp, *last;
 	struct llist_node *net_kill_list;
 	LIST_HEAD(net_exit_list);
+	LIST_HEAD(dev_kill_list);
 
 	/* Atomically snapshot the list of namespaces to cleanup */
 	net_kill_list = llist_del_all(&cleanup_list);
@@ -607,7 +625,15 @@ static void cleanup_net(struct work_struct *work)
 	 * the rcu_barrier() below isn't sufficient alone.
 	 * Also the pre_exit() and exit() methods need this barrier.
 	 */
-	synchronize_rcu();
+	synchronize_rcu_expedited();
+
+	rtnl_lock();
+	list_for_each_entry_reverse(ops, &pernet_list, list) {
+		if (ops->exit_batch_rtnl)
+			ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
+	}
+	unregister_netdevice_many(&dev_kill_list);
+	rtnl_unlock();
 
 	/* Run all of the network namespace exit methods */
 	list_for_each_entry_reverse(ops, &pernet_list, list)
@@ -1067,7 +1093,7 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
 end:
 	if (net_cb.fillargs.add_ref)
 		put_net(net_cb.tgt_net);
-	return err < 0 ? err : skb->len;
+	return err;
 }
 
 static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
@@ -1099,11 +1125,56 @@ out:
 	rtnl_set_sk_err(net, RTNLGRP_NSID, err);
 }
 
+#ifdef CONFIG_NET_NS
+static void __init netns_ipv4_struct_check(void)
+{
+	/* TX readonly hotpath cache lines */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+				      sysctl_tcp_early_retrans);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+				      sysctl_tcp_tso_win_divisor);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+				      sysctl_tcp_tso_rtt_log);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+				      sysctl_tcp_autocorking);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+				      sysctl_tcp_min_snd_mss);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+				      sysctl_tcp_notsent_lowat);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+				      sysctl_tcp_limit_output_bytes);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+				      sysctl_tcp_min_rtt_wlen);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+				      sysctl_tcp_wmem);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+				      sysctl_ip_fwd_use_pmtu);
+	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_tx, 33);
+
+	/* TXRX readonly hotpath cache lines */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_txrx,
+				      sysctl_tcp_moderate_rcvbuf);
+	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_txrx, 1);
+
+	/* RX readonly hotpath cache line */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+				      sysctl_ip_early_demux);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+				      sysctl_tcp_early_demux);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+				      sysctl_tcp_reordering);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+				      sysctl_tcp_rmem);
+	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18);
+}
+#endif
+
 void __init net_ns_init(void)
 {
 	struct net_generic *ng;
 
 #ifdef CONFIG_NET_NS
+	netns_ipv4_struct_check();
 	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
 				       SMP_CACHE_BYTES,
 				       SLAB_PANIC|SLAB_ACCOUNT, NULL);
@@ -1137,14 +1208,25 @@ void __init net_ns_init(void)
 	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
 		      RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
-		      RTNL_FLAG_DOIT_UNLOCKED);
+		      RTNL_FLAG_DOIT_UNLOCKED |
+		      RTNL_FLAG_DUMP_UNLOCKED);
 }
 
 static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
 {
 	ops_pre_exit_list(ops, net_exit_list);
 	synchronize_rcu();
+
+	if (ops->exit_batch_rtnl) {
+		LIST_HEAD(dev_kill_list);
+
+		rtnl_lock();
+		ops->exit_batch_rtnl(net_exit_list, &dev_kill_list);
+		unregister_netdevice_many(&dev_kill_list);
+		rtnl_unlock();
+	}
 	ops_exit_list(ops, net_exit_list);
+
 	ops_free_list(ops, net_exit_list);
 }
 
@@ -1229,7 +1311,11 @@ static int register_pernet_operations(struct list_head *list,
 		if (error < 0)
 			return error;
 		*ops->id = error;
-		max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
+		/* This does not require READ_ONCE as writers already hold
+		 * pernet_ops_rwsem. But WRITE_ONCE is needed to protect
+		 * net_alloc_generic.
+		 */
+		WRITE_ONCE(max_gen_ptrs, max(max_gen_ptrs, *ops->id + 1));
 	}
 	error = __register_pernet_operations(list, ops);
 	if (error) {
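
The main structural change above is the new exit_batch_rtnl() method of struct pernet_operations: setup_net()'s error path, cleanup_net() and free_exit_list() now take rtnl_lock() once, let every registered subsystem queue its dying net devices onto a shared dev_kill_list, and flush that list with a single unregister_netdevice_many() call before releasing the lock. A minimal sketch of how a virtual-device subsystem might implement the hook is below; foo_link_ops, foo_exit_batch_rtnl() and foo_net_ops are hypothetical names used only for illustration, not part of this change.

/* Hypothetical pernet subsystem using the exit_batch_rtnl() hook added by
 * this patch; the "foo" identifiers are illustrative only.
 */
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/rtnetlink.h>

static struct rtnl_link_ops foo_link_ops;	/* the driver's real link ops */

/* Called by net_namespace.c with rtnl_lock() already held.  The hook only
 * queues dying devices; the core flushes dev_kill_list once per batch with
 * unregister_netdevice_many() before dropping RTNL.
 */
static void __net_exit foo_exit_batch_rtnl(struct list_head *net_exit_list,
					   struct list_head *dev_kill_list)
{
	struct net *net;

	list_for_each_entry(net, net_exit_list, exit_list) {
		struct net_device *dev, *next;

		for_each_netdev_safe(net, dev, next)
			if (dev->rtnl_link_ops == &foo_link_ops)
				unregister_netdevice_queue(dev, dev_kill_list);
	}
}

static struct pernet_operations foo_net_ops = {
	.exit_batch_rtnl = foo_exit_batch_rtnl,
};

Registering foo_net_ops with register_pernet_device() would then let the core batch the device teardown for a whole group of dying namespaces under one RTNL section, instead of each subsystem taking and dropping the lock in its own ->exit_batch().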
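
netns_ipv4_struct_check() contains only compile-time assertions: it assumes struct netns_ipv4 (include/net/netns/ipv4.h) now wraps its hot read-mostly sysctls in __cacheline_group_begin()/__cacheline_group_end() markers, and the CACHELINE_ASSERT_GROUP_MEMBER()/CACHELINE_ASSERT_GROUP_SIZE() helpers from <linux/cache.h> break the build if a later edit moves a field out of its group or grows the group past the stated byte budget. A reduced sketch of that pattern, using a made-up struct foo_stats rather than the real netns_ipv4 layout:

/* Reduced illustration of the cacheline-group assertions; struct foo_stats
 * and its fields are made up for this sketch.
 */
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/types.h>

struct foo_stats {
	__cacheline_group_begin(foo_read_tx);	/* zero-size marker member */
	u8	tx_knob_a;
	u32	tx_knob_b;
	__cacheline_group_end(foo_read_tx);	/* zero-size marker member */

	unsigned long	rarely_used[32];	/* cold fields stay outside */
};

static void __init foo_struct_check(void)
{
	/* Build fails if a field drifts out of its named group... */
	CACHELINE_ASSERT_GROUP_MEMBER(struct foo_stats, foo_read_tx, tx_knob_a);
	CACHELINE_ASSERT_GROUP_MEMBER(struct foo_stats, foo_read_tx, tx_knob_b);
	/* ...or if the group outgrows its byte budget (8 bytes here). */
	CACHELINE_ASSERT_GROUP_SIZE(struct foo_stats, foo_read_tx, 8);
}

Because the checks expand to BUILD_BUG_ON(), they cost nothing at run time; in the diff the equivalent netns_ipv4_struct_check() is simply invoked at the top of net_ns_init() so the assertions are evaluated while compiling net_namespace.c.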