Diffstat (limited to 'include/net/sock.h')
-rw-r--r--  include/net/sock.h | 108
1 file changed, 78 insertions(+), 30 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index bbf7c2cf15b4..e830c1006935 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -58,6 +58,8 @@
 #include <linux/memcontrol.h>
 #include <linux/static_key.h>
 #include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/cgroup-defs.h>
 
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
@@ -254,7 +256,6 @@ struct cg_proto;
  * @sk_wq: sock wait queue and async head
  * @sk_rx_dst: receive input route used by early demux
  * @sk_dst_cache: destination cache
- * @sk_dst_lock: destination cache lock
  * @sk_policy: flow policy
  * @sk_receive_queue: incoming packets
  * @sk_wmem_alloc: transmit queue bytes committed
@@ -288,7 +289,6 @@ struct cg_proto;
  * @sk_ack_backlog: current listen backlog
  * @sk_max_ack_backlog: listen backlog set in listen()
  * @sk_priority: %SO_PRIORITY setting
- * @sk_cgrp_prioidx: socket group's priority map index
  * @sk_type: socket type (%SOCK_STREAM, etc)
  * @sk_protocol: which protocol this socket belongs in this network family
  * @sk_peer_pid: &struct pid for this socket's peer
@@ -309,7 +309,7 @@ struct cg_proto;
  * @sk_send_head: front of stuff to transmit
  * @sk_security: used by security modules
  * @sk_mark: generic packet mark
- * @sk_classid: this socket's cgroup classid
+ * @sk_cgrp_data: cgroup data for this cgroup
  * @sk_cgrp: this socket's cgroup-specific proto data
  * @sk_write_pending: a write to stream socket waits to start
  * @sk_state_change: callback to indicate change in the state of the sock
@@ -318,6 +318,7 @@ struct cg_proto;
  * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
  * @sk_backlog_rcv: callback to process the backlog
  * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
+ * @sk_reuseport_cb: reuseport group container
  */
 struct sock {
 	/*
@@ -384,14 +385,16 @@ struct sock {
 	int			sk_rcvbuf;
 
 	struct sk_filter __rcu	*sk_filter;
-	struct socket_wq __rcu	*sk_wq;
-
+	union {
+		struct socket_wq __rcu	*sk_wq;
+		struct socket_wq	*sk_wq_raw;
+	};
 #ifdef CONFIG_XFRM
-	struct xfrm_policy	*sk_policy[2];
+	struct xfrm_policy __rcu *sk_policy[2];
 #endif
 	struct dst_entry	*sk_rx_dst;
 	struct dst_entry __rcu	*sk_dst_cache;
-	spinlock_t		sk_dst_lock;
+	/* Note: 32bit hole on 64bit arches */
 	atomic_t		sk_wmem_alloc;
 	atomic_t		sk_omem_alloc;
 	int			sk_sndbuf;
@@ -403,6 +406,7 @@ struct sock {
 				sk_userlocks : 4,
 				sk_protocol  : 8,
 				sk_type      : 16;
+#define SK_PROTOCOL_MAX U8_MAX
 	kmemcheck_bitfield_end(flags);
 	int			sk_wmem_queued;
 	gfp_t			sk_allocation;
@@ -423,9 +427,7 @@ struct sock {
 	u32			sk_ack_backlog;
 	u32			sk_max_ack_backlog;
 	__u32			sk_priority;
-#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
-	__u32			sk_cgrp_prioidx;
-#endif
+	__u32			sk_mark;
 	struct pid		*sk_peer_pid;
 	const struct cred	*sk_peer_cred;
 	long			sk_rcvtimeo;
@@ -443,10 +445,7 @@ struct sock {
 #ifdef CONFIG_SECURITY
 	void			*sk_security;
 #endif
-	__u32			sk_mark;
-#ifdef CONFIG_CGROUP_NET_CLASSID
-	u32			sk_classid;
-#endif
+	struct sock_cgroup_data	sk_cgrp_data;
 	struct cg_proto		*sk_cgrp;
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk);
@@ -455,6 +454,7 @@ struct sock {
 	int			(*sk_backlog_rcv)(struct sock *sk,
 						  struct sk_buff *skb);
 	void			(*sk_destruct)(struct sock *sk);
+	struct sock_reuseport __rcu	*sk_reuseport_cb;
 };
 
 #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
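A note on two of the struct sock changes above. The sk_wq/sk_wq_raw union lets contexts that hold the socket locked (where the pointer cannot change) skip the rcu_dereference() that ordinary readers still need; the sk_set_bit()/sk_clear_bit() helpers later in this patch rely on exactly that. SK_PROTOCOL_MAX, in turn, documents the ceiling imposed by the 8-bit sk_protocol bitfield. A minimal sketch of the range check a socket-creation path can now perform; the enclosing function and its error handling are assumed here, not taken from this patch:

	/* A protocol number wider than the 8-bit sk_protocol
	 * bitfield would be silently truncated on assignment,
	 * so reject it before creating the socket.
	 */
	if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
		return -EINVAL;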
@@ -739,6 +739,8 @@ enum sock_flags {
 	SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
 };
 
+#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
+
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
 {
 	nsk->sk_flags = osk->sk_flags;
@@ -774,9 +776,9 @@ static inline int sk_memalloc_socks(void)
 
 #endif
 
-static inline gfp_t sk_gfp_atomic(const struct sock *sk, gfp_t gfp_mask)
+static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask)
 {
-	return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC);
+	return gfp_mask | (sk->sk_allocation & __GFP_MEMALLOC);
 }
 
 static inline void sk_acceptq_removed(struct sock *sk)
@@ -813,7 +815,7 @@ void sk_stream_write_space(struct sock *sk);
 static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	/* dont let skb dst not refcounted, we are going to leave rcu lock */
-	skb_dst_force(skb);
+	skb_dst_force_safe(skb);
 
 	if (!sk->sk_backlog.tail)
 		sk->sk_backlog.head = skb;
@@ -1063,6 +1065,7 @@ struct proto {
 	void		(*destroy_cgroup)(struct mem_cgroup *memcg);
 	struct cg_proto	*(*proto_cgroup)(struct mem_cgroup *memcg);
 #endif
+	int		(*diag_destroy)(struct sock *sk, int err);
 };
 
 int proto_register(struct proto *prot, int alloc_slab);
@@ -1794,6 +1797,15 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
 	sk->sk_route_caps &= ~flags;
 }
 
+static inline bool sk_check_csum_caps(struct sock *sk)
+{
+	return (sk->sk_route_caps & NETIF_F_HW_CSUM) ||
+	       (sk->sk_family == PF_INET &&
+		(sk->sk_route_caps & NETIF_F_IP_CSUM)) ||
+	       (sk->sk_family == PF_INET6 &&
+		(sk->sk_route_caps & NETIF_F_IPV6_CSUM));
+}
+
 static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
 					   struct iov_iter *from, char *to,
 					   int copy, int offset)
@@ -1879,12 +1891,12 @@ static inline bool sk_has_allocations(const struct sock *sk)
 }
 
 /**
- * wq_has_sleeper - check if there are any waiting processes
+ * skwq_has_sleeper - check if there are any waiting processes
  * @wq: struct socket_wq
  *
  * Returns true if socket_wq has waiting processes
  *
- * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
+ * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory
  * barrier call. They were added due to the race found within the tcp code.
  *
  * Consider following tcp code paths:
@@ -1910,15 +1922,9 @@ static inline bool sk_has_allocations(const struct sock *sk)
  * data on the socket.
  *
  */
-static inline bool wq_has_sleeper(struct socket_wq *wq)
+static inline bool skwq_has_sleeper(struct socket_wq *wq)
 {
-	/* We need to be sure we are in sync with the
-	 * add_wait_queue modifications to the wait queue.
-	 *
-	 * This memory barrier is paired in the sock_poll_wait.
-	 */
-	smp_mb();
-	return wq && waitqueue_active(&wq->wait);
+	return wq && wq_has_sleeper(&wq->wait);
 }
 
 /**
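The skwq_has_sleeper() rename makes room for the generic wq_has_sleeper() from <linux/wait.h> (hence the new include above), which now supplies the smp_mb() that used to be open-coded here. A sketch of the usual caller pattern, modeled on the sock_def_wakeup()-style callbacks in net/core/sock.c rather than copied from this patch:

	static void example_sock_wakeup(struct sock *sk)
	{
		struct socket_wq *wq;

		rcu_read_lock();
		wq = rcu_dereference(sk->sk_wq);
		/* skwq_has_sleeper() orders the memory barrier before
		 * waitqueue_active(), closing the sleep/wakeup race
		 * described in the comment block above.
		 */
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_all(&wq->wait);
		rcu_read_unlock();
	}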
@@ -2005,10 +2011,27 @@ static inline unsigned long sock_wspace(struct sock *sk)
 	return amt;
 }
 
-static inline void sk_wake_async(struct sock *sk, int how, int band)
+/* Note:
+ * We use sk->sk_wq_raw, from contexts knowing this
+ * pointer is not NULL and cannot disappear/change.
+ */
+static inline void sk_set_bit(int nr, struct sock *sk)
+{
+	set_bit(nr, &sk->sk_wq_raw->flags);
+}
+
+static inline void sk_clear_bit(int nr, struct sock *sk)
 {
-	if (sock_flag(sk, SOCK_FASYNC))
-		sock_wake_async(sk->sk_socket, how, band);
+	clear_bit(nr, &sk->sk_wq_raw->flags);
+}
+
+static inline void sk_wake_async(const struct sock *sk, int how, int band)
+{
+	if (sock_flag(sk, SOCK_FASYNC)) {
+		rcu_read_lock();
+		sock_wake_async(rcu_dereference(sk->sk_wq), how, band);
+		rcu_read_unlock();
+	}
 }
 
 /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
@@ -2226,6 +2249,31 @@ static inline bool sk_listener(const struct sock *sk)
 	return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
 }
 
+/**
+ * sk_state_load - read sk->sk_state for lockless contexts
+ * @sk: socket pointer
+ *
+ * Paired with sk_state_store(). Used in places we do not hold socket lock :
+ * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
+ */
+static inline int sk_state_load(const struct sock *sk)
+{
+	return smp_load_acquire(&sk->sk_state);
+}
+
+/**
+ * sk_state_store - update sk->sk_state
+ * @sk: socket pointer
+ * @newstate: new state
+ *
+ * Paired with sk_state_load(). Should be used in contexts where
+ * state change might impact lockless readers.
+ */
+static inline void sk_state_store(struct sock *sk, int newstate)
+{
+	smp_store_release(&sk->sk_state, newstate);
+}
+
 void sock_enable_timestamp(struct sock *sk, int flag);
 int sock_get_timestamp(struct sock *, struct timeval __user *);
 int sock_get_timestampns(struct sock *, struct timespec __user *);
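sk_state_load() and sk_state_store() pair an acquire load with a release store, so a lockless reader that observes a given sk_state also observes every write the state-changing path made before publishing it. A sketch of the intended usage on both sides; the TCP states and surrounding code are illustrative, not part of this patch:

	/* Writer, typically under the socket lock: publish the new state. */
	sk_state_store(sk, TCP_ESTABLISHED);

	/* Reader, e.g. a tcp_poll()-style path that holds no lock: */
	if (sk_state_load(sk) == TCP_LISTEN)
		return inet_csk_listen_poll(sk);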