about summary refs log tree commit diff
path: root/include/net/sock.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/sock.h')
-rw-r--r-- include/net/sock.h 108
1 files changed, 78 insertions, 30 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index bbf7c2cf15b4..e830c1006935 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -58,6 +58,8 @@
#include <linux/memcontrol.h>
#include <linux/static_key.h>
#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/cgroup-defs.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
@@ -254,7 +256,6 @@ struct cg_proto;
* @sk_wq: sock wait queue and async head
* @sk_rx_dst: receive input route used by early demux
* @sk_dst_cache: destination cache
- * @sk_dst_lock: destination cache lock
* @sk_policy: flow policy
* @sk_receive_queue: incoming packets
* @sk_wmem_alloc: transmit queue bytes committed
@@ -288,7 +289,6 @@ struct cg_proto;
* @sk_ack_backlog: current listen backlog
* @sk_max_ack_backlog: listen backlog set in listen()
* @sk_priority: %SO_PRIORITY setting
- * @sk_cgrp_prioidx: socket group's priority map index
* @sk_type: socket type (%SOCK_STREAM, etc)
* @sk_protocol: which protocol this socket belongs in this network family
* @sk_peer_pid: &struct pid for this socket's peer
@@ -309,7 +309,7 @@ struct cg_proto;
* @sk_send_head: front of stuff to transmit
* @sk_security: used by security modules
* @sk_mark: generic packet mark
- * @sk_classid: this socket's cgroup classid
+ * @sk_cgrp_data: cgroup data for this socket
* @sk_cgrp: this socket's cgroup-specific proto data
* @sk_write_pending: a write to stream socket waits to start
* @sk_state_change: callback to indicate change in the state of the sock
@@ -318,6 +318,7 @@ struct cg_proto;
* @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
* @sk_backlog_rcv: callback to process the backlog
* @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
+ * @sk_reuseport_cb: reuseport group container
*/
struct sock {
/*
@@ -384,14 +385,16 @@ struct sock {
int sk_rcvbuf;
struct sk_filter __rcu *sk_filter;
- struct socket_wq __rcu *sk_wq;
-
+ union {
+ struct socket_wq __rcu *sk_wq;
+ struct socket_wq *sk_wq_raw;
+ };
#ifdef CONFIG_XFRM
- struct xfrm_policy *sk_policy[2];
+ struct xfrm_policy __rcu *sk_policy[2];
#endif
struct dst_entry *sk_rx_dst;
struct dst_entry __rcu *sk_dst_cache;
- spinlock_t sk_dst_lock;
+ /* Note: 32bit hole on 64bit arches */
atomic_t sk_wmem_alloc;
atomic_t sk_omem_alloc;
int sk_sndbuf;
@@ -403,6 +406,7 @@ struct sock {
sk_userlocks : 4,
sk_protocol : 8,
sk_type : 16;
+#define SK_PROTOCOL_MAX U8_MAX
kmemcheck_bitfield_end(flags);
int sk_wmem_queued;
gfp_t sk_allocation;
@@ -423,9 +427,7 @@ struct sock {
u32 sk_ack_backlog;
u32 sk_max_ack_backlog;
__u32 sk_priority;
-#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
- __u32 sk_cgrp_prioidx;
-#endif
+ __u32 sk_mark;
struct pid *sk_peer_pid;
const struct cred *sk_peer_cred;
long sk_rcvtimeo;
@@ -443,10 +445,7 @@ struct sock {
#ifdef CONFIG_SECURITY
void *sk_security;
#endif
- __u32 sk_mark;
-#ifdef CONFIG_CGROUP_NET_CLASSID
- u32 sk_classid;
-#endif
+ struct sock_cgroup_data sk_cgrp_data;
struct cg_proto *sk_cgrp;
void (*sk_state_change)(struct sock *sk);
void (*sk_data_ready)(struct sock *sk);
@@ -455,6 +454,7 @@ struct sock {
int (*sk_backlog_rcv)(struct sock *sk,
struct sk_buff *skb);
void (*sk_destruct)(struct sock *sk);
+ struct sock_reuseport __rcu *sk_reuseport_cb;
};
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
@@ -739,6 +739,8 @@ enum sock_flags {
SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
};
+#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
+
static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
{
nsk->sk_flags = osk->sk_flags;
@@ -774,9 +776,9 @@ static inline int sk_memalloc_socks(void)
#endif
-static inline gfp_t sk_gfp_atomic(const struct sock *sk, gfp_t gfp_mask)
+static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask)
{
- return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC);
+ return gfp_mask | (sk->sk_allocation & __GFP_MEMALLOC);
}
static inline void sk_acceptq_removed(struct sock *sk)
@@ -813,7 +815,7 @@ void sk_stream_write_space(struct sock *sk);
static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
{
/* don't leave skb dst non-refcounted, we are about to leave the rcu lock */
- skb_dst_force(skb);
+ skb_dst_force_safe(skb);
if (!sk->sk_backlog.tail)
sk->sk_backlog.head = skb;
@@ -1063,6 +1065,7 @@ struct proto {
void (*destroy_cgroup)(struct mem_cgroup *memcg);
struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg);
#endif
+ int (*diag_destroy)(struct sock *sk, int err);
};
int proto_register(struct proto *prot, int alloc_slab);
@@ -1794,6 +1797,15 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
sk->sk_route_caps &= ~flags;
}
+static inline bool sk_check_csum_caps(struct sock *sk)
+{
+ return (sk->sk_route_caps & NETIF_F_HW_CSUM) ||
+ (sk->sk_family == PF_INET &&
+ (sk->sk_route_caps & NETIF_F_IP_CSUM)) ||
+ (sk->sk_family == PF_INET6 &&
+ (sk->sk_route_caps & NETIF_F_IPV6_CSUM));
+}
+
static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
struct iov_iter *from, char *to,
int copy, int offset)
@@ -1879,12 +1891,12 @@ static inline bool sk_has_allocations(const struct sock *sk)
}
/**
- * wq_has_sleeper - check if there are any waiting processes
+ * skwq_has_sleeper - check if there are any waiting processes
* @wq: struct socket_wq
*
* Returns true if socket_wq has waiting processes
*
- * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
+ * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory
* barrier call. They were added due to the race found within the tcp code.
*
* Consider following tcp code paths:
@@ -1910,15 +1922,9 @@ static inline bool sk_has_allocations(const struct sock *sk)
* data on the socket.
*
*/
-static inline bool wq_has_sleeper(struct socket_wq *wq)
+static inline bool skwq_has_sleeper(struct socket_wq *wq)
{
- /* We need to be sure we are in sync with the
- * add_wait_queue modifications to the wait queue.
- *
- * This memory barrier is paired in the sock_poll_wait.
- */
- smp_mb();
- return wq && waitqueue_active(&wq->wait);
+ return wq && wq_has_sleeper(&wq->wait);
}
/**
@@ -2005,10 +2011,27 @@ static inline unsigned long sock_wspace(struct sock *sk)
return amt;
}
-static inline void sk_wake_async(struct sock *sk, int how, int band)
+/* Note:
+ * sk->sk_wq_raw is only used from contexts that know this
+ * pointer is not NULL and cannot disappear/change.
+ */
+static inline void sk_set_bit(int nr, struct sock *sk)
+{
+ set_bit(nr, &sk->sk_wq_raw->flags);
+}
+
+static inline void sk_clear_bit(int nr, struct sock *sk)
{
- if (sock_flag(sk, SOCK_FASYNC))
- sock_wake_async(sk->sk_socket, how, band);
+ clear_bit(nr, &sk->sk_wq_raw->flags);
+}
+
+static inline void sk_wake_async(const struct sock *sk, int how, int band)
+{
+ if (sock_flag(sk, SOCK_FASYNC)) {
+ rcu_read_lock();
+ sock_wake_async(rcu_dereference(sk->sk_wq), how, band);
+ rcu_read_unlock();
+ }
}
/* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
@@ -2226,6 +2249,31 @@ static inline bool sk_listener(const struct sock *sk)
return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
}
+/**
+ * sk_state_load - read sk->sk_state for lockless contexts
+ * @sk: socket pointer
+ *
+ * Paired with sk_state_store(). Used in places we do not hold the socket lock:
+ * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
+ */
+static inline int sk_state_load(const struct sock *sk)
+{
+ return smp_load_acquire(&sk->sk_state);
+}
+
+/**
+ * sk_state_store - update sk->sk_state
+ * @sk: socket pointer
+ * @newstate: new state
+ *
+ * Paired with sk_state_load(). Should be used in contexts where
+ * state change might impact lockless readers.
+ */
+static inline void sk_state_store(struct sock *sk, int newstate)
+{
+ smp_store_release(&sk->sk_state, newstate);
+}
+
void sock_enable_timestamp(struct sock *sk, int flag);
int sock_get_timestamp(struct sock *, struct timeval __user *);
int sock_get_timestampns(struct sock *, struct timespec __user *);