aboutsummaryrefslogtreecommitdiff
path: root/include/net/sock.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/sock.h')
-rw-r--r--include/net/sock.h132
1 files changed, 94 insertions, 38 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 92b269709b9a..282d065e286b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -343,6 +343,9 @@ struct sock {
#define sk_rxhash __sk_common.skc_rxhash
socket_lock_t sk_lock;
+ atomic_t sk_drops;
+ int sk_rcvlowat;
+ struct sk_buff_head sk_error_queue;
struct sk_buff_head sk_receive_queue;
/*
* The backlog queue is special, it is always used with
@@ -359,14 +362,13 @@ struct sock {
struct sk_buff *tail;
} sk_backlog;
#define sk_rmem_alloc sk_backlog.rmem_alloc
- int sk_forward_alloc;
- __u32 sk_txhash;
+ int sk_forward_alloc;
#ifdef CONFIG_NET_RX_BUSY_POLL
- unsigned int sk_napi_id;
unsigned int sk_ll_usec;
+ /* ===== mostly read cache line ===== */
+ unsigned int sk_napi_id;
#endif
- atomic_t sk_drops;
int sk_rcvbuf;
struct sk_filter __rcu *sk_filter;
@@ -379,16 +381,50 @@ struct sock {
#endif
struct dst_entry *sk_rx_dst;
struct dst_entry __rcu *sk_dst_cache;
- /* Note: 32bit hole on 64bit arches */
- atomic_t sk_wmem_alloc;
atomic_t sk_omem_alloc;
int sk_sndbuf;
+
+ /* ===== cache line for TX ===== */
+ int sk_wmem_queued;
+ atomic_t sk_wmem_alloc;
+ unsigned long sk_tsq_flags;
+ struct sk_buff *sk_send_head;
struct sk_buff_head sk_write_queue;
+ __s32 sk_peek_off;
+ int sk_write_pending;
+ long sk_sndtimeo;
+ struct timer_list sk_timer;
+ __u32 sk_priority;
+ __u32 sk_mark;
+ u32 sk_pacing_rate; /* bytes per second */
+ u32 sk_max_pacing_rate;
+ struct page_frag sk_frag;
+ netdev_features_t sk_route_caps;
+ netdev_features_t sk_route_nocaps;
+ int sk_gso_type;
+ unsigned int sk_gso_max_size;
+ gfp_t sk_allocation;
+ __u32 sk_txhash;
/*
* Because of non atomicity rules, all
* changes are protected by socket lock.
*/
+ unsigned int __sk_flags_offset[0];
+#ifdef __BIG_ENDIAN_BITFIELD
+#define SK_FL_PROTO_SHIFT 16
+#define SK_FL_PROTO_MASK 0x00ff0000
+
+#define SK_FL_TYPE_SHIFT 0
+#define SK_FL_TYPE_MASK 0x0000ffff
+#else
+#define SK_FL_PROTO_SHIFT 8
+#define SK_FL_PROTO_MASK 0x0000ff00
+
+#define SK_FL_TYPE_SHIFT 16
+#define SK_FL_TYPE_MASK 0xffff0000
+#endif
+
kmemcheck_bitfield_begin(flags);
unsigned int sk_padding : 2,
sk_no_check_tx : 1,
@@ -399,41 +435,24 @@ struct sock {
#define SK_PROTOCOL_MAX U8_MAX
kmemcheck_bitfield_end(flags);
- int sk_wmem_queued;
- gfp_t sk_allocation;
- u32 sk_pacing_rate; /* bytes per second */
- u32 sk_max_pacing_rate;
- netdev_features_t sk_route_caps;
- netdev_features_t sk_route_nocaps;
- int sk_gso_type;
- unsigned int sk_gso_max_size;
u16 sk_gso_max_segs;
- int sk_rcvlowat;
unsigned long sk_lingertime;
- struct sk_buff_head sk_error_queue;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
int sk_err,
sk_err_soft;
u32 sk_ack_backlog;
u32 sk_max_ack_backlog;
- __u32 sk_priority;
- __u32 sk_mark;
+ kuid_t sk_uid;
struct pid *sk_peer_pid;
const struct cred *sk_peer_cred;
long sk_rcvtimeo;
- long sk_sndtimeo;
- struct timer_list sk_timer;
ktime_t sk_stamp;
u16 sk_tsflags;
u8 sk_shutdown;
u32 sk_tskey;
struct socket *sk_socket;
void *sk_user_data;
- struct page_frag sk_frag;
- struct sk_buff *sk_send_head;
- __s32 sk_peek_off;
- int sk_write_pending;
#ifdef CONFIG_SECURITY
void *sk_security;
#endif
@@ -894,7 +913,20 @@ static inline void sock_rps_record_flow_hash(__u32 hash)
static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
- sock_rps_record_flow_hash(sk->sk_rxhash);
+ if (static_key_false(&rfs_needed)) {
+ /* Reading sk->sk_rxhash might incur an expensive cache line
+ * miss.
+ *
+ * TCP_ESTABLISHED does cover almost all states where RFS
+ * might be useful, and is cheaper [1] than testing :
+ * IPv4: inet_sk(sk)->inet_daddr
+ * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
+ * OR an additional socket flag
+ * [1] : sk_state and sk_prot are in the same cache line.
+ */
+ if (sk->sk_state == TCP_ESTABLISHED)
+ sock_rps_record_flow_hash(sk->sk_rxhash);
+ }
#endif
}
@@ -914,14 +946,16 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
#endif
}
-#define sk_wait_event(__sk, __timeo, __condition) \
+#define sk_wait_event(__sk, __timeo, __condition, __wait) \
({ int __rc; \
release_sock(__sk); \
__rc = __condition; \
if (!__rc) { \
- *(__timeo) = schedule_timeout(*(__timeo)); \
+ *(__timeo) = wait_woken(__wait, \
+ TASK_INTERRUPTIBLE, \
+ *(__timeo)); \
} \
- sched_annotate_sleep(); \
+ sched_annotate_sleep(); \
lock_sock(__sk); \
__rc = __condition; \
__rc; \
@@ -1162,11 +1196,6 @@ static inline void sk_enter_memory_pressure(struct sock *sk)
sk->sk_prot->enter_memory_pressure(sk);
}
-static inline long sk_prot_mem_limits(const struct sock *sk, int index)
-{
- return sk->sk_prot->sysctl_mem[index];
-}
-
static inline long
sk_memory_allocated(const struct sock *sk)
{
@@ -1276,14 +1305,32 @@ static inline struct inode *SOCK_INODE(struct socket *socket)
/*
* Functions for memory accounting
*/
+int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind);
int __sk_mem_schedule(struct sock *sk, int size, int kind);
+void __sk_mem_reduce_allocated(struct sock *sk, int amount);
void __sk_mem_reclaim(struct sock *sk, int amount);
-#define SK_MEM_QUANTUM ((int)PAGE_SIZE)
+/* We used to have PAGE_SIZE here, but systems with 64KB pages
+ * do not necessarily have 16x time more memory than 4KB ones.
+ */
+#define SK_MEM_QUANTUM 4096
#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
#define SK_MEM_SEND 0
#define SK_MEM_RECV 1
+/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */
+static inline long sk_prot_mem_limits(const struct sock *sk, int index)
+{
+ long val = sk->sk_prot->sysctl_mem[index];
+
+#if PAGE_SIZE > SK_MEM_QUANTUM
+ val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT;
+#elif PAGE_SIZE < SK_MEM_QUANTUM
+ val >>= SK_MEM_QUANTUM_SHIFT - PAGE_SHIFT;
+#endif
+ return val;
+}
+
static inline int sk_mem_pages(int amt)
{
return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
@@ -1651,6 +1698,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
sk->sk_wq = parent->wq;
parent->sk = sk;
sk_set_socket(sk, parent);
+ sk->sk_uid = SOCK_INODE(parent)->i_uid;
security_sock_graft(sk, parent);
write_unlock_bh(&sk->sk_callback_lock);
}
@@ -1658,6 +1706,11 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
kuid_t sock_i_uid(struct sock *sk);
unsigned long sock_i_ino(struct sock *sk);
+static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
+{
+ return sk ? sk->sk_uid : make_kuid(net->user_ns, 0);
+}
+
static inline u32 net_tx_rndhash(void)
{
u32 v = prandom_u32();
@@ -1783,13 +1836,13 @@ static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
{
if (skb->ip_summed == CHECKSUM_NONE) {
__wsum csum = 0;
- if (csum_and_copy_from_iter(to, copy, &csum, from) != copy)
+ if (!csum_and_copy_from_iter_full(to, copy, &csum, from))
return -EFAULT;
skb->csum = csum_block_add(skb->csum, csum, offset);
} else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
- if (copy_from_iter_nocache(to, copy, from) != copy)
+ if (!copy_from_iter_full_nocache(to, copy, from))
return -EFAULT;
- } else if (copy_from_iter(to, copy, from) != copy)
+ } else if (!copy_from_iter_full(to, copy, from))
return -EFAULT;
return 0;
@@ -1952,6 +2005,8 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer,
void sk_stop_timer(struct sock *sk, struct timer_list *timer);
+int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
+ unsigned int flags);
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
@@ -2108,7 +2163,8 @@ struct sock_skb_cb {
static inline void
sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
{
- SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops);
+ SOCK_SKB_CB(skb)->dropcount = sock_flag(sk, SOCK_RXQ_OVFL) ?
+ atomic_read(&sk->sk_drops) : 0;
}
static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)