diff options
author | David S. Miller <davem@davemloft.net> | 2019-10-13 10:13:08 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-10-13 10:13:08 -0700 |
commit | 3f233809aa7f00da6b42c695cc20801a2050edd4 (patch) | |
tree | 7be26ef02ab2f2979be9a3cdeb36ed1bafc51354 | |
parent | 8caf8a91f34d55e8e3b1355ee8d658cb472146e2 (diff) | |
parent | ab4e846a82d0ae00176de19f2db3c5c64f8eb5f2 (diff) |
Merge branch 'tcp-address-KCSAN-reports-in-tcp_poll-part-I'
Eric Dumazet says:
====================
tcp: address KCSAN reports in tcp_poll() (part I)
This all started with a KCSAN report (included
in "tcp: annotate tp->rcv_nxt lockless reads" changelog)
tcp_poll() runs in a lockless way. This means that about
all accesses of tcp socket fields done in tcp_poll() context
need annotations otherwise KCSAN will complain about data-races.
While doing this detective work, I found a more serious bug,
addressed by the first patch ("tcp: add rcu protection around
tp->fastopen_rsk").
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/tcp.h | 6 | ||||
-rw-r--r-- | include/net/sock.h | 29 | ||||
-rw-r--r-- | include/net/tcp.h | 7 | ||||
-rw-r--r-- | include/trace/events/sock.h | 4 | ||||
-rw-r--r-- | net/core/datagram.c | 2 | ||||
-rw-r--r-- | net/core/filter.c | 6 | ||||
-rw-r--r-- | net/core/request_sock.c | 2 | ||||
-rw-r--r-- | net/core/skbuff.c | 2 | ||||
-rw-r--r-- | net/core/sock.c | 22 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 4 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 65 | ||||
-rw-r--r-- | net/ipv4/tcp_diag.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_fastopen.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 37 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 28 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 17 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 32 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 11 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 18 | ||||
-rw-r--r-- | net/sched/em_meta.c | 2 |
21 files changed, 175 insertions, 128 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 99617e528ea2..668e25a76d69 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -393,7 +393,7 @@ struct tcp_sock { /* fastopen_rsk points to request_sock that resulted in this big * socket. Used to retransmit SYNACKs etc. */ - struct request_sock *fastopen_rsk; + struct request_sock __rcu *fastopen_rsk; u32 *saved_syn; }; @@ -447,8 +447,8 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) static inline bool tcp_passive_fastopen(const struct sock *sk) { - return (sk->sk_state == TCP_SYN_RECV && - tcp_sk(sk)->fastopen_rsk != NULL); + return sk->sk_state == TCP_SYN_RECV && + rcu_access_pointer(tcp_sk(sk)->fastopen_rsk) != NULL; } static inline void fastopen_queue_tune(struct sock *sk, int backlog) diff --git a/include/net/sock.h b/include/net/sock.h index 79f54e1f8827..f69b58bff7e5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -878,12 +878,17 @@ static inline bool sk_acceptq_is_full(const struct sock *sk) */ static inline int sk_stream_min_wspace(const struct sock *sk) { - return sk->sk_wmem_queued >> 1; + return READ_ONCE(sk->sk_wmem_queued) >> 1; } static inline int sk_stream_wspace(const struct sock *sk) { - return sk->sk_sndbuf - sk->sk_wmem_queued; + return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued); +} + +static inline void sk_wmem_queued_add(struct sock *sk, int val) +{ + WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val); } void sk_stream_write_space(struct sock *sk); @@ -1207,7 +1212,7 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { - if (sk->sk_wmem_queued >= sk->sk_sndbuf) + if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) return false; return sk->sk_prot->stream_memory_free ? @@ -1467,7 +1472,7 @@ DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key); static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { sock_set_flag(sk, SOCK_QUEUE_SHRUNK); - sk->sk_wmem_queued -= skb->truesize; + sk_wmem_queued_add(sk, -skb->truesize); sk_mem_uncharge(sk, skb->truesize); if (static_branch_unlikely(&tcp_tx_skb_cache_key) && !sk->sk_tx_skb_cache && !skb_cloned(skb)) { @@ -2014,7 +2019,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro skb->len += copy; skb->data_len += copy; skb->truesize += copy; - sk->sk_wmem_queued += copy; + sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); return 0; } @@ -2220,10 +2225,14 @@ static inline void sk_wake_async(const struct sock *sk, int how, int band) static inline void sk_stream_moderate_sndbuf(struct sock *sk) { - if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) { - sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); - sk->sk_sndbuf = max_t(u32, sk->sk_sndbuf, SOCK_MIN_SNDBUF); - } + u32 val; + + if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) + return; + + val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); + + WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF)); } struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, @@ -2251,7 +2260,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); */ static inline bool sock_writeable(const struct sock *sk) { - return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); + return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1); } static inline gfp_t gfp_any(void) diff --git a/include/net/tcp.h b/include/net/tcp.h index 35f6f7e0fdc2..ab4eb5eb5d07 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1380,14 +1380,14 @@ static inline int tcp_win_from_space(const struct sock *sk, int space) /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { - return tcp_win_from_space(sk, sk->sk_rcvbuf - + return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - READ_ONCE(sk->sk_backlog.len) - atomic_read(&sk->sk_rmem_alloc)); } static inline int tcp_full_space(const struct sock *sk) { - return tcp_win_from_space(sk, sk->sk_rcvbuf); + return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } extern void tcp_openreq_init_rwin(struct request_sock *req, @@ -1917,7 +1917,8 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) static inline bool tcp_stream_memory_free(const struct sock *sk, int wake) { const struct tcp_sock *tp = tcp_sk(sk); - u32 notsent_bytes = tp->write_seq - tp->snd_nxt; + u32 notsent_bytes = READ_ONCE(tp->write_seq) - + READ_ONCE(tp->snd_nxt); return (notsent_bytes << wake) < tcp_notsent_lowat(tp); } diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index a0c4b8a30966..51fe9f6719eb 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -82,7 +82,7 @@ TRACE_EVENT(sock_rcvqueue_full, TP_fast_assign( __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); __entry->truesize = skb->truesize; - __entry->sk_rcvbuf = sk->sk_rcvbuf; + __entry->sk_rcvbuf = READ_ONCE(sk->sk_rcvbuf); ), TP_printk("rmem_alloc=%d truesize=%u sk_rcvbuf=%d", @@ -115,7 +115,7 @@ TRACE_EVENT(sock_exceed_buf_limit, __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); __entry->sysctl_wmem = sk_get_wmem0(sk, prot); __entry->wmem_alloc = refcount_read(&sk->sk_wmem_alloc); - __entry->wmem_queued = sk->sk_wmem_queued; + __entry->wmem_queued = READ_ONCE(sk->sk_wmem_queued); __entry->kind = kind; ), diff --git a/net/core/datagram.c b/net/core/datagram.c index 4cc8dc5db2b7..c210fc116103 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -640,7 +640,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, skb->len += copied; skb->truesize += truesize; if (sk && sk->sk_type == SOCK_STREAM) { - sk->sk_wmem_queued += truesize; + sk_wmem_queued_add(sk, truesize); sk_mem_charge(sk, truesize); } else { refcount_add(truesize, &skb->sk->sk_wmem_alloc); diff --git a/net/core/filter.c b/net/core/filter.c index a50c0b6846f2..3fed5755494b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4252,12 +4252,14 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, case SO_RCVBUF: val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); + WRITE_ONCE(sk->sk_rcvbuf, + max_t(int, val * 2, SOCK_MIN_RCVBUF)); break; case SO_SNDBUF: val = min_t(u32, val, sysctl_wmem_max); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); + WRITE_ONCE(sk->sk_sndbuf, + max_t(int, val * 2, SOCK_MIN_SNDBUF)); break; case SO_MAX_PACING_RATE: /* 32bit version */ if (val != ~0U) diff --git a/net/core/request_sock.c b/net/core/request_sock.c index c9bb00008528..f35c2e998406 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -96,7 +96,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq; - tcp_sk(sk)->fastopen_rsk = NULL; + RCU_INIT_POINTER(tcp_sk(sk)->fastopen_rsk, NULL); spin_lock_bh(&fastopenq->lock); fastopenq->qlen--; tcp_rsk(req)->tfo_listener = false; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 529133611ea2..8c178703467b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4415,7 +4415,7 @@ static void skb_set_err_queue(struct sk_buff *skb) int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned int)sk->sk_rcvbuf) + (unsigned int)READ_ONCE(sk->sk_rcvbuf)) return -ENOMEM; skb_orphan(skb); diff --git a/net/core/sock.c b/net/core/sock.c index 2a053999df11..a515392ba84b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -785,7 +785,8 @@ set_sndbuf: */ val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); + WRITE_ONCE(sk->sk_sndbuf, + max_t(int, val * 2, SOCK_MIN_SNDBUF)); /* Wake up sending tasks if we upped the value. */ sk->sk_write_space(sk); break; @@ -831,7 +832,8 @@ set_rcvbuf: * returning the value we actually used in getsockopt * is the most desirable behavior. */ - sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); + WRITE_ONCE(sk->sk_rcvbuf, + max_t(int, val * 2, SOCK_MIN_RCVBUF)); break; case SO_RCVBUFFORCE: @@ -2088,8 +2090,10 @@ EXPORT_SYMBOL(sock_i_ino); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority) { - if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { + if (force || + refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) { struct sk_buff *skb = alloc_skb(size, priority); + if (skb) { skb_set_owner_w(skb, sk); return skb; @@ -2190,7 +2194,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) + if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) break; if (sk->sk_shutdown & SEND_SHUTDOWN) break; @@ -2225,7 +2229,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, if (sk->sk_shutdown & SEND_SHUTDOWN) goto failure; - if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf) + if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf)) break; sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -2806,7 +2810,7 @@ static void sock_def_write_space(struct sock *sk) /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ - if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | @@ -3204,11 +3208,11 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS); mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); - mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; + mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); - mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; + mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; - mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; + mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index dbcf34ec8dd2..eb30fc1770de 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -906,7 +906,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, percpu_counter_inc(sk->sk_prot->orphan_count); if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { - BUG_ON(tcp_sk(child)->fastopen_rsk != req); + BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req); BUG_ON(sk != req->rsk_listener); /* Paranoid, to prevent race condition if @@ -915,7 +915,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, * Also to satisfy an assertion in * tcp_v4_destroy_sock(). */ - tcp_sk(child)->fastopen_rsk = NULL; + RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL); } inet_csk_destroy_sock(child); } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index bbb005eb5218..7dc79b973e6e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -193,7 +193,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { struct inet_diag_meminfo minfo = { .idiag_rmem = sk_rmem_alloc_get(sk), - .idiag_wmem = sk->sk_wmem_queued, + .idiag_wmem = READ_ONCE(sk->sk_wmem_queued), .idiag_fmem = sk->sk_forward_alloc, .idiag_tmem = sk_wmem_alloc_get(sk), }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8781a92ea4b6..b2ac4f074e2d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -450,8 +450,8 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; - sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; + WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); + WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); sk_sockets_allocated_inc(sk); sk->sk_route_forced_caps = NETIF_F_GSO; @@ -477,7 +477,7 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, int target, struct sock *sk) { - return (tp->rcv_nxt - tp->copied_seq >= target) || + return (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq) >= target) || (sk->sk_prot->stream_memory_read ? sk->sk_prot->stream_memory_read(sk) : false); } @@ -543,10 +543,10 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Connected or passive Fast Open socket? */ if (state != TCP_SYN_SENT && - (state != TCP_SYN_RECV || tp->fastopen_rsk)) { + (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) { int target = sock_rcvlowat(sk, 0, INT_MAX); - if (tp->urg_seq == tp->copied_seq && + if (READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) && !sock_flag(sk, SOCK_URGINLINE) && tp->urg_data) target++; @@ -607,7 +607,8 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) unlock_sock_fast(sk, slow); break; case SIOCATMARK: - answ = tp->urg_data && tp->urg_seq == tp->copied_seq; + answ = tp->urg_data && + READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq); break; case SIOCOUTQ: if (sk->sk_state == TCP_LISTEN) @@ -616,7 +617,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) answ = 0; else - answ = tp->write_seq - tp->snd_una; + answ = READ_ONCE(tp->write_seq) - tp->snd_una; break; case SIOCOUTQNSD: if (sk->sk_state == TCP_LISTEN) @@ -625,7 +626,8 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) answ = 0; else - answ = tp->write_seq - tp->snd_nxt; + answ = READ_ONCE(tp->write_seq) - + READ_ONCE(tp->snd_nxt); break; default: return -ENOIOCTLCMD; @@ -657,7 +659,7 @@ static void skb_entail(struct sock *sk, struct sk_buff *skb) tcb->sacked = 0; __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); - sk->sk_wmem_queued += skb->truesize; + sk_wmem_queued_add(sk, skb->truesize); sk_mem_charge(sk, skb->truesize); if (tp->nonagle & TCP_NAGLE_PUSH) tp->nonagle &= ~TCP_NAGLE_PUSH; @@ -1032,10 +1034,10 @@ new_segment: skb->len += copy; skb->data_len += copy; skb->truesize += copy; - sk->sk_wmem_queued += copy; + sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); skb->ip_summed = CHECKSUM_PARTIAL; - tp->write_seq += copy; + WRITE_ONCE(tp->write_seq, tp->write_seq + copy); TCP_SKB_CB(skb)->end_seq += copy; tcp_skb_pcount_set(skb, 0); @@ -1362,7 +1364,7 @@ new_segment: if (!copied) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; - tp->write_seq += copy; + WRITE_ONCE(tp->write_seq, tp->write_seq + copy); TCP_SKB_CB(skb)->end_seq += copy; tcp_skb_pcount_set(skb, 0); @@ -1668,9 +1670,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_eat_skb(sk, skb); if (!desc->count) break; - tp->copied_seq = seq; + WRITE_ONCE(tp->copied_seq, seq); } - tp->copied_seq = seq; + WRITE_ONCE(tp->copied_seq, seq); tcp_rcv_space_adjust(sk); @@ -1709,7 +1711,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) val <<= 1; if (val > sk->sk_rcvbuf) { - sk->sk_rcvbuf = val; + WRITE_ONCE(sk->sk_rcvbuf, val); tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val); } return 0; @@ -1819,7 +1821,7 @@ static int tcp_zerocopy_receive(struct sock *sk, out: up_read(¤t->mm->mmap_sem); if (length) { - tp->copied_seq = seq; + WRITE_ONCE(tp->copied_seq, seq); tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. */ @@ -2117,7 +2119,7 @@ found_ok_skb: if (urg_offset < used) { if (!urg_offset) { if (!sock_flag(sk, SOCK_URGINLINE)) { - ++*seq; + WRITE_ONCE(*seq, *seq + 1); urg_hole++; offset++; used--; @@ -2139,7 +2141,7 @@ found_ok_skb: } } - *seq += used; + WRITE_ONCE(*seq, *seq + used); copied += used; len -= used; @@ -2166,7 +2168,7 @@ skip_copy: found_fin_ok: /* Process the FIN. */ - ++*seq; + WRITE_ONCE(*seq, *seq + 1); if (!(flags & MSG_PEEK)) sk_eat_skb(sk, skb); break; @@ -2487,7 +2489,10 @@ adjudge_to_death: } if (sk->sk_state == TCP_CLOSE) { - struct request_sock *req = tcp_sk(sk)->fastopen_rsk; + struct request_sock *req; + + req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, + lockdep_sock_is_held(sk)); /* We could get here with a non-NULL req if the socket is * aborted (e.g., closed with unread data) before 3WHS * finishes. @@ -2559,6 +2564,7 @@ int tcp_disconnect(struct sock *sk, int flags) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int old_state = sk->sk_state; + u32 seq; if (old_state != TCP_CLOSE) tcp_set_state(sk, TCP_CLOSE); @@ -2585,7 +2591,7 @@ int tcp_disconnect(struct sock *sk, int flags) __kfree_skb(sk->sk_rx_skb_cache); sk->sk_rx_skb_cache = NULL; } - tp->copied_seq = tp->rcv_nxt; + WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); tp->urg_data = 0; tcp_write_queue_purge(sk); tcp_fastopen_active_disable_ofo_check(sk); @@ -2601,9 +2607,12 @@ int tcp_disconnect(struct sock *sk, int flags) tp->srtt_us = 0; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); tp->rcv_rtt_last_tsecr = 0; - tp->write_seq += tp->max_window + 2; - if (tp->write_seq == 0) - tp->write_seq = 1; + + seq = tp->write_seq + tp->max_window + 2; + if (!seq) + seq = 1; + WRITE_ONCE(tp->write_seq, seq); + icsk->icsk_backoff = 0; tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; @@ -2930,9 +2939,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (sk->sk_state != TCP_CLOSE) err = -EPERM; else if (tp->repair_queue == TCP_SEND_QUEUE) - tp->write_seq = val; + WRITE_ONCE(tp->write_seq, val); else if (tp->repair_queue == TCP_RECV_QUEUE) - tp->rcv_nxt = val; + WRITE_ONCE(tp->rcv_nxt, val); else err = -EINVAL; break; @@ -3831,8 +3840,10 @@ EXPORT_SYMBOL(tcp_md5_hash_key); void tcp_done(struct sock *sk) { - struct request_sock *req = tcp_sk(sk)->fastopen_rsk; + struct request_sock *req; + req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, + lockdep_sock_is_held(sk)); if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 81a8221d650a..549506162dde 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -26,8 +26,9 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } else if (sk->sk_type == SOCK_STREAM) { const struct tcp_sock *tp = tcp_sk(sk); - r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); - r->idiag_wqueue = tp->write_seq - tp->snd_una; + r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) - + READ_ONCE(tp->copied_seq), 0); + r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una; } if (info) tcp_get_info(sk, info); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 3fd451271a70..a915ade0c818 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -253,7 +253,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, */ tp = tcp_sk(child); - tp->fastopen_rsk = req; + rcu_assign_pointer(tp->fastopen_rsk, req); tcp_rsk(req)->tfo_listener = true; /* RFC1323: The window in SYN & SYN/ACK segments is never diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3578357abe30..a2e52ad7cdab 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -359,7 +359,8 @@ static void tcp_sndbuf_expand(struct sock *sk) sndmem *= nr_segs * per_mss; if (sk->sk_sndbuf < sndmem) - sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]); + WRITE_ONCE(sk->sk_sndbuf, + min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2])); } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -483,8 +484,9 @@ static void tcp_clamp_window(struct sock *sk) !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !tcp_under_memory_pressure(sk) && sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { - sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), - net->ipv4.sysctl_tcp_rmem[2]); + WRITE_ONCE(sk->sk_rcvbuf, + min(atomic_read(&sk->sk_rmem_alloc), + net->ipv4.sysctl_tcp_rmem[2])); } if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); @@ -648,7 +650,7 @@ void tcp_rcv_space_adjust(struct sock *sk) rcvbuf = min_t(u64, rcvwin * rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); if (rcvbuf > sk->sk_rcvbuf) { - sk->sk_rcvbuf = rcvbuf; + WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); /* Make the window clamp follow along. */ tp->window_clamp = tcp_win_from_space(sk, rcvbuf); @@ -2666,7 +2668,7 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack, struct tcp_sock *tp = tcp_sk(sk); bool recovered = !before(tp->snd_una, tp->high_seq); - if ((flag & FLAG_SND_UNA_ADVANCED || tp->fastopen_rsk) && + if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) && tcp_try_undo_loss(sk, false)) return; @@ -2990,7 +2992,7 @@ void tcp_rearm_rto(struct sock *sk) /* If the retrans timer is currently being used by Fast Open * for SYN-ACK retrans purpose, stay put. */ - if (tp->fastopen_rsk) + if (rcu_access_pointer(tp->fastopen_rsk)) return; if (!tp->packets_out) { @@ -3362,7 +3364,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) sock_owned_by_me((struct sock *)tp); tp->bytes_received += delta; - tp->rcv_nxt = seq; + WRITE_ONCE(tp->rcv_nxt, seq); } /* Update our send window. @@ -5356,7 +5358,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) } tp->urg_data = TCP_URG_NOTYET; - tp->urg_seq = ptr; + WRITE_ONCE(tp->urg_seq, ptr); /* Disable header prediction. */ tp->pred_flags = 0; @@ -5932,7 +5934,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, /* Ok.. it's good. Set up sequence numbers and * move to established. */ - tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1); tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; /* RFC1323: The window in SYN & SYN/ACK segments is @@ -5961,7 +5963,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, /* Remember, tcp_poll() does not lock socket! * Change state from SYN-SENT only after copied_seq * is initialized. */ - tp->copied_seq = tp->rcv_nxt; + WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); smc_check_reset_syn(tp); @@ -6035,8 +6037,8 @@ discard: tp->tcp_header_len = sizeof(struct tcphdr); } - tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - tp->copied_seq = tp->rcv_nxt; + WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1); + WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; /* RFC1323: The window in SYN & SYN/ACK segments is @@ -6087,6 +6089,8 @@ reset_and_undo: static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) { + struct request_sock *req; + tcp_try_undo_loss(sk, false); /* Reset rtx states to prevent spurious retransmits_timed_out() */ @@ -6096,7 +6100,9 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1, * we no longer need req so release it. */ - reqsk_fastopen_remove(sk, tcp_sk(sk)->fastopen_rsk, false); + req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, + lockdep_sock_is_held(sk)); + reqsk_fastopen_remove(sk, req, false); /* Re-arm the timer because data may have been sent out. * This is similar to the regular data transmission case @@ -6171,7 +6177,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_mstamp_refresh(tp); tp->rx_opt.saw_tstamp = 0; - req = tp->fastopen_rsk; + req = rcu_dereference_protected(tp->fastopen_rsk, + lockdep_sock_is_held(sk)); if (req) { bool req_stolen; @@ -6211,7 +6218,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_try_undo_spurious_syn(sk); tp->retrans_stamp = 0; tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); - tp->copied_seq = tp->rcv_nxt; + WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); } smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 492bf6a6b023..6be568334848 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -164,9 +164,11 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) * without appearing to create any others. */ if (likely(!tp->repair)) { - tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; - if (tp->write_seq == 0) - tp->write_seq = 1; + u32 seq = tcptw->tw_snd_nxt + 65535 + 2; + + if (!seq) + seq = 1; + WRITE_ONCE(tp->write_seq, seq); tp->rx_opt.ts_recent = tcptw->tw_ts_recent; tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; } @@ -253,7 +255,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; if (likely(!tp->repair)) - tp->write_seq = 0; + WRITE_ONCE(tp->write_seq, 0); } inet->inet_dport = usin->sin_port; @@ -291,10 +293,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (likely(!tp->repair)) { if (!tp->write_seq) - tp->write_seq = secure_tcp_seq(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - usin->sin_port); + WRITE_ONCE(tp->write_seq, + secure_tcp_seq(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + usin->sin_port)); tp->tsoffset = secure_tcp_ts_off(sock_net(sk), inet->inet_saddr, inet->inet_daddr); @@ -478,7 +481,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) icsk = inet_csk(sk); tp = tcp_sk(sk); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ - fastopen = tp->fastopen_rsk; + fastopen = rcu_dereference(tp->fastopen_rsk); snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && !between(seq, snd_una, tp->snd_nxt)) { @@ -2121,7 +2124,7 @@ void tcp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); - BUG_ON(tp->fastopen_rsk); + BUG_ON(rcu_access_pointer(tp->fastopen_rsk)); /* If socket is aborted during connect operation */ tcp_free_fastopen_req(tp); @@ -2455,12 +2458,13 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) /* Because we don't lock the socket, * we might find a transient negative value. */ - rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); + rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - + READ_ONCE(tp->copied_seq), 0); seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", i, src, srcp, dest, destp, state, - tp->write_seq - tp->snd_una, + READ_ONCE(tp->write_seq) - tp->snd_una, rx_queue, timer_active, jiffies_delta_to_clock_t(timer_expires - jiffies), diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index bb140a5db8c0..c802bc80c400 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -462,6 +462,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk; struct tcp_sock *oldtp, *newtp; + u32 seq; if (!newsk) return NULL; @@ -475,12 +476,16 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, /* Now setup tcp_sock */ newtp->pred_flags = 0; - newtp->rcv_wup = newtp->copied_seq = - newtp->rcv_nxt = treq->rcv_isn + 1; + seq = treq->rcv_isn + 1; + newtp->rcv_wup = seq; + WRITE_ONCE(newtp->copied_seq, seq); + WRITE_ONCE(newtp->rcv_nxt, seq); newtp->segs_in = 1; - newtp->snd_sml = newtp->snd_una = - newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; + seq = treq->snt_isn + 1; + newtp->snd_sml = newtp->snd_una = seq; + WRITE_ONCE(newtp->snd_nxt, seq); + newtp->snd_up = seq; INIT_LIST_HEAD(&newtp->tsq_node); INIT_LIST_HEAD(&newtp->tsorted_sent_queue); @@ -495,7 +500,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->total_retrans = req->num_retrans; tcp_init_xmit_timers(newsk); - newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; + WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1); if (sock_flag(newsk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(newsk, @@ -541,7 +546,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rx_opt.mss_clamp = req->mss; tcp_ecn_openreq_child(newtp, req); newtp->fastopen_req = NULL; - newtp->fastopen_rsk = NULL; + RCU_INIT_POINTER(newtp->fastopen_rsk, NULL); __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fec6d67bfd14..0488607c5cd3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -67,7 +67,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; - tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(skb)->end_seq); __skb_unlink(skb, &sk->sk_write_queue); tcp_rbtree_insert(&sk->tcp_rtx_queue, skb); @@ -1196,10 +1196,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); /* Advance write_seq and place onto the write_queue. */ - tp->write_seq = TCP_SKB_CB(skb)->end_seq; + WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq); __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); - sk->sk_wmem_queued += skb->truesize; + sk_wmem_queued_add(sk, skb->truesize); sk_mem_charge(sk, skb->truesize); } @@ -1333,7 +1333,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, return -ENOMEM; /* We'll just try again later. */ skb_copy_decrypted(buff, skb); - sk->sk_wmem_queued += buff->truesize; + sk_wmem_queued_add(sk, buff->truesize); sk_mem_charge(sk, buff->truesize); nlen = skb->len - len - nsize; buff->truesize += nlen; @@ -1443,7 +1443,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) if (delta_truesize) { skb->truesize -= delta_truesize; - sk->sk_wmem_queued -= delta_truesize; + sk_wmem_queued_add(sk, -delta_truesize); sk_mem_uncharge(sk, delta_truesize); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); } @@ -1888,7 +1888,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, return -ENOMEM; skb_copy_decrypted(buff, skb); - sk->sk_wmem_queued += buff->truesize; + sk_wmem_queued_add(sk, buff->truesize); sk_mem_charge(sk, buff->truesize); buff->truesize += nlen; skb->truesize -= nlen; @@ -2152,7 +2152,7 @@ static int tcp_mtu_probe(struct sock *sk) nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false); if (!nskb) return -1; - sk->sk_wmem_queued += nskb->truesize; + sk_wmem_queued_add(sk, nskb->truesize); sk_mem_charge(sk, nskb->truesize); skb = tcp_send_head(sk); @@ -2482,7 +2482,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) /* Don't do any loss probe on a Fast Open connection before 3WHS * finishes. */ - if (tp->fastopen_rsk) + if (rcu_access_pointer(tp->fastopen_rsk)) return false; early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; @@ -3142,7 +3142,7 @@ void tcp_send_fin(struct sock *sk) * if FIN had been sent. This is because retransmit path * does not change tp->snd_nxt. */ - tp->snd_nxt++; + WRITE_ONCE(tp->snd_nxt, tp->snd_nxt + 1); return; } } else { @@ -3222,7 +3222,7 @@ int tcp_send_synack(struct sock *sk) tcp_rtx_queue_unlink_and_free(skb, sk); __skb_header_release(nskb); tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb); - sk->sk_wmem_queued += nskb->truesize; + sk_wmem_queued_add(sk, nskb->truesize); sk_mem_charge(sk, nskb->truesize); skb = nskb; } @@ -3426,14 +3426,14 @@ static void tcp_connect_init(struct sock *sk) tp->snd_una = tp->write_seq; tp->snd_sml = tp->write_seq; tp->snd_up = tp->write_seq; - tp->snd_nxt = tp->write_seq; + WRITE_ONCE(tp->snd_nxt, tp->write_seq); if (likely(!tp->repair)) tp->rcv_nxt = 0; else tp->rcv_tstamp = tcp_jiffies32; tp->rcv_wup = tp->rcv_nxt; - tp->copied_seq = tp->rcv_nxt; + WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); inet_csk(sk)->icsk_rto = tcp_timeout_init(sk); inet_csk(sk)->icsk_retransmits = 0; @@ -3447,9 +3447,9 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) tcb->end_seq += skb->len; __skb_header_release(skb); - sk->sk_wmem_queued += skb->truesize; + sk_wmem_queued_add(sk, skb->truesize); sk_mem_charge(sk, skb->truesize); - tp->write_seq = tcb->end_seq; + WRITE_ONCE(tp->write_seq, tcb->end_seq); tp->packets_out += tcp_skb_pcount(skb); } @@ -3586,11 +3586,11 @@ int tcp_connect(struct sock *sk) /* We change tp->snd_nxt after the tcp_transmit_skb() call * in order to make this packet get counted in tcpOutSegs. */ - tp->snd_nxt = tp->write_seq; + WRITE_ONCE(tp->snd_nxt, tp->write_seq); tp->pushed_seq = tp->write_seq; buff = tcp_send_head(sk); if (unlikely(buff)) { - tp->snd_nxt = TCP_SKB_CB(buff)->seq; + WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(buff)->seq); tp->pushed_seq = TCP_SKB_CB(buff)->seq; } TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 05be564414e9..dd5a6317a801 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -386,15 +386,13 @@ abort: tcp_write_err(sk); * Timer for Fast Open socket to retransmit SYNACK. Note that the * sk here is the child socket, not the parent (listener) socket. */ -static void tcp_fastopen_synack_timer(struct sock *sk) +static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) { struct inet_connection_sock *icsk = inet_csk(sk); int max_retries = icsk->icsk_syn_retries ? : sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ struct tcp_sock *tp = tcp_sk(sk); - struct request_sock *req; - req = tcp_sk(sk)->fastopen_rsk; req->rsk_ops->syn_ack_timeout(req); if (req->num_timeout >= max_retries) { @@ -435,11 +433,14 @@ void tcp_retransmit_timer(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct request_sock *req; - if (tp->fastopen_rsk) { + req = rcu_dereference_protected(tp->fastopen_rsk, + lockdep_sock_is_held(sk)); + if (req) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); - tcp_fastopen_synack_timer(sk); + tcp_fastopen_synack_timer(sk, req); /* Before we receive ACK to our SYN-ACK don't retransmit * anything else (e.g., data or FIN segments). */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e3d9f4559c99..4804b6dc5e65 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -215,7 +215,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; - tp->write_seq = 0; + WRITE_ONCE(tp->write_seq, 0); } sk->sk_v6_daddr = usin->sin6_addr; @@ -311,10 +311,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (likely(!tp->repair)) { if (!tp->write_seq) - tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_sport, - inet->inet_dport); + WRITE_ONCE(tp->write_seq, + secure_tcpv6_seq(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + inet->inet_sport, + inet->inet_dport)); tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32); @@ -406,7 +407,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tp = tcp_sk(sk); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ - fastopen = tp->fastopen_rsk; + fastopen = rcu_dereference(tp->fastopen_rsk); snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && !between(seq, snd_una, tp->snd_nxt)) { @@ -1895,7 +1896,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) /* Because we don't lock the socket, * we might find a transient negative value. */ - rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); + rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - + READ_ONCE(tp->copied_seq), 0); seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " @@ -1906,7 +1908,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, state, - tp->write_seq - tp->snd_una, + READ_ONCE(tp->write_seq) - tp->snd_una, rx_queue, timer_active, jiffies_delta_to_clock_t(timer_expires - jiffies), diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 4c9122fc35c9..3177dcb17316 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -446,7 +446,7 @@ META_COLLECTOR(int_sk_wmem_queued) *err = -1; return; } - dst->value = sk->sk_wmem_queued; + dst->value = READ_ONCE(sk->sk_wmem_queued); } META_COLLECTOR(int_sk_fwd_alloc) |