Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
 net/ipv4/tcp_ipv4.c | 79
 1 file changed, 50 insertions(+), 29 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5f693bbd578d..30ef0c8f5e92 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -70,6 +70,7 @@
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
@@ -154,6 +155,12 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
if (tcptw->tw_ts_recent_stamp &&
(!twp || (reuse && time_after32(ktime_get_seconds(),
tcptw->tw_ts_recent_stamp)))) {
+ /* inet_twsk_hashdance() sets sk_refcnt after putting twsk
+ * and releasing the bucket lock.
+ */
+ if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt)))
+ return 0;
+
/* In case of repair and re-using TIME-WAIT sockets we still
* want to be sure that it is safe as above but honor the
* sequence numbers and time stamps set as part of the repair
@@ -174,7 +181,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
}
- sock_hold(sktw);
+
return 1;
}
@@ -690,7 +697,7 @@ static bool tcp_v4_ao_sign_reset(const struct sock *sk, struct sk_buff *skb,
reply_options[0] = htonl((TCPOPT_AO << 24) | (tcp_ao_len(key) << 16) |
(aoh->rnext_keyid << 8) | keyid);
- arg->iov[0].iov_len += round_up(tcp_ao_len(key), 4);
+ arg->iov[0].iov_len += tcp_ao_len_aligned(key);
reply->doff = arg->iov[0].iov_len / 4;
if (tcp_ao_hash_hdr(AF_INET, (char *)&reply_options[1],
@@ -723,7 +730,8 @@ out:
* Exception: precedence violation. We do not implement it in any case.
*/
-static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason)
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
@@ -866,11 +874,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
* routing might fail in this case. No choice here, if we choose to force
* input interface, we will misroute in case of asymmetric route.
*/
- if (sk) {
+ if (sk)
arg.bound_dev_if = sk->sk_bound_dev_if;
- if (sk_fullsock(sk))
- trace_tcp_send_reset(sk, skb);
- }
+
+ trace_tcp_send_reset(sk, skb, reason);
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
@@ -978,7 +985,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
(tcp_ao_len(key->ao_key) << 16) |
(key->ao_key->sndid << 8) |
key->rcv_next);
- arg.iov[0].iov_len += round_up(tcp_ao_len(key->ao_key), 4);
+ arg.iov[0].iov_len += tcp_ao_len_aligned(key->ao_key);
rep.th.doff = arg.iov[0].iov_len / 4;
tcp_ao_hash_hdr(AF_INET, (char *)&rep.opt[offset],
@@ -1667,7 +1674,8 @@ static void tcp_v4_init_req(struct request_sock *req,
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
- struct request_sock *req)
+ struct request_sock *req,
+ u32 tw_isn)
{
tcp_v4_init_req(req, sk, skb);
@@ -1907,7 +1915,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
- reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (tcp_checksum_complete(skb))
goto csum_err;
@@ -1915,9 +1922,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
struct sock *nsk = tcp_v4_cookie_check(sk, skb);
if (!nsk)
- goto discard;
+ return 0;
if (nsk != sk) {
- if (tcp_child_process(sk, nsk, skb)) {
+ reason = tcp_child_process(sk, nsk, skb);
+ if (reason) {
rsk = nsk;
goto reset;
}
@@ -1926,14 +1934,15 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
} else
sock_rps_save_rxhash(sk, skb);
- if (tcp_rcv_state_process(sk, skb)) {
+ reason = tcp_rcv_state_process(sk, skb);
+ if (reason) {
rsk = sk;
goto reset;
}
return 0;
reset:
- tcp_v4_send_reset(rsk, skb);
+ tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason));
discard:
kfree_skb_reason(skb, reason);
/* Be careful here. If this function gets more complicated and
@@ -1994,7 +2003,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
enum skb_drop_reason *reason)
{
- u32 limit, tail_gso_size, tail_gso_segs;
+ u32 tail_gso_size, tail_gso_segs;
struct skb_shared_info *shinfo;
const struct tcphdr *th;
struct tcphdr *thtail;
@@ -2003,6 +2012,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
bool fragstolen;
u32 gso_segs;
u32 gso_size;
+ u64 limit;
int delta;
/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
@@ -2044,10 +2054,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
((TCP_SKB_CB(tail)->tcp_flags ^
TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
-#ifdef CONFIG_TLS_DEVICE
- tail->decrypted != skb->decrypted ||
-#endif
!mptcp_skb_can_collapse(tail, skb) ||
+ skb_cmp_decrypted(tail, skb) ||
thtail->doff != th->doff ||
memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
goto no_coalesce;
@@ -2100,7 +2108,13 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
__skb_push(skb, hdrlen);
no_coalesce:
- limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1);
+ /* sk->sk_backlog.len is reset only at the end of __release_sock().
+ * Both sk->sk_backlog.len and sk->sk_rmem_alloc could reach
+ * sk_rcvbuf in normal conditions.
+ */
+ limit = ((u64)READ_ONCE(sk->sk_rcvbuf)) << 1;
+
+ limit += ((u32)READ_ONCE(sk->sk_sndbuf)) >> 1;
/* Only socket owner can try to collapse/prune rx queues
* to reduce memory overhead, so add a little headroom here.
@@ -2108,6 +2122,8 @@ no_coalesce:
*/
limit += 64 * 1024;
+ limit = min_t(u64, limit, UINT_MAX);
+
if (unlikely(sk_add_backlog(sk, skb, limit))) {
bh_unlock_sock(sk);
*reason = SKB_DROP_REASON_SOCKET_BACKLOG;
@@ -2147,7 +2163,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
- TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
@@ -2169,6 +2184,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
bool refcounted;
struct sock *sk;
int ret;
+ u32 isn;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST)
@@ -2206,7 +2222,6 @@ lookup:
if (!sk)
goto no_tcp_socket;
-process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
@@ -2275,15 +2290,21 @@ process:
if (nsk == sk) {
reqsk_put(req);
tcp_v4_restore_cb(skb);
- } else if (tcp_child_process(sk, nsk, skb)) {
- tcp_v4_send_reset(nsk, skb);
- goto discard_and_relse;
} else {
+ drop_reason = tcp_child_process(sk, nsk, skb);
+ if (drop_reason) {
+ enum sk_rst_reason rst_reason;
+
+ rst_reason = sk_rst_convert_drop_reason(drop_reason);
+ tcp_v4_send_reset(nsk, skb, rst_reason);
+ goto discard_and_relse;
+ }
sock_put(sk);
return 0;
}
}
+process:
if (static_branch_unlikely(&ip4_min_ttl)) {
/* min_ttl can be changed concurrently from do_ip_setsockopt() */
if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
@@ -2354,7 +2375,7 @@ csum_error:
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
- tcp_v4_send_reset(NULL, skb);
+ tcp_v4_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
}
discard_it:
@@ -2382,7 +2403,7 @@ do_time_wait:
inet_twsk_put(inet_twsk(sk));
goto csum_error;
}
- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+ switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(net,
net->ipv4.tcp_death_row.hashinfo,
@@ -2396,6 +2417,7 @@ do_time_wait:
sk = sk2;
tcp_v4_restore_cb(skb);
refcounted = false;
+ __this_cpu_write(tcp_tw_isn, isn);
goto process;
}
}
@@ -2405,7 +2427,7 @@ do_time_wait:
tcp_v4_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- tcp_v4_send_reset(sk, skb);
+ tcp_v4_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
case TCP_TW_SUCCESS:;
@@ -2415,7 +2437,6 @@ do_time_wait:
static struct timewait_sock_ops tcp_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp_timewait_sock),
- .twsk_unique = tcp_twsk_unique,
.twsk_destructor= tcp_twsk_destructor,
};
@@ -3498,7 +3519,7 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
- tcp_twsk_purge(net_exit_list, AF_INET);
+ tcp_twsk_purge(net_exit_list);
list_for_each_entry(net, net_exit_list, exit_list) {
inet_pernet_hashinfo_free(net->ipv4.tcp_death_row.hashinfo);