diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 97 |
1 files changed, 77 insertions, 20 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f12d500ec85c..61082065b26a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -317,6 +317,11 @@ struct tcp_splice_state { unsigned long tcp_memory_pressure __read_mostly; EXPORT_SYMBOL_GPL(tcp_memory_pressure); +DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); +EXPORT_SYMBOL(tcp_rx_skb_cache_key); + +DEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key); + void tcp_enter_memory_pressure(struct sock *sk) { unsigned long val; @@ -930,6 +935,22 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) return mss_now; } +/* In some cases, both sendpage() and sendmsg() could have added + * an skb to the write queue, but failed adding payload on it. + * We need to remove it to consume less memory, but more + * importantly be able to generate EPOLLOUT for Edge Trigger epoll() + * users. + */ +static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) +{ + if (skb && !skb->len) { + tcp_unlink_write_queue(skb, sk); + if (tcp_write_queue_empty(sk)) + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); + sk_wmem_free_skb(sk, skb); + } +} + ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -979,6 +1000,9 @@ new_segment: if (!skb) goto wait_for_memory; +#ifdef CONFIG_TLS_DEVICE + skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); +#endif skb_entail(sk, skb); copy = size_goal; } @@ -1056,6 +1080,7 @@ out: return copied; do_error: + tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk)); if (copied) goto out; out_err: @@ -1380,18 +1405,11 @@ out_nopush: sock_zerocopy_put(uarg); return copied + copied_syn; +do_error: + skb = tcp_write_queue_tail(sk); do_fault: - if (!skb->len) { - tcp_unlink_write_queue(skb, sk); - /* It is the one place in all of TCP, except connection - * reset, where we can be unlinking the send_head. - */ - if (tcp_write_queue_empty(sk)) - tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - sk_wmem_free_skb(sk, skb); - } + tcp_remove_empty_skb(sk, skb); -do_error: if (copied + copied_syn) goto out; out_err: @@ -2609,6 +2627,8 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_saved_syn_free(tp); tp->compressed_ack = 0; tp->bytes_sent = 0; + tp->bytes_acked = 0; + tp->bytes_received = 0; tp->bytes_retrans = 0; tp->duplicate_sack[0].start_seq = 0; tp->duplicate_sack[0].end_seq = 0; @@ -2736,6 +2756,21 @@ static int tcp_repair_options_est(struct sock *sk, return 0; } +DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled); +EXPORT_SYMBOL(tcp_tx_delay_enabled); + +static void tcp_enable_tx_delay(void) +{ + if (!static_branch_unlikely(&tcp_tx_delay_enabled)) { + static int __tcp_tx_delay_enabled = 0; + + if (cmpxchg(&__tcp_tx_delay_enabled, 0, 1) == 0) { + static_branch_enable(&tcp_tx_delay_enabled); + pr_info("TCP_TX_DELAY enabled\n"); + } + } +} + /* * Socket option code for TCP. */ @@ -2763,7 +2798,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, name[val] = 0; lock_sock(sk); - err = tcp_set_congestion_control(sk, name, true, true); + err = tcp_set_congestion_control(sk, name, true, true, + ns_capable(sock_net(sk)->user_ns, + CAP_NET_ADMIN)); release_sock(sk); return err; } @@ -2786,15 +2823,23 @@ static int do_tcp_setsockopt(struct sock *sk, int level, return err; } case TCP_FASTOPEN_KEY: { - __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH]; + __u8 *backup_key = NULL; - if (optlen != sizeof(key)) + /* Allow a backup key as well to facilitate key rotation + * First key is the active one. + */ + if (optlen != TCP_FASTOPEN_KEY_LENGTH && + optlen != TCP_FASTOPEN_KEY_BUF_LENGTH) return -EINVAL; if (copy_from_user(key, optval, optlen)) return -EFAULT; - return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key)); + if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH) + backup_key = key + TCP_FASTOPEN_KEY_LENGTH; + + return tcp_fastopen_reset_cipher(net, sk, key, backup_key); } default: /* fallthru */ @@ -3078,6 +3123,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else tp->recvmsg_inq = val; break; + case TCP_TX_DELAY: + if (val) + tcp_enable_tx_delay(); + tp->tcp_tx_delay = val; + break; default: err = -ENOPROTOOPT; break; @@ -3448,21 +3498,23 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return 0; case TCP_FASTOPEN_KEY: { - __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH]; struct tcp_fastopen_context *ctx; + unsigned int key_len = 0; if (get_user(len, optlen)) return -EFAULT; rcu_read_lock(); ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); - if (ctx) - memcpy(key, ctx->key, sizeof(key)); - else - len = 0; + if (ctx) { + key_len = tcp_fastopen_context_len(ctx) * + TCP_FASTOPEN_KEY_LENGTH; + memcpy(&key[0], &ctx->key[0], key_len); + } rcu_read_unlock(); - len = min_t(unsigned int, len, sizeof(key)); + len = min_t(unsigned int, len, key_len); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, key, len)) @@ -3535,6 +3587,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = tp->fastopen_no_cookie; break; + case TCP_TX_DELAY: + val = tp->tcp_tx_delay; + break; + case TCP_TIMESTAMP: val = tcp_time_stamp_raw() + tp->tsoffset; break; @@ -3868,6 +3924,7 @@ void __init tcp_init(void) unsigned long limit; unsigned int i; + BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE); BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); |