Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--	net/ipv4/tcp.c	116
1 file changed, 83 insertions(+), 33 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 814af89c1bd3..1ef3165114ba 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -279,7 +279,6 @@
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
-#include <asm/unaligned.h>
 #include <net/busy_poll.h>
 
 int sysctl_tcp_min_tso_segs __read_mostly = 2;
@@ -405,7 +404,6 @@ void tcp_init_sock(struct sock *sk)
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd_clamp = ~0;
 	tp->mss_cache = TCP_MSS_DEFAULT;
-	u64_stats_init(&tp->syncp);
 
 	tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
 	tcp_enable_early_retrans(tp);
@@ -665,9 +663,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
 
 	if (tcp_should_autocork(sk, skb, size_goal)) {
 		/* avoid atomic op if TSQ_THROTTLED bit is already set */
-		if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) {
+		if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
-			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
+			set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
 		}
 		/* It is possible TX completion already happened
 		 * before we set TSQ_THROTTLED.
@@ -998,8 +996,11 @@ do_error:
 		goto out;
 out_err:
 	/* make sure we wake any epoll edge trigger waiter */
-	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
+		     err == -EAGAIN)) {
 		sk->sk_write_space(sk);
+		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+	}
 	return sk_stream_error(sk, flags, err);
 }
 
@@ -1333,8 +1334,11 @@ do_error:
 out_err:
 	err = sk_stream_error(sk, flags, err);
 	/* make sure we wake any epoll edge trigger waiter */
-	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
+		     err == -EAGAIN)) {
 		sk->sk_write_space(sk);
+		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+	}
 	release_sock(sk);
 	return err;
 }
@@ -2302,7 +2306,7 @@ EXPORT_SYMBOL(tcp_disconnect);
 static inline bool tcp_can_repair_sock(const struct sock *sk)
 {
 	return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
-		((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
+		(sk->sk_state != TCP_LISTEN);
 }
 
 static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
@@ -2704,15 +2708,33 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL(compat_tcp_setsockopt);
 #endif
 
+static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
+				      struct tcp_info *info)
+{
+	u64 stats[__TCP_CHRONO_MAX], total = 0;
+	enum tcp_chrono i;
+
+	for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) {
+		stats[i] = tp->chrono_stat[i - 1];
+		if (i == tp->chrono_type)
+			stats[i] += tcp_time_stamp - tp->chrono_start;
+		stats[i] *= USEC_PER_SEC / HZ;
+		total += stats[i];
+	}
+
+	info->tcpi_busy_time = total;
+	info->tcpi_rwnd_limited = stats[TCP_CHRONO_RWND_LIMITED];
+	info->tcpi_sndbuf_limited = stats[TCP_CHRONO_SNDBUF_LIMITED];
+}
+
 /* Return information about state of tcp endpoint in API format. */
 void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
 	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 now = tcp_time_stamp, intv;
-	unsigned int start;
-	int notsent_bytes;
 	u64 rate64;
+	bool slow;
 	u32 rate;
 
 	memset(info, 0, sizeof(*info));
@@ -2721,6 +2743,27 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 
 	info->tcpi_state = sk_state_load(sk);
 
+	/* Report meaningful fields for all TCP states, including listeners */
+	rate = READ_ONCE(sk->sk_pacing_rate);
+	rate64 = rate != ~0U ? rate : ~0ULL;
+	info->tcpi_pacing_rate = rate64;
+
+	rate = READ_ONCE(sk->sk_max_pacing_rate);
+	rate64 = rate != ~0U ? rate : ~0ULL;
+	info->tcpi_max_pacing_rate = rate64;
+
+	info->tcpi_reordering = tp->reordering;
+	info->tcpi_snd_cwnd = tp->snd_cwnd;
+
+	if (info->tcpi_state == TCP_LISTEN) {
+		/* listeners aliased fields :
+		 * tcpi_unacked -> Number of children ready for accept()
+		 * tcpi_sacked  -> max backlog
+		 */
+		info->tcpi_unacked = sk->sk_ack_backlog;
+		info->tcpi_sacked = sk->sk_max_ack_backlog;
+		return;
+	}
 	info->tcpi_ca_state = icsk->icsk_ca_state;
 	info->tcpi_retransmits = icsk->icsk_retransmits;
 	info->tcpi_probes = icsk->icsk_probes_out;
@@ -2748,13 +2791,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_snd_mss = tp->mss_cache;
 	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
 
-	if (info->tcpi_state == TCP_LISTEN) {
-		info->tcpi_unacked = sk->sk_ack_backlog;
-		info->tcpi_sacked = sk->sk_max_ack_backlog;
-	} else {
-		info->tcpi_unacked = tp->packets_out;
-		info->tcpi_sacked = tp->sacked_out;
-	}
+	info->tcpi_unacked = tp->packets_out;
+	info->tcpi_sacked = tp->sacked_out;
+
 	info->tcpi_lost = tp->lost_out;
 	info->tcpi_retrans = tp->retrans_out;
 	info->tcpi_fackets = tp->fackets_out;
@@ -2768,34 +2807,25 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_rtt = tp->srtt_us >> 3;
 	info->tcpi_rttvar = tp->mdev_us >> 2;
 	info->tcpi_snd_ssthresh = tp->snd_ssthresh;
-	info->tcpi_snd_cwnd = tp->snd_cwnd;
 	info->tcpi_advmss = tp->advmss;
-	info->tcpi_reordering = tp->reordering;
 
 	info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
 	info->tcpi_rcv_space = tp->rcvq_space.space;
 
 	info->tcpi_total_retrans = tp->total_retrans;
 
-	rate = READ_ONCE(sk->sk_pacing_rate);
-	rate64 = rate != ~0U ? rate : ~0ULL;
-	put_unaligned(rate64, &info->tcpi_pacing_rate);
+	slow = lock_sock_fast(sk);
 
-	rate = READ_ONCE(sk->sk_max_pacing_rate);
-	rate64 = rate != ~0U ? rate : ~0ULL;
-	put_unaligned(rate64, &info->tcpi_max_pacing_rate);
+	info->tcpi_bytes_acked = tp->bytes_acked;
+	info->tcpi_bytes_received = tp->bytes_received;
+	info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt);
+	tcp_get_info_chrono_stats(tp, info);
+
+	unlock_sock_fast(sk, slow);
 
-	do {
-		start = u64_stats_fetch_begin_irq(&tp->syncp);
-		put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
-		put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
-	} while (u64_stats_fetch_retry_irq(&tp->syncp, start));
 	info->tcpi_segs_out = tp->segs_out;
 	info->tcpi_segs_in = tp->segs_in;
 
-	notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt);
-	info->tcpi_notsent_bytes = max(0, notsent_bytes);
-
 	info->tcpi_min_rtt = tcp_min_rtt(tp);
 	info->tcpi_data_segs_in = tp->data_segs_in;
 	info->tcpi_data_segs_out = tp->data_segs_out;
@@ -2806,11 +2836,31 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	if (rate && intv) {
 		rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
 		do_div(rate64, intv);
-		put_unaligned(rate64, &info->tcpi_delivery_rate);
+		info->tcpi_delivery_rate = rate64;
 	}
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
+struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *stats;
+	struct tcp_info info;
+
+	stats = alloc_skb(3 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC);
+	if (!stats)
+		return NULL;
+
+	tcp_get_info_chrono_stats(tp, &info);
+	nla_put_u64_64bit(stats, TCP_NLA_BUSY,
+			  info.tcpi_busy_time, TCP_NLA_PAD);
+	nla_put_u64_64bit(stats, TCP_NLA_RWND_LIMITED,
+			  info.tcpi_rwnd_limited, TCP_NLA_PAD);
+	nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED,
+			  info.tcpi_sndbuf_limited, TCP_NLA_PAD);
+	return stats;
+}
+
 static int do_tcp_getsockopt(struct sock *sk, int level,
 		int optname, char __user *optval, int __user *optlen)
 {
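
For context: the three chronograph counters the diff adds reach userspace through the long-standing getsockopt(TCP_INFO) interface once the uapi struct tcp_info carries the new fields. The sketch below is illustrative only and not part of the patch; it assumes a uapi <linux/tcp.h> recent enough to define tcpi_busy_time, tcpi_rwnd_limited and tcpi_sndbuf_limited, which tcp_get_info_chrono_stats() accounts in jiffies and scales to microseconds.

/* Sketch: dump the new chronograph counters for a connected TCP socket.
 * Assumes a uapi <linux/tcp.h> that already has the fields added above.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>		/* IPPROTO_TCP */
#include <linux/tcp.h>		/* struct tcp_info, TCP_INFO */

static int dump_tcp_chrono(int fd)
{
	struct tcp_info info;
	socklen_t len = sizeof(info);

	memset(&info, 0, sizeof(info));
	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &len) < 0) {
		perror("getsockopt(TCP_INFO)");
		return -1;
	}
	/* An older kernel copies out a shorter struct and shrinks len;
	 * the new fields would then be absent, so do not print them.
	 */
	if (len < sizeof(info))
		return -1;

	printf("busy %llu us, rwnd limited %llu us, sndbuf limited %llu us\n",
	       (unsigned long long)info.tcpi_busy_time,
	       (unsigned long long)info.tcpi_rwnd_limited,
	       (unsigned long long)info.tcpi_sndbuf_limited);
	return 0;
}

The same three counters are also emitted as TCP_NLA_BUSY, TCP_NLA_RWND_LIMITED and TCP_NLA_SNDBUF_LIMITED netlink attributes by the tcp_get_timestamping_opt_stats() helper added above, which appears intended for the SO_TIMESTAMPING opt-stats path, so they can be reported per transmit timestamp without an extra getsockopt() call.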