diff options
Diffstat (limited to 'include/linux/tcp.h')
| -rw-r--r-- | include/linux/tcp.h | 256 | 
1 files changed, 136 insertions, 120 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 68f3d315d2e1..89b290d8c8dc 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -169,7 +169,7 @@ struct tcp_request_sock {  #ifdef CONFIG_TCP_AO  	u8				ao_keyid;  	u8				ao_rcv_next; -	u8				maclen; +	bool				used_tcp_ao;  #endif  }; @@ -180,37 +180,131 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)  static inline bool tcp_rsk_used_ao(const struct request_sock *req)  { -	/* The real length of MAC is saved in the request socket, -	 * signing anything with zero-length makes no sense, so here is -	 * a little hack.. -	 */  #ifndef CONFIG_TCP_AO  	return false;  #else -	return tcp_rsk(req)->maclen != 0; +	return tcp_rsk(req)->used_tcp_ao;  #endif  }  #define TCP_RMEM_TO_WIN_SCALE 8  struct tcp_sock { +	/* Cacheline organization can be found documented in +	 * Documentation/networking/net_cachelines/tcp_sock.rst. +	 * Please update the document when adding new fields. +	 */ +  	/* inet_connection_sock has to be the first member of tcp_sock */  	struct inet_connection_sock	inet_conn; -	u16	tcp_header_len;	/* Bytes of tcp header to send		*/ + +	/* TX read-mostly hotpath cache lines */ +	__cacheline_group_begin(tcp_sock_read_tx); +	/* timestamp of last sent data packet (for restart window) */ +	u32	max_window;	/* Maximal window ever seen from peer	*/ +	u32	rcv_ssthresh;	/* Current window clamp			*/ +	u32	reordering;	/* Packet reordering metric.		*/ +	u32	notsent_lowat;	/* TCP_NOTSENT_LOWAT */  	u16	gso_segs;	/* Max number of segs per GSO packet	*/ +	/* from STCP, retrans queue hinting */ +	struct sk_buff *lost_skb_hint; +	struct sk_buff *retransmit_skb_hint; +	__cacheline_group_end(tcp_sock_read_tx); + +	/* TXRX read-mostly hotpath cache lines */ +	__cacheline_group_begin(tcp_sock_read_txrx); +	u32	tsoffset;	/* timestamp offset */ +	u32	snd_wnd;	/* The window we expect to receive	*/ +	u32	mss_cache;	/* Cached effective mss, not including SACKS */ +	u32	snd_cwnd;	/* Sending congestion window		*/ +	u32	prr_out;	/* Total number of pkts sent during Recovery. */ +	u32	lost_out;	/* Lost packets			*/ +	u32	sacked_out;	/* SACK'd packets			*/ +	u16	tcp_header_len;	/* Bytes of tcp header to send		*/ +	u8	chrono_type : 2,	/* current chronograph type */ +		repair      : 1, +		is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */ +		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ +	__cacheline_group_end(tcp_sock_read_txrx); + +	/* RX read-mostly hotpath cache lines */ +	__cacheline_group_begin(tcp_sock_read_rx); +	u32	copied_seq;	/* Head of yet unread data */ +	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */ +	u32	snd_wl1;	/* Sequence for window update		*/ +	u32	tlp_high_seq;	/* snd_nxt at the time of TLP */ +	u32	rttvar_us;	/* smoothed mdev_max			*/ +	u32	retrans_out;	/* Retransmitted packets out		*/ +	u16	advmss;		/* Advertised MSS			*/ +	u16	urg_data;	/* Saved octet of OOB data and control flags */ +	u32	lost;		/* Total data packets lost incl. rexmits */ +	struct  minmax rtt_min; +	/* OOO segments go in this rbtree. Socket lock must be held. */ +	struct rb_root	out_of_order_queue; +	u32	snd_ssthresh;	/* Slow start size threshold		*/ +	__cacheline_group_end(tcp_sock_read_rx); +	/* TX read-write hotpath cache lines */ +	__cacheline_group_begin(tcp_sock_write_tx) ____cacheline_aligned; +	u32	segs_out;	/* RFC4898 tcpEStatsPerfSegsOut +				 * The total number of segments sent. +				 */ +	u32	data_segs_out;	/* RFC4898 tcpEStatsPerfDataSegsOut +				 * total number of data segments sent. +				 */ +	u64	bytes_sent;	/* RFC4898 tcpEStatsPerfHCDataOctetsOut +				 * total number of data bytes sent. +				 */ +	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */ +	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */ +	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */ +	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */ +	u32	pushed_seq;	/* Last pushed seq, required to talk to windows */ +	u32	lsndtime; +	u32	mdev_us;	/* medium deviation			*/ +	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */ +	u64	tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ +	u64	tcp_mstamp;	/* most recent packet received/sent */ +	u32	rtt_seq;	/* sequence number to update rttvar	*/ +	struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ +	struct sk_buff *highest_sack;   /* skb just after the highest +					 * skb with SACKed bit set +					 * (validity guaranteed only if +					 * sacked_out > 0) +					 */ +	u8	ecn_flags;	/* ECN status bits.			*/ +	__cacheline_group_end(tcp_sock_write_tx); + +	/* TXRX read-write hotpath cache lines */ +	__cacheline_group_begin(tcp_sock_write_txrx);  /*   *	Header prediction flags   *	0x5?10 << 16 + snd_wnd in net byte order   */  	__be32	pred_flags; - +	u32	rcv_nxt;	/* What we want to receive next		*/ +	u32	snd_nxt;	/* Next sequence we send		*/ +	u32	snd_una;	/* First byte we want an ack for	*/ +	u32	window_clamp;	/* Maximal window to advertise		*/ +	u32	srtt_us;	/* smoothed round trip time << 3 in usecs */ +	u32	packets_out;	/* Packets which are "in flight"	*/ +	u32	snd_up;		/* Urgent pointer		*/ +	u32	delivered;	/* Total data packets delivered incl. rexmits */ +	u32	delivered_ce;	/* Like the above but only ECE marked packets */ +	u32	app_limited;	/* limited until "delivered" reaches this val */ +	u32	rcv_wnd;	/* Current receiver window		*/  /* - *	RFC793 variables by their proper names. This means you can - *	read the code and the spec side by side (and laugh ...) - *	See RFC793 and RFC1122. The RFC writes these in capitals. + *      Options received (usually on last packet, some only on SYN packets).   */ -	u64	bytes_received;	/* RFC4898 tcpEStatsAppHCThruOctetsReceived +	struct tcp_options_received rx_opt; +	u8	nonagle     : 4,/* Disable Nagle algorithm?             */ +		rate_app_limited:1;  /* rate_{delivered,interval_us} limited? */ +	__cacheline_group_end(tcp_sock_write_txrx); + +	/* RX read-write hotpath cache lines */ +	__cacheline_group_begin(tcp_sock_write_rx); +	u64	bytes_received; +				/* RFC4898 tcpEStatsAppHCThruOctetsReceived  				 * sum(delta(rcv_nxt)), or how many bytes  				 * were acked.  				 */ @@ -220,45 +314,44 @@ struct tcp_sock {  	u32	data_segs_in;	/* RFC4898 tcpEStatsPerfDataSegsIn  				 * total number of data segments in.  				 */ - 	u32	rcv_nxt;	/* What we want to receive next 	*/ -	u32	copied_seq;	/* Head of yet unread data		*/  	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/ - 	u32	snd_nxt;	/* Next sequence we send		*/ -	u32	segs_out;	/* RFC4898 tcpEStatsPerfSegsOut -				 * The total number of segments sent. -				 */ -	u32	data_segs_out;	/* RFC4898 tcpEStatsPerfDataSegsOut -				 * total number of data segments sent. -				 */ -	u64	bytes_sent;	/* RFC4898 tcpEStatsPerfHCDataOctetsOut -				 * total number of data bytes sent. -				 */ +	u32	max_packets_out;  /* max packets_out in last window */ +	u32	cwnd_usage_seq;  /* right edge of cwnd usage tracking flight */ +	u32	rate_delivered;    /* saved rate sample: packets delivered */ +	u32	rate_interval_us;  /* saved rate sample: time elapsed */ +	u32	rcv_rtt_last_tsecr; +	u64	first_tx_mstamp;  /* start of window send phase */ +	u64	delivered_mstamp; /* time we reached "delivered" */  	u64	bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked  				 * sum(delta(snd_una)), or how many bytes  				 * were acked.  				 */ +	struct { +		u32	rtt_us; +		u32	seq; +		u64	time; +	} rcv_rtt_est; +/* Receiver queue space */ +	struct { +		u32	space; +		u32	seq; +		u64	time; +	} rcvq_space; +	__cacheline_group_end(tcp_sock_write_rx); +	/* End of Hot Path */ + +/* + *	RFC793 variables by their proper names. This means you can + *	read the code and the spec side by side (and laugh ...) + *	See RFC793 and RFC1122. The RFC writes these in capitals. + */  	u32	dsack_dups;	/* RFC4898 tcpEStatsStackDSACKDups  				 * total number of DSACK blocks received  				 */ - 	u32	snd_una;	/* First byte we want an ack for	*/ - 	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */ -	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */ -	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */  	u32	last_oow_ack_time;  /* timestamp of last out-of-window ACK */  	u32	compressed_ack_rcv_nxt; - -	u32	tsoffset;	/* timestamp offset */ -  	struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ -	struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ - -	u32	snd_wl1;	/* Sequence for window update		*/ -	u32	snd_wnd;	/* The window we expect to receive	*/ -	u32	max_window;	/* Maximal window ever seen from peer	*/ -	u32	mss_cache;	/* Cached effective mss, not including SACKS */ -	u32	window_clamp;	/* Maximal window to advertise		*/ -	u32	rcv_ssthresh;	/* Current window clamp			*/  	u8	scaling_ratio;	/* see tcp_win_from_space() */  	/* Information of the most recently (s)acked skb */  	struct tcp_rack { @@ -272,24 +365,16 @@ struct tcp_sock {  		   dsack_seen:1, /* Whether DSACK seen after last adj */  		   advanced:1;	 /* mstamp advanced since last lost marking */  	} rack; -	u16	advmss;		/* Advertised MSS			*/  	u8	compressed_ack;  	u8	dup_ack_counter:2,  		tlp_retrans:1,	/* TLP is a retransmission */  		tcp_usec_ts:1, /* TSval values in usec */  		unused:4; -	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */ -	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */ -	u8	chrono_type:2,	/* current chronograph type */ -		rate_app_limited:1,  /* rate_{delivered,interval_us} limited? */ +	u8	thin_lto    : 1,/* Use linear timeouts for thin streams */ +		recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */  		fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */  		fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ -		is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */ -		fastopen_client_fail:2; /* reason why fastopen failed */ -	u8	nonagle     : 4,/* Disable Nagle algorithm?             */ -		thin_lto    : 1,/* Use linear timeouts for thin streams */ -		recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ -		repair      : 1, +		fastopen_client_fail:2, /* reason why fastopen failed */  		frto        : 1;/* F-RTO (RFC5682) activated in CA_Loss */  	u8	repair_queue;  	u8	save_syn:2,	/* Save headers of SYN packet */ @@ -297,45 +382,19 @@ struct tcp_sock {  		syn_fastopen:1,	/* SYN includes Fast Open option */  		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */  		syn_fastopen_ch:1, /* Active TFO re-enabling probe */ -		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ -		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ -	u32	tlp_high_seq;	/* snd_nxt at the time of TLP */ +		syn_data_acked:1;/* data in SYN is acked by SYN-ACK */  	u32	tcp_tx_delay;	/* delay (in usec) added to TX packets */ -	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */ -	u64	tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */  /* RTT measurement */ -	u64	tcp_mstamp;	/* most recent packet received/sent */ -	u32	srtt_us;	/* smoothed round trip time << 3 in usecs */ -	u32	mdev_us;	/* medium deviation			*/  	u32	mdev_max_us;	/* maximal mdev for the last rtt period	*/ -	u32	rttvar_us;	/* smoothed mdev_max			*/ -	u32	rtt_seq;	/* sequence number to update rttvar	*/ -	struct  minmax rtt_min; -	u32	packets_out;	/* Packets which are "in flight"	*/ -	u32	retrans_out;	/* Retransmitted packets out		*/ -	u32	max_packets_out;  /* max packets_out in last window */ -	u32	cwnd_usage_seq;  /* right edge of cwnd usage tracking flight */ - -	u16	urg_data;	/* Saved octet of OOB data and control flags */ -	u8	ecn_flags;	/* ECN status bits.			*/  	u8	keepalive_probes; /* num of allowed keep alive probes	*/ -	u32	reordering;	/* Packet reordering metric.		*/  	u32	reord_seen;	/* number of data packet reordering events */ -	u32	snd_up;		/* Urgent pointer		*/ - -/* - *      Options received (usually on last packet, some only on SYN packets). - */ -	struct tcp_options_received rx_opt;  /*   *	Slow start and congestion control (see also Nagle, and Karn & Partridge)   */ - 	u32	snd_ssthresh;	/* Slow start size threshold		*/ - 	u32	snd_cwnd;	/* Sending congestion window		*/  	u32	snd_cwnd_cnt;	/* Linear increase counter		*/  	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */  	u32	snd_cwnd_used; @@ -343,32 +402,10 @@ struct tcp_sock {  	u32	prior_cwnd;	/* cwnd right before starting loss recovery */  	u32	prr_delivered;	/* Number of newly delivered packets to  				 * receiver in Recovery. */ -	u32	prr_out;	/* Total number of pkts sent during Recovery. */ -	u32	delivered;	/* Total data packets delivered incl. rexmits */ -	u32	delivered_ce;	/* Like the above but only ECE marked packets */ -	u32	lost;		/* Total data packets lost incl. rexmits */ -	u32	app_limited;	/* limited until "delivered" reaches this val */ -	u64	first_tx_mstamp;  /* start of window send phase */ -	u64	delivered_mstamp; /* time we reached "delivered" */ -	u32	rate_delivered;    /* saved rate sample: packets delivered */ -	u32	rate_interval_us;  /* saved rate sample: time elapsed */ - - 	u32	rcv_wnd;	/* Current receiver window		*/ -	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */ -	u32	notsent_lowat;	/* TCP_NOTSENT_LOWAT */ -	u32	pushed_seq;	/* Last pushed seq, required to talk to windows */ -	u32	lost_out;	/* Lost packets			*/ -	u32	sacked_out;	/* SACK'd packets			*/  	struct hrtimer	pacing_timer;  	struct hrtimer	compressed_ack_timer; -	/* from STCP, retrans queue hinting */ -	struct sk_buff* lost_skb_hint; -	struct sk_buff *retransmit_skb_hint; - -	/* OOO segments go in this rbtree. Socket lock must be held. */ -	struct rb_root	out_of_order_queue;  	struct sk_buff	*ooo_last_skb; /* cache rb_last(out_of_order_queue) */  	/* SACKs data, these 2 need to be together (see tcp_options_write) */ @@ -377,12 +414,6 @@ struct tcp_sock {  	struct tcp_sack_block recv_sack_cache[4]; -	struct sk_buff *highest_sack;   /* skb just after the highest -					 * skb with SACKed bit set -					 * (validity guaranteed only if -					 * sacked_out > 0) -					 */ -  	int     lost_cnt_hint;  	u32	prior_ssthresh; /* ssthresh saved at recovery start	*/ @@ -433,21 +464,6 @@ struct tcp_sock {  	u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */ -/* Receiver side RTT estimation */ -	u32 rcv_rtt_last_tsecr; -	struct { -		u32	rtt_us; -		u32	seq; -		u64	time; -	} rcv_rtt_est; - -/* Receiver queue space */ -	struct { -		u32	space; -		u32	seq; -		u64	time; -	} rcvq_space; -  /* TCP-specific MTU probe information. */  	struct {  		u32		  probe_seq_start;  |