From b4c1c1d03842e6b24e87e48c07769e105df99985 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 21 Aug 2013 19:47:58 +0200 Subject: net: tcp_probe: also include rcv_wnd next to snd_wnd It is helpful to sometimes know the TCP window sizes of an established socket e.g. to confirm that window scaling is working or to tweak the window size to improve high-latency connections, etc etc. Currently the TCP snooper only exports the send window size, but not the receive window size. Therefore, also add the receive window size to the end of the output line. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv4/tcp_probe.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_probe.c') diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index d4943f67aff2..fae788b5b03e 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -60,6 +60,7 @@ struct tcp_log { u32 snd_nxt; u32 snd_una; u32 snd_wnd; + u32 rcv_wnd; u32 snd_cwnd; u32 ssthresh; u32 srtt; @@ -116,6 +117,7 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, p->snd_una = tp->snd_una; p->snd_cwnd = tp->snd_cwnd; p->snd_wnd = tp->snd_wnd; + p->rcv_wnd = tp->rcv_wnd; p->ssthresh = tcp_current_ssthresh(sk); p->srtt = tp->srtt >> 3; @@ -157,13 +159,13 @@ static int tcpprobe_sprint(char *tbuf, int n) = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); return scnprintf(tbuf, n, - "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n", + "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u %u\n", (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec, &p->saddr, ntohs(p->sport), &p->daddr, ntohs(p->dport), p->length, p->snd_nxt, p->snd_una, - p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt); + p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd); } static ssize_t tcpprobe_read(struct file *file, char __user *buf, -- cgit From d8cdeda6ddbcb33debbb87590a7d42ff7f5b5cfd Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 21 Aug 2013 19:47:59 +0200 Subject: net: tcp_probe: kprobes: adapt jtcp_rcv_established signature This patches fixes a rather unproblematic function signature mismatch as the const specifier was missing for the th variable; and next to that it adds a build-time assertion so that future function signature mismatches for kprobes will not end badly, similarly as commit 22222997 ("net: sctp: add build check for sctp_sf_eat_sack_6_2/jsctp_sf_eat_sack") did it for SCTP. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv4/tcp_probe.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp_probe.c') diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index fae788b5b03e..a2392f48ba35 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -92,7 +92,7 @@ static inline int tcp_probe_avail(void) * Note: arguments must match tcp_rcv_established()! */ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned int len) + const struct tcphdr *th, unsigned int len) { const struct tcp_sock *tp = tcp_sk(sk); const struct inet_sock *inet = inet_sk(sk); @@ -225,6 +225,13 @@ static __init int tcpprobe_init(void) { int ret = -ENOMEM; + /* Warning: if the function signature of tcp_rcv_established, + * has been changed, you also have to change the signature of + * jtcp_rcv_established, otherwise you end up right here! + */ + BUILD_BUG_ON(__same_type(tcp_rcv_established, + jtcp_rcv_established) == 0); + init_waitqueue_head(&tcp_probe.wait); spin_lock_init(&tcp_probe.lock); -- cgit From f925d0a62db3f1b6e463ef956d0855006538d002 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 21 Aug 2013 19:48:00 +0200 Subject: net: tcp_probe: add IPv6 support The tcp_probe currently only supports analysis of IPv4 connections. Therefore, it would be nice to have IPv6 supported as well. Since we have the recently added %pISpc specifier that is IPv4/IPv6 generic, build related sockaddress structures from the flow information and pass this to our format string. Tested with SSH and HTTP sessions on IPv4 and IPv6. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv4/tcp_probe.c | 54 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 10 deletions(-) (limited to 'net/ipv4/tcp_probe.c') diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index a2392f48ba35..301a3effe579 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -54,8 +54,11 @@ static const char procname[] = "tcpprobe"; struct tcp_log { ktime_t tstamp; - __be32 saddr, daddr; - __be16 sport, dport; + union { + struct sockaddr raw; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } src, dst; u16 length; u32 snd_nxt; u32 snd_una; @@ -87,6 +90,30 @@ static inline int tcp_probe_avail(void) return bufsize - tcp_probe_used() - 1; } +#define tcp_probe_copy_fl_to_si4(inet, si4, mem) \ + do { \ + si4.sin_family = AF_INET; \ + si4.sin_port = inet->inet_##mem##port; \ + si4.sin_addr.s_addr = inet->inet_##mem##addr; \ + } while (0) \ + +#if IS_ENABLED(CONFIG_IPV6) +#define tcp_probe_copy_fl_to_si6(inet, si6, mem) \ + do { \ + struct ipv6_pinfo *pi6 = inet->pinet6; \ + si6.sin6_family = AF_INET6; \ + si6.sin6_port = inet->inet_##mem##port; \ + si6.sin6_addr = pi6->mem##addr; \ + si6.sin6_flowinfo = 0; /* No need here. */ \ + si6.sin6_scope_id = 0; /* No need here. */ \ + } while (0) +#else +#define tcp_probe_copy_fl_to_si6(fl, si6, mem) \ + do { \ + memset(&si6, 0, sizeof(si6)); \ + } while (0) +#endif + /* * Hook inserted to be called before each receive packet. * Note: arguments must match tcp_rcv_established()! @@ -108,10 +135,19 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, struct tcp_log *p = tcp_probe.log + tcp_probe.head; p->tstamp = ktime_get(); - p->saddr = inet->inet_saddr; - p->sport = inet->inet_sport; - p->daddr = inet->inet_daddr; - p->dport = inet->inet_dport; + switch (sk->sk_family) { + case AF_INET: + tcp_probe_copy_fl_to_si4(inet, p->src.v4, s); + tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d); + break; + case AF_INET6: + tcp_probe_copy_fl_to_si6(inet, p->src.v6, s); + tcp_probe_copy_fl_to_si6(inet, p->dst.v6, d); + break; + default: + BUG(); + } + p->length = skb->len; p->snd_nxt = tp->snd_nxt; p->snd_una = tp->snd_una; @@ -159,12 +195,10 @@ static int tcpprobe_sprint(char *tbuf, int n) = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); return scnprintf(tbuf, n, - "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u %u\n", + "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n", (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec, - &p->saddr, ntohs(p->sport), - &p->daddr, ntohs(p->dport), - p->length, p->snd_nxt, p->snd_una, + &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una, p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd); } -- cgit From b1dcdc68b1f4cc77f603d7507f7a14f1f4864d41 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Aug 2013 16:16:33 +0200 Subject: net: tcp_probe: allow more advanced ingress filtering by mark Currently, the tcp_probe snooper can either filter packets by a given port (handed to the module via module parameter e.g. port=80) or lets all TCP traffic pass (port=0, default). When a port is specified, the port number is tested against the sk's source/destination port. Thus, if one of them matches, the information will be further processed for the log. As this is quite limited, allow for more advanced filtering possibilities which can facilitate debugging/analysis with the help of the tcp_probe snooper. Therefore, similarly as added to BPF machine in commit 7e75f93e ("pkt_sched: ingress socket filter by mark"), add the possibility to use skb->mark as a filter. If the mark is not being used otherwise, this allows ingress filtering by flow (e.g. in order to track updates from only a single flow, or a subset of all flows for a given port) and other things such as dynamic logging and reconfiguration without removing/re-inserting the tcp_probe module, etc. Simple example: insmod net/ipv4/tcp_probe.ko fwmark=8888 full=1 ... iptables -A INPUT -i eth4 -t mangle -p tcp --dport 22 \ --sport 60952 -j MARK --set-mark 8888 [... sampling interval ...] iptables -D INPUT -i eth4 -t mangle -p tcp --dport 22 \ --sport 60952 -j MARK --set-mark 8888 The current option to filter by a given port is still being preserved. A similar approach could be done for the sctp_probe module as a follow-up. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv4/tcp_probe.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_probe.c') diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 301a3effe579..622a4377b397 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -46,6 +46,10 @@ static unsigned int bufsize __read_mostly = 4096; MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); module_param(bufsize, uint, 0); +static unsigned int fwmark __read_mostly = 0; +MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)"); +module_param(fwmark, uint, 0); + static int full __read_mostly; MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); module_param(full, int, 0); @@ -124,9 +128,11 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct tcp_sock *tp = tcp_sk(sk); const struct inet_sock *inet = inet_sk(sk); - /* Only update if port matches */ - if ((port == 0 || ntohs(inet->inet_dport) == port || - ntohs(inet->inet_sport) == port) && + /* Only update if port or skb mark matches */ + if (((port == 0 && fwmark == 0) || + ntohs(inet->inet_dport) == port || + ntohs(inet->inet_sport) == port || + (fwmark > 0 && skb->mark == fwmark)) && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { spin_lock(&tcp_probe.lock); @@ -284,7 +290,8 @@ static __init int tcpprobe_init(void) if (ret) goto err1; - pr_info("probe registered (port=%d) bufsize=%u\n", port, bufsize); + pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n", + port, fwmark, bufsize); return 0; err1: remove_proc_entry(procname, init_net.proc_net); -- cgit From cc8c6c1b21c9b1e0f1e89428c07c7c52435cba0c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 3 Sep 2013 18:24:02 +0200 Subject: net: tcp_probe: adapt tbuf size for recent changes With recent changes in tcp_probe module (e.g. f925d0a62d ("net: tcp_probe: add IPv6 support")) we also need to take into account that tbuf needs to be updated as format string will be further expanded. tbuf sits on the stack in tcpprobe_read() function that is invoked when user space reads procfs file /proc/net/tcpprobe, hence not fast path as in jtcp_rcv_established(). Having a size similarly as in sctp_probe module of 256 bytes is fully sufficient for that, we need theoretical maximum of 252 bytes otherwise we could get truncated. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv4/tcp_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_probe.c') diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 622a4377b397..1f6aa543b64e 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -218,7 +218,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf, return -EINVAL; while (cnt < len) { - char tbuf[164]; + char tbuf[256]; int width; /* Wait for data in buffer */ -- cgit From c995ae2259ee36caf48bbfacf40111998dacd4af Mon Sep 17 00:00:00 2001 From: Vijay Subramanian Date: Tue, 3 Sep 2013 12:23:22 -0700 Subject: tcp: Change return value of tcp_rcv_established() tcp_rcv_established() returns only one value namely 0. We change the return value to void (as suggested by David Miller). After commit 0c24604b (tcp: implement RFC 5961 4.2), we no longer send RSTs in response to SYNs. We can remove the check and processing on the return value of tcp_rcv_established(). We also fix jtcp_rcv_established() in tcp_probe.c to match that of tcp_rcv_established(). Signed-off-by: Vijay Subramanian Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++-- net/ipv4/tcp_input.c | 13 ++++++------- net/ipv4/tcp_ipv4.c | 5 +---- net/ipv4/tcp_probe.c | 5 ++--- net/ipv6/tcp_ipv6.c | 3 +-- 5 files changed, 12 insertions(+), 18 deletions(-) (limited to 'net/ipv4/tcp_probe.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 6a6a88db462d..b1aa324c5e65 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -371,8 +371,8 @@ extern void tcp_delack_timer_handler(struct sock *sk); extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len); -extern int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len); +extern void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct tcphdr *th, unsigned int len); extern void tcp_rcv_space_adjust(struct sock *sk); extern void tcp_cleanup_rbuf(struct sock *sk, int copied); extern int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1a84fffe6993..93d7e9de4143 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5049,8 +5049,8 @@ discard: * the rest is checked inline. Fast processing is turned on in * tcp_data_queue when everything is OK. */ -int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) +void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct tcphdr *th, unsigned int len) { struct tcp_sock *tp = tcp_sk(sk); @@ -5127,7 +5127,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_ack(sk, skb, 0); __kfree_skb(skb); tcp_data_snd_check(sk); - return 0; + return; } else { /* Header too small */ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); goto discard; @@ -5220,7 +5220,7 @@ no_ack: if (eaten) kfree_skb_partial(skb, fragstolen); sk->sk_data_ready(sk, 0); - return 0; + return; } } @@ -5236,7 +5236,7 @@ slow_path: */ if (!tcp_validate_incoming(sk, skb, th, 1)) - return 0; + return; step5: if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) @@ -5252,7 +5252,7 @@ step5: tcp_data_snd_check(sk); tcp_ack_snd_check(sk); - return 0; + return; csum_error: TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); @@ -5260,7 +5260,6 @@ csum_error: discard: __kfree_skb(skb); - return 0; } EXPORT_SYMBOL(tcp_rcv_established); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 09d45d718973..b14266bb91eb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1799,10 +1799,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_rx_dst = NULL; } } - if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { - rsk = sk; - goto reset; - } + tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len); return 0; } diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 1f6aa543b64e..611beab38a00 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -122,8 +122,8 @@ static inline int tcp_probe_avail(void) * Hook inserted to be called before each receive packet. * Note: arguments must match tcp_rcv_established()! */ -static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) +static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct tcphdr *th, unsigned int len) { const struct tcp_sock *tp = tcp_sk(sk); const struct inet_sock *inet = inet_sk(sk); @@ -172,7 +172,6 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, } jprobe_return(); - return 0; } static struct jprobe tcp_jprobe = { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5bcfadf09e95..9acdcedf9a14 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1360,8 +1360,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } } - if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) - goto reset; + tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len); if (opt_skb) goto ipv6_pktoptions; return 0; -- cgit