From 2767c97765cb3d9b54c8e62b468e55cc56854a66 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:20 -0700 Subject: selftests/bpf: Implement sample tcp/tcp6 bpf_iter programs In my VM, I got identical result compared to /proc/net/{tcp,tcp6}. For tcp6: $ cat /proc/net/tcp6 sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode 0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000001 00000000 0 0 17955 1 000000003eb3102e 100 0 0 10 0 $ cat /sys/fs/bpf/p1 sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode 0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 17955 1 000000003eb3102e 100 0 0 10 0 For tcp: $ cat /proc/net/tcp sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode 0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 2666 1 000000007152e43f 100 0 0 10 0 $ cat /sys/fs/bpf/p2 sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode 1: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 2666 1 000000007152e43f 100 0 0 10 0 Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230820.3989165-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c | 250 ++++++++++++++++++++++ 1 file changed, 250 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c (limited to 'tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c') diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c new file mode 100644 index 000000000000..10dec4392031 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -0,0 +1,250 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +static int hlist_unhashed_lockless(const struct hlist_node *h) +{ + return !(h->pprev); +} + +static int timer_pending(const struct timer_list * timer) +{ + return !hlist_unhashed_lockless(&timer->entry); +} + +extern unsigned CONFIG_HZ __kconfig; + +#define USER_HZ 100 +#define NSEC_PER_SEC 1000000000ULL +static clock_t jiffies_to_clock_t(unsigned long x) +{ + /* The implementation here tailored to a particular + * setting of USER_HZ. + */ + u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ; + u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ; + + if ((tick_nsec % user_hz_nsec) == 0) { + if (CONFIG_HZ < USER_HZ) + return x * (USER_HZ / CONFIG_HZ); + else + return x / (CONFIG_HZ / USER_HZ); + } + return x * tick_nsec/user_hz_nsec; +} + +static clock_t jiffies_delta_to_clock_t(long delta) +{ + if (delta <= 0) + return 0; + + return jiffies_to_clock_t(delta); +} + +static long sock_i_ino(const struct sock *sk) +{ + const struct socket *sk_socket = sk->sk_socket; + const struct inode *inode; + unsigned long ino; + + if (!sk_socket) + return 0; + + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; + bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + return ino; +} + +static bool +inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk) +{ + return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; +} + +static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp) +{ + return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; +} + +static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp, + uid_t uid, __u32 seq_num) +{ + const struct inet_connection_sock *icsk; + const struct fastopen_queue *fastopenq; + const struct in6_addr *dest, *src; + const struct inet_sock *inet; + unsigned long timer_expires; + const struct sock *sp; + __u16 
destp, srcp; + int timer_active; + int rx_queue; + int state; + + icsk = &tp->tcp.inet_conn; + inet = &icsk->icsk_inet; + sp = &inet->sk; + fastopenq = &icsk->icsk_accept_queue.fastopenq; + + dest = &sp->sk_v6_daddr; + src = &sp->sk_v6_rcv_saddr; + destp = bpf_ntohs(inet->inet_dport); + srcp = bpf_ntohs(inet->inet_sport); + + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + timer_active = 1; + timer_expires = icsk->icsk_timeout; + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + timer_active = 4; + timer_expires = icsk->icsk_timeout; + } else if (timer_pending(&sp->sk_timer)) { + timer_active = 2; + timer_expires = sp->sk_timer.expires; + } else { + timer_active = 0; + timer_expires = bpf_jiffies64(); + } + + state = sp->sk_state; + if (state == TCP_LISTEN) { + rx_queue = sp->sk_ack_backlog; + } else { + rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq; + if (rx_queue < 0) + rx_queue = 0; + } + + BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + seq_num, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ", + state, + tp->tcp.write_seq - tp->tcp.snd_una, rx_queue, + timer_active, + jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()), + icsk->icsk_retransmits, uid, + icsk->icsk_probes_out, + sock_i_ino(sp), + sp->sk_refcnt.refs.counter); + BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n", + tp, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), + (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk), + tp->tcp.snd_cwnd, + state == TCP_LISTEN ? fastopenq->max_qlen + : (tcp_in_initial_slowstart(&tp->tcp) ? 
-1 + : tp->tcp.snd_ssthresh) + ); + + return 0; +} + +static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw, + uid_t uid, __u32 seq_num) +{ + struct inet_timewait_sock *tw = &ttw->tw_sk; + const struct in6_addr *dest, *src; + __u16 destp, srcp; + long delta; + + delta = tw->tw_timer.expires - bpf_jiffies64(); + dest = &tw->tw_v6_daddr; + src = &tw->tw_v6_rcv_saddr; + destp = bpf_ntohs(tw->tw_dport); + srcp = bpf_ntohs(tw->tw_sport); + + BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + seq_num, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp); + + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + tw->tw_substate, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, + tw->tw_refcnt.refs.counter, tw); + + return 0; +} + +static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq, + uid_t uid, __u32 seq_num) +{ + struct inet_request_sock *irsk = &treq->req; + struct request_sock *req = &irsk->req; + struct in6_addr *src, *dest; + long ttd; + + ttd = req->rsk_timer.expires - bpf_jiffies64(); + src = &irsk->ir_v6_loc_addr; + dest = &irsk->ir_v6_rmt_addr; + + if (ttd < 0) + ttd = 0; + + BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + seq_num, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], + irsk->ir_num, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], + bpf_ntohs(irsk->ir_rmt_port)); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd), + req->num_timeout, uid, 0, 0, 0, req); + + return 0; +} + +SEC("iter/tcp") +int dump_tcp6(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + struct seq_file *seq = ctx->meta->seq; + struct tcp_timewait_sock *tw; + struct 
tcp_request_sock *req; + struct tcp6_sock *tp; + uid_t uid = ctx->uid; + __u32 seq_num; + + if (sk_common == (void *)0) + return 0; + + seq_num = ctx->meta->seq_num; + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, " sl " + "local_address " + "remote_address " + "st tx_queue rx_queue tr tm->when retrnsmt" + " uid timeout inode\n"); + + if (sk_common->skc_family != AF_INET6) + return 0; + + tp = bpf_skc_to_tcp6_sock(sk_common); + if (tp) + return dump_tcp6_sock(seq, tp, uid, seq_num); + + tw = bpf_skc_to_tcp_timewait_sock(sk_common); + if (tw) + return dump_tw_sock(seq, tw, uid, seq_num); + + req = bpf_skc_to_tcp_request_sock(sk_common); + if (req) + return dump_req_sock(seq, req, uid, seq_num); + + return 0; +} -- cgit From e4d9c2320716ea0e9ef59f503ddd8f253a642ddd Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 20 Jul 2020 13:48:06 +0200 Subject: samples/bpf, selftests/bpf: Use bpf_probe_read_kernel A handful of samples and selftests fail to build on s390, because after commit 0ebeea8ca8a4 ("bpf: Restrict bpf_probe_read{, str}() only to archs where they work") bpf_probe_read is not available anymore. Fix by using bpf_probe_read_kernel. 
Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200720114806.88823-1-iii@linux.ibm.com --- samples/bpf/offwaketime_kern.c | 7 ++++++- samples/bpf/test_overhead_kprobe_kern.c | 12 +++++++++--- samples/bpf/tracex1_kern.c | 9 +++++++-- samples/bpf/tracex5_kern.c | 4 ++-- tools/bpf/bpftool/skeleton/pid_iter.bpf.c | 3 ++- tools/testing/selftests/bpf/progs/bpf_iter_netlink.c | 6 +++--- tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c | 2 +- tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c | 2 +- tools/testing/selftests/bpf/progs/bpf_iter_udp4.c | 2 +- tools/testing/selftests/bpf/progs/bpf_iter_udp6.c | 2 +- 10 files changed, 33 insertions(+), 16 deletions(-) (limited to 'tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c') diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c index d459f73412a4..e74ee1cd4b9c 100644 --- a/samples/bpf/offwaketime_kern.c +++ b/samples/bpf/offwaketime_kern.c @@ -12,7 +12,12 @@ #include #include -#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define _(P) \ + ({ \ + typeof(P) val; \ + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ + val; \ + }) #define MINBLOCK_US 1 diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c index 8b811c29dc79..f6d593e47037 100644 --- a/samples/bpf/test_overhead_kprobe_kern.c +++ b/samples/bpf/test_overhead_kprobe_kern.c @@ -10,7 +10,12 @@ #include #include -#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define _(P) \ + ({ \ + typeof(P) val = 0; \ + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ + val; \ + }) SEC("kprobe/__set_task_comm") int prog(struct pt_regs *ctx) @@ -25,8 +30,9 @@ int prog(struct pt_regs *ctx) tsk = (void *)PT_REGS_PARM1(ctx); pid = _(tsk->pid); - bpf_probe_read(oldcomm, sizeof(oldcomm), &tsk->comm); - bpf_probe_read(newcomm, sizeof(newcomm), (void *)PT_REGS_PARM2(ctx)); + bpf_probe_read_kernel(oldcomm, 
sizeof(oldcomm), &tsk->comm); + bpf_probe_read_kernel(newcomm, sizeof(newcomm), + (void *)PT_REGS_PARM2(ctx)); signal = _(tsk->signal); oom_score_adj = _(signal->oom_score_adj); return 0; diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c index 8e2610e14475..3f4599c9a202 100644 --- a/samples/bpf/tracex1_kern.c +++ b/samples/bpf/tracex1_kern.c @@ -11,7 +11,12 @@ #include #include -#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define _(P) \ + ({ \ + typeof(P) val = 0; \ + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ + val; \ + }) /* kprobe is NOT a stable ABI * kernel functions can be removed, renamed or completely change semantics. @@ -34,7 +39,7 @@ int bpf_prog1(struct pt_regs *ctx) dev = _(skb->dev); len = _(skb->len); - bpf_probe_read(devname, sizeof(devname), dev->name); + bpf_probe_read_kernel(devname, sizeof(devname), dev->name); if (devname[0] == 'l' && devname[1] == 'o') { char fmt[] = "skb %p len %d\n"; diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c index 32b49e8ab6bd..64a1f7550d7e 100644 --- a/samples/bpf/tracex5_kern.c +++ b/samples/bpf/tracex5_kern.c @@ -47,7 +47,7 @@ PROG(SYS__NR_write)(struct pt_regs *ctx) { struct seccomp_data sd; - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); + bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); if (sd.args[2] == 512) { char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), @@ -60,7 +60,7 @@ PROG(SYS__NR_read)(struct pt_regs *ctx) { struct seccomp_data sd; - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); + bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); if (sd.args[2] > 128 && sd.args[2] <= 1024) { char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c index 8468a608911e..d9b420972934 100644 --- 
a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c +++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c @@ -71,7 +71,8 @@ int iter(struct bpf_iter__task_file *ctx) e.pid = task->tgid; e.id = get_obj_id(file->private_data, obj_type); - bpf_probe_read(&e.comm, sizeof(e.comm), task->group_leader->comm); + bpf_probe_read_kernel(&e.comm, sizeof(e.comm), + task->group_leader->comm); bpf_seq_write(ctx->meta->seq, &e, sizeof(e)); return 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c index 7de98a68599a..95989f4c99b5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c @@ -36,10 +36,10 @@ int dump_netlink(struct bpf_iter__netlink *ctx) if (!nlk->groups) { group = 0; } else { - /* FIXME: temporary use bpf_probe_read here, needs + /* FIXME: temporary use bpf_probe_read_kernel here, needs * verifier support to do direct access. */ - bpf_probe_read(&group, sizeof(group), &nlk->groups[0]); + bpf_probe_read_kernel(&group, sizeof(group), &nlk->groups[0]); } BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ", nlk->portid, (u32)group, @@ -56,7 +56,7 @@ int dump_netlink(struct bpf_iter__netlink *ctx) * with current verifier. 
*/ inode = SOCK_INODE(sk); - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); } BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c index 30fd587cb325..54380c5e1069 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -57,7 +57,7 @@ static long sock_i_ino(const struct sock *sk) return 0; inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); return ino; } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c index 10dec4392031..b4fbddfa4e10 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -57,7 +57,7 @@ static long sock_i_ino(const struct sock *sk) return 0; inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); return ino; } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c index 7053784575e4..f258583afbbd 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c @@ -18,7 +18,7 @@ static long sock_i_ino(const struct sock *sk) return 0; inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); return ino; } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c index c1175a6ecf43..65f93bb03f0f 100644 --- 
a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c @@ -25,7 +25,7 @@ static long sock_i_ino(const struct sock *sk) return 0; inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); return ino; } -- cgit