diff options
Diffstat (limited to 'samples/bpf')
-rw-r--r-- | samples/bpf/Makefile | 1 | ||||
-rwxr-xr-x | samples/bpf/xdp2skb_meta.sh | 220 | ||||
-rw-r--r-- | samples/bpf/xdp2skb_meta_kern.c | 105 | ||||
-rw-r--r-- | samples/bpf/xdp_monitor_kern.c | 96 | ||||
-rw-r--r-- | samples/bpf/xdp_monitor_user.c | 416 |
5 files changed, 770 insertions, 68 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 3ff7a05bea9a..7f61a3d57fa7 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -142,6 +142,7 @@ always += xdp_redirect_map_kern.o always += xdp_redirect_cpu_kern.o always += xdp_monitor_kern.o always += xdp_rxq_info_kern.o +always += xdp2skb_meta_kern.o always += syscall_tp_kern.o HOSTCFLAGS += -I$(objtree)/usr/include diff --git a/samples/bpf/xdp2skb_meta.sh b/samples/bpf/xdp2skb_meta.sh new file mode 100755 index 000000000000..b9c9549c4c27 --- /dev/null +++ b/samples/bpf/xdp2skb_meta.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. +# +# Bash-shell example on using iproute2 tools 'tc' and 'ip' to load +# eBPF programs, both for XDP and clsbpf. Shell script function +# wrappers and even long options parsing is illustrated, for ease of +# use. +# +# Related to sample/bpf/xdp2skb_meta_kern.c, which contains BPF-progs +# that need to collaborate between XDP and TC hooks. Thus, it is +# convenient that the same tool load both programs that need to work +# together. +# +BPF_FILE=xdp2skb_meta_kern.o +DIR=$(dirname $0) + +export TC=/usr/sbin/tc +export IP=/usr/sbin/ip + +function usage() { + echo "" + echo "Usage: $0 [-vfh] --dev ethX" + echo " -d | --dev : Network device (required)" + echo " --flush : Cleanup flush TC and XDP progs" + echo " --list : (\$LIST) List TC and XDP progs" + echo " -v | --verbose : (\$VERBOSE) Verbose" + echo " --dry-run : (\$DRYRUN) Dry-run only (echo commands)" + echo "" +} + +## -- General shell logging cmds -- +function err() { + local exitcode=$1 + shift + echo "ERROR: $@" >&2 + exit $exitcode +} + +function info() { + if [[ -n "$VERBOSE" ]]; then + echo "# $@" + fi +} + +## -- Helper function calls -- + +# Wrapper call for TC and IP +# - Will display the offending command on failure +function _call_cmd() { + local cmd="$1" + local allow_fail="$2" + shift 2 + if [[ -n "$VERBOSE" ]]; then + echo "$(basename $cmd) $@" + fi + if [[ -n "$DRYRUN" ]]; then + return + fi + $cmd "$@" + local status=$? + if (( $status != 0 )); then + if [[ "$allow_fail" == "" ]]; then + err 2 "Exec error($status) occurred cmd: \"$cmd $@\"" + fi + fi +} +function call_tc() { + _call_cmd "$TC" "" "$@" +} +function call_tc_allow_fail() { + _call_cmd "$TC" "allow_fail" "$@" +} +function call_ip() { + _call_cmd "$IP" "" "$@" +} + +## --- Parse command line arguments / parameters --- +# Using external program "getopt" to get --long-options +OPTIONS=$(getopt -o vfhd: \ + --long verbose,flush,help,list,dev:,dry-run -- "$@") +if (( $? != 0 )); then + err 4 "Error calling getopt" +fi +eval set -- "$OPTIONS" + +unset DEV +unset FLUSH +while true; do + case "$1" in + -d | --dev ) # device + DEV=$2 + info "Device set to: DEV=$DEV" >&2 + shift 2 + ;; + -v | --verbose) + VERBOSE=yes + # info "Verbose mode: VERBOSE=$VERBOSE" >&2 + shift + ;; + --dry-run ) + DRYRUN=yes + VERBOSE=yes + info "Dry-run mode: enable VERBOSE and don't call TC+IP" >&2 + shift + ;; + -f | --flush ) + FLUSH=yes + shift + ;; + --list ) + LIST=yes + shift + ;; + -- ) + shift + break + ;; + -h | --help ) + usage; + exit 0 + ;; + * ) + shift + break + ;; + esac +done + +FILE="$DIR/$BPF_FILE" +if [[ ! -e $FILE ]]; then + err 3 "Missing BPF object file ($FILE)" +fi + +if [[ -z $DEV ]]; then + usage + err 2 "Please specify network device -- required option --dev" +fi + +## -- Function calls -- + +function list_tc() +{ + local device="$1" + shift + info "Listing current TC ingress rules" + call_tc filter show dev $device ingress +} + +function list_xdp() +{ + local device="$1" + shift + info "Listing current XDP device($device) setting" + call_ip link show dev $device | grep --color=auto xdp +} + +function flush_tc() +{ + local device="$1" + shift + info "Flush TC on device: $device" + call_tc_allow_fail filter del dev $device ingress + call_tc_allow_fail qdisc del dev $device clsact +} + +function flush_xdp() +{ + local device="$1" + shift + info "Flush XDP on device: $device" + call_ip link set dev $device xdp off +} + +function attach_tc_mark() +{ + local device="$1" + local file="$2" + local prog="tc_mark" + shift 2 + + # Re-attach clsact to clear/flush existing role + call_tc_allow_fail qdisc del dev $device clsact 2> /dev/null + call_tc qdisc add dev $device clsact + + # Attach BPF prog + call_tc filter add dev $device ingress \ + prio 1 handle 1 bpf da obj $file sec $prog +} + +function attach_xdp_mark() +{ + local device="$1" + local file="$2" + local prog="xdp_mark" + shift 2 + + # Remove XDP prog in-case it's already loaded + # TODO: Need ip-link option to override/replace existing XDP prog + flush_xdp $device + + # Attach XDP/BPF prog + call_ip link set dev $device xdp obj $file sec $prog +} + +if [[ -n $FLUSH ]]; then + flush_tc $DEV + flush_xdp $DEV + exit 0 +fi + +if [[ -n $LIST ]]; then + list_tc $DEV + list_xdp $DEV + exit 0 +fi + +attach_tc_mark $DEV $FILE +attach_xdp_mark $DEV $FILE diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c new file mode 100644 index 000000000000..0c12048ac79f --- /dev/null +++ b/samples/bpf/xdp2skb_meta_kern.c @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. + * + * Example howto transfer info from XDP to SKB, e.g. skb->mark + * ----------------------------------------------------------- + * This uses the XDP data_meta infrastructure, and is a cooperation + * between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook. + * + * Notice: This example does not use the BPF C-loader (bpf_load.c), + * but instead rely on the iproute2 TC tool for loading BPF-objects. + */ +#include <uapi/linux/bpf.h> +#include <uapi/linux/pkt_cls.h> + +#include "bpf_helpers.h" + +/* + * This struct is stored in the XDP 'data_meta' area, which is located + * just in-front-of the raw packet payload data. The meaning is + * specific to these two BPF programs that use it as a communication + * channel. XDP adjust/increase the area via a bpf-helper, and TC use + * boundary checks to see if data have been provided. + * + * The struct must be 4 byte aligned, which here is enforced by the + * struct __attribute__((aligned(4))). + */ +struct meta_info { + __u32 mark; +} __attribute__((aligned(4))); + +SEC("xdp_mark") +int _xdp_mark(struct xdp_md *ctx) +{ + struct meta_info *meta; + void *data, *data_end; + int ret; + + /* Reserve space in-front of data pointer for our meta info. + * (Notice drivers not supporting data_meta will fail here!) + */ + ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)); + if (ret < 0) + return XDP_ABORTED; + + /* Notice: Kernel-side verifier requires that loading of + * ctx->data MUST happen _after_ helper bpf_xdp_adjust_meta(), + * as pkt-data pointers are invalidated. Helpers that require + * this are determined/marked by bpf_helper_changes_pkt_data() + */ + data = (void *)(unsigned long)ctx->data; + + /* Check data_meta have room for meta_info struct */ + meta = (void *)(unsigned long)ctx->data_meta; + if (meta + 1 > data) + return XDP_ABORTED; + + meta->mark = 42; + + return XDP_PASS; +} + +SEC("tc_mark") +int _tc_mark(struct __sk_buff *ctx) +{ + void *data = (void *)(unsigned long)ctx->data; + void *data_end = (void *)(unsigned long)ctx->data_end; + void *data_meta = (void *)(unsigned long)ctx->data_meta; + struct meta_info *meta = data_meta; + + /* Check XDP gave us some data_meta */ + if (meta + 1 > data) { + ctx->mark = 41; + /* Skip "accept" if no data_meta is avail */ + return TC_ACT_OK; + } + + /* Hint: See func tc_cls_act_is_valid_access() for BPF_WRITE access */ + ctx->mark = meta->mark; /* Transfer XDP-mark to SKB-mark */ + + return TC_ACT_OK; +} + +/* Manually attaching these programs: +export DEV=ixgbe2 +export FILE=xdp2skb_meta_kern.o + +# via TC command +tc qdisc del dev $DEV clsact 2> /dev/null +tc qdisc add dev $DEV clsact +tc filter add dev $DEV ingress prio 1 handle 1 bpf da obj $FILE sec tc_mark +tc filter show dev $DEV ingress + +# XDP via IP command: +ip link set dev $DEV xdp off +ip link set dev $DEV xdp obj $FILE sec xdp_mark + +# Use iptable to "see" if SKBs are marked +iptables -I INPUT -p icmp -m mark --mark 41 # == 0x29 +iptables -I INPUT -p icmp -m mark --mark 42 # == 0x2a + +# Hint: catch XDP_ABORTED errors via +perf record -e xdp:* +perf script + +*/ diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c index 2fe2f761a0d0..211db8ded0de 100644 --- a/samples/bpf/xdp_monitor_kern.c +++ b/samples/bpf/xdp_monitor_kern.c @@ -1,6 +1,7 @@ -/* XDP monitor tool, based on tracepoints +/* SPDX-License-Identifier: GPL-2.0 + * Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc. * - * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc. + * XDP monitor tool, based on tracepoints */ #include <uapi/linux/bpf.h> #include "bpf_helpers.h" @@ -104,7 +105,7 @@ struct xdp_exception_ctx { SEC("tracepoint/xdp/xdp_exception") int trace_xdp_exception(struct xdp_exception_ctx *ctx) { - u64 *cnt;; + u64 *cnt; u32 key; key = ctx->act; @@ -118,3 +119,92 @@ int trace_xdp_exception(struct xdp_exception_ctx *ctx) return 0; } + +/* Common stats data record shared with _user.c */ +struct datarec { + u64 processed; + u64 dropped; + u64 info; +}; +#define MAX_CPUS 64 + +struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(struct datarec), + .max_entries = MAX_CPUS, +}; + +struct bpf_map_def SEC("maps") cpumap_kthread_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(struct datarec), + .max_entries = 1, +}; + +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct cpumap_enqueue_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int map_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + int cpu; // offset:16; size:4; signed:1; + unsigned int drops; // offset:20; size:4; signed:0; + unsigned int processed; // offset:24; size:4; signed:0; + int to_cpu; // offset:28; size:4; signed:1; +}; + +SEC("tracepoint/xdp/xdp_cpumap_enqueue") +int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx) +{ + u32 to_cpu = ctx->to_cpu; + struct datarec *rec; + + if (to_cpu >= MAX_CPUS) + return 1; + + rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu); + if (!rec) + return 0; + rec->processed += ctx->processed; + rec->dropped += ctx->drops; + + /* Record bulk events, then userspace can calc average bulk size */ + if (ctx->processed > 0) + rec->info += 1; + + return 0; +} + +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct cpumap_kthread_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int map_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + int cpu; // offset:16; size:4; signed:1; + unsigned int drops; // offset:20; size:4; signed:0; + unsigned int processed; // offset:24; size:4; signed:0; + int sched; // offset:28; size:4; signed:1; +}; + +SEC("tracepoint/xdp/xdp_cpumap_kthread") +int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) +{ + struct datarec *rec; + u32 key = 0; + + rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key); + if (!rec) + return 0; + rec->processed += ctx->processed; + rec->dropped += ctx->drops; + + /* Count times kthread yielded CPU via schedule call */ + if (ctx->sched) + rec->info++; + + return 0; +} diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index eaba165b3549..eec14520d513 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -1,4 +1,5 @@ -/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. +/* SPDX-License-Identifier: GPL-2.0 + * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */ static const char *__doc__= "XDP monitor tool, based on tracepoints\n" @@ -40,6 +41,9 @@ static const struct option long_options[] = { {0, 0, NULL, 0 } }; +/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */ +#define EXIT_FAIL_MEM 5 + static void usage(char *argv[]) { int i; @@ -108,23 +112,93 @@ static const char *action2str(int action) return NULL; } +/* Common stats data record shared with _kern.c */ +struct datarec { + __u64 processed; + __u64 dropped; + __u64 info; +}; +#define MAX_CPUS 64 + +/* Userspace structs for collection of stats from maps */ struct record { - __u64 counter; __u64 timestamp; + struct datarec total; + struct datarec *cpu; +}; +struct u64rec { + __u64 processed; +}; +struct record_u64 { + /* record for _kern side __u64 values */ + __u64 timestamp; + struct u64rec total; + struct u64rec *cpu; }; struct stats_record { - struct record xdp_redir[REDIR_RES_MAX]; - struct record xdp_exception[XDP_ACTION_MAX]; + struct record_u64 xdp_redirect[REDIR_RES_MAX]; + struct record_u64 xdp_exception[XDP_ACTION_MAX]; + struct record xdp_cpumap_kthread; + struct record xdp_cpumap_enqueue[MAX_CPUS]; }; -static void stats_print_headers(bool err_only) +static bool map_collect_record(int fd, __u32 key, struct record *rec) { - if (err_only) - printf("\n%s\n", __doc_err_only__); + /* For percpu maps, userspace gets a value per possible CPU */ + unsigned int nr_cpus = bpf_num_possible_cpus(); + struct datarec values[nr_cpus]; + __u64 sum_processed = 0; + __u64 sum_dropped = 0; + __u64 sum_info = 0; + int i; + + if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { + fprintf(stderr, + "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); + return false; + } + /* Get time as close as possible to reading map contents */ + rec->timestamp = gettime(); - printf("%-14s %-11s %-10s %-18s %-9s\n", - "ACTION", "result", "pps ", "pps-human-readable", "measure-period"); + /* Record and sum values from each CPU */ + for (i = 0; i < nr_cpus; i++) { + rec->cpu[i].processed = values[i].processed; + sum_processed += values[i].processed; + rec->cpu[i].dropped = values[i].dropped; + sum_dropped += values[i].dropped; + rec->cpu[i].info = values[i].info; + sum_info += values[i].info; + } + rec->total.processed = sum_processed; + rec->total.dropped = sum_dropped; + rec->total.info = sum_info; + return true; +} + +static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec) +{ + /* For percpu maps, userspace gets a value per possible CPU */ + unsigned int nr_cpus = bpf_num_possible_cpus(); + struct u64rec values[nr_cpus]; + __u64 sum_total = 0; + int i; + + if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { + fprintf(stderr, + "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); + return false; + } + /* Get time as close as possible to reading map contents */ + rec->timestamp = gettime(); + + /* Record and sum values from each CPU */ + for (i = 0; i < nr_cpus; i++) { + rec->cpu[i].processed = values[i].processed; + sum_total += values[i].processed; + } + rec->total.processed = sum_total; + return true; } static double calc_period(struct record *r, struct record *p) @@ -139,77 +213,203 @@ static double calc_period(struct record *r, struct record *p) return period_; } -static double calc_pps(struct record *r, struct record *p, double period) +static double calc_period_u64(struct record_u64 *r, struct record_u64 *p) +{ + double period_ = 0; + __u64 period = 0; + + period = r->timestamp - p->timestamp; + if (period > 0) + period_ = ((double) period / NANOSEC_PER_SEC); + + return period_; +} + +static double calc_pps(struct datarec *r, struct datarec *p, double period) +{ + __u64 packets = 0; + double pps = 0; + + if (period > 0) { + packets = r->processed - p->processed; + pps = packets / period; + } + return pps; +} + +static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period) +{ + __u64 packets = 0; + double pps = 0; + + if (period > 0) { + packets = r->processed - p->processed; + pps = packets / period; + } + return pps; +} + +static double calc_drop(struct datarec *r, struct datarec *p, double period) +{ + __u64 packets = 0; + double pps = 0; + + if (period > 0) { + packets = r->dropped - p->dropped; + pps = packets / period; + } + return pps; +} + +static double calc_info(struct datarec *r, struct datarec *p, double period) { __u64 packets = 0; double pps = 0; if (period > 0) { - packets = r->counter - p->counter; + packets = r->info - p->info; pps = packets / period; } return pps; } -static void stats_print(struct stats_record *rec, - struct stats_record *prev, +static void stats_print(struct stats_record *stats_rec, + struct stats_record *stats_prev, bool err_only) { - double period = 0, pps = 0; - struct record *r, *p; - int i = 0; + unsigned int nr_cpus = bpf_num_possible_cpus(); + int rec_i = 0, i, to_cpu; + double t = 0, pps = 0; - char *fmt = "%-14s %-11s %-10.0f %'-18.0f %f\n"; + /* Header */ + printf("%-15s %-7s %-12s %-12s %-9s\n", + "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info"); /* tracepoint: xdp:xdp_redirect_* */ if (err_only) - i = REDIR_ERROR; - - for (; i < REDIR_RES_MAX; i++) { - r = &rec->xdp_redir[i]; - p = &prev->xdp_redir[i]; - - if (p->timestamp) { - period = calc_period(r, p); - pps = calc_pps(r, p, period); + rec_i = REDIR_ERROR; + + for (; rec_i < REDIR_RES_MAX; rec_i++) { + struct record_u64 *rec, *prev; + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n"; + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n"; + + rec = &stats_rec->xdp_redirect[rec_i]; + prev = &stats_prev->xdp_redirect[rec_i]; + t = calc_period_u64(rec, prev); + + for (i = 0; i < nr_cpus; i++) { + struct u64rec *r = &rec->cpu[i]; + struct u64rec *p = &prev->cpu[i]; + + pps = calc_pps_u64(r, p, t); + if (pps > 0) + printf(fmt1, "XDP_REDIRECT", i, + rec_i ? 0.0: pps, rec_i ? pps : 0.0, + err2str(rec_i)); } - printf(fmt, "XDP_REDIRECT", err2str(i), pps, pps, period); + pps = calc_pps_u64(&rec->total, &prev->total, t); + printf(fmt2, "XDP_REDIRECT", "total", + rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i)); } /* tracepoint: xdp:xdp_exception */ - for (i = 0; i < XDP_ACTION_MAX; i++) { - r = &rec->xdp_exception[i]; - p = &prev->xdp_exception[i]; - if (p->timestamp) { - period = calc_period(r, p); - pps = calc_pps(r, p, period); + for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) { + struct record_u64 *rec, *prev; + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n"; + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n"; + + rec = &stats_rec->xdp_exception[rec_i]; + prev = &stats_prev->xdp_exception[rec_i]; + t = calc_period_u64(rec, prev); + + for (i = 0; i < nr_cpus; i++) { + struct u64rec *r = &rec->cpu[i]; + struct u64rec *p = &prev->cpu[i]; + + pps = calc_pps_u64(r, p, t); + if (pps > 0) + printf(fmt1, "Exception", i, + 0.0, pps, err2str(rec_i)); } + pps = calc_pps_u64(&rec->total, &prev->total, t); if (pps > 0) - printf(fmt, action2str(i), "Exception", - pps, pps, period); + printf(fmt2, "Exception", "total", + 0.0, pps, action2str(rec_i)); } - printf("\n"); -} -static __u64 get_key32_value64_percpu(int fd, __u32 key) -{ - /* For percpu maps, userspace gets a value per possible CPU */ - unsigned int nr_cpus = bpf_num_possible_cpus(); - __u64 values[nr_cpus]; - __u64 sum = 0; - int i; - - if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { - fprintf(stderr, - "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); - return 0; + /* cpumap enqueue stats */ + for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) { + char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n"; + char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n"; + struct record *rec, *prev; + char *info_str = ""; + double drop, info; + + rec = &stats_rec->xdp_cpumap_enqueue[to_cpu]; + prev = &stats_prev->xdp_cpumap_enqueue[to_cpu]; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + + pps = calc_pps(r, p, t); + drop = calc_drop(r, p, t); + info = calc_info(r, p, t); + if (info > 0) { + info_str = "bulk-average"; + info = pps / info; /* calc average bulk size */ + } + if (pps > 0) + printf(fmt1, "cpumap-enqueue", + i, to_cpu, pps, drop, info, info_str); + } + pps = calc_pps(&rec->total, &prev->total, t); + if (pps > 0) { + drop = calc_drop(&rec->total, &prev->total, t); + info = calc_info(&rec->total, &prev->total, t); + if (info > 0) { + info_str = "bulk-average"; + info = pps / info; /* calc average bulk size */ + } + printf(fmt2, "cpumap-enqueue", + "sum", to_cpu, pps, drop, info, info_str); + } } - /* Sum values from each CPU */ - for (i = 0; i < nr_cpus; i++) { - sum += values[i]; + /* cpumap kthread stats */ + { + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n"; + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n"; + struct record *rec, *prev; + double drop, info; + char *i_str = ""; + + rec = &stats_rec->xdp_cpumap_kthread; + prev = &stats_prev->xdp_cpumap_kthread; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + + pps = calc_pps(r, p, t); + drop = calc_drop(r, p, t); + info = calc_info(r, p, t); + if (info > 0) + i_str = "sched"; + if (pps > 0) + printf(fmt1, "cpumap-kthread", + i, pps, drop, info, i_str); + } + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop(&rec->total, &prev->total, t); + info = calc_info(&rec->total, &prev->total, t); + if (info > 0) + i_str = "sched-sum"; + printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str); } - return sum; + + printf("\n"); } static bool stats_collect(struct stats_record *rec) @@ -222,25 +422,109 @@ static bool stats_collect(struct stats_record *rec) */ fd = map_data[0].fd; /* map0: redirect_err_cnt */ - for (i = 0; i < REDIR_RES_MAX; i++) { - rec->xdp_redir[i].timestamp = gettime(); - rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i); - } + for (i = 0; i < REDIR_RES_MAX; i++) + map_collect_record_u64(fd, i, &rec->xdp_redirect[i]); fd = map_data[1].fd; /* map1: exception_cnt */ for (i = 0; i < XDP_ACTION_MAX; i++) { - rec->xdp_exception[i].timestamp = gettime(); - rec->xdp_exception[i].counter = get_key32_value64_percpu(fd, i); + map_collect_record_u64(fd, i, &rec->xdp_exception[i]); } + fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */ + for (i = 0; i < MAX_CPUS; i++) + map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]); + + fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */ + map_collect_record(fd, 0, &rec->xdp_cpumap_kthread); + return true; } +static void *alloc_rec_per_cpu(int record_size) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + void *array; + size_t size; + + size = record_size * nr_cpus; + array = malloc(size); + memset(array, 0, size); + if (!array) { + fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); + exit(EXIT_FAIL_MEM); + } + return array; +} + +static struct stats_record *alloc_stats_record(void) +{ + struct stats_record *rec; + int rec_sz; + int i; + + /* Alloc main stats_record structure */ + rec = malloc(sizeof(*rec)); + memset(rec, 0, sizeof(*rec)); + if (!rec) { + fprintf(stderr, "Mem alloc error\n"); + exit(EXIT_FAIL_MEM); + } + + /* Alloc stats stored per CPU for each record */ + rec_sz = sizeof(struct u64rec); + for (i = 0; i < REDIR_RES_MAX; i++) + rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz); + + for (i = 0; i < XDP_ACTION_MAX; i++) + rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz); + + rec_sz = sizeof(struct datarec); + rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz); + + for (i = 0; i < MAX_CPUS; i++) + rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz); + + return rec; +} + +static void free_stats_record(struct stats_record *r) +{ + int i; + + for (i = 0; i < REDIR_RES_MAX; i++) + free(r->xdp_redirect[i].cpu); + + for (i = 0; i < XDP_ACTION_MAX; i++) + free(r->xdp_exception[i].cpu); + + free(r->xdp_cpumap_kthread.cpu); + + for (i = 0; i < MAX_CPUS; i++) + free(r->xdp_cpumap_enqueue[i].cpu); + + free(r); +} + +/* Pointer swap trick */ +static inline void swap(struct stats_record **a, struct stats_record **b) +{ + struct stats_record *tmp; + + tmp = *a; + *a = *b; + *b = tmp; +} + static void stats_poll(int interval, bool err_only) { - struct stats_record rec, prev; + struct stats_record *rec, *prev; - memset(&rec, 0, sizeof(rec)); + rec = alloc_stats_record(); + prev = alloc_stats_record(); + stats_collect(rec); + + if (err_only) + printf("\n%s\n", __doc_err_only__); /* Trick to pretty printf with thousands separators use %' */ setlocale(LC_NUMERIC, "en_US"); @@ -258,13 +542,15 @@ static void stats_poll(int interval, bool err_only) fflush(stdout); while (1) { - memcpy(&prev, &rec, sizeof(rec)); - stats_collect(&rec); - stats_print_headers(err_only); - stats_print(&rec, &prev, err_only); + swap(&prev, &rec); + stats_collect(rec); + stats_print(rec, prev, err_only); fflush(stdout); sleep(interval); } + + free_stats_record(rec); + free_stats_record(prev); } static void print_bpf_prog_info(void) |