aboutsummaryrefslogtreecommitdiff
path: root/samples/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'samples/bpf')
-rw-r--r--samples/bpf/Makefile1
-rwxr-xr-xsamples/bpf/xdp2skb_meta.sh220
-rw-r--r--samples/bpf/xdp2skb_meta_kern.c105
-rw-r--r--samples/bpf/xdp_monitor_kern.c96
-rw-r--r--samples/bpf/xdp_monitor_user.c416
5 files changed, 770 insertions, 68 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 3ff7a05bea9a..7f61a3d57fa7 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -142,6 +142,7 @@ always += xdp_redirect_map_kern.o
always += xdp_redirect_cpu_kern.o
always += xdp_monitor_kern.o
always += xdp_rxq_info_kern.o
+always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
diff --git a/samples/bpf/xdp2skb_meta.sh b/samples/bpf/xdp2skb_meta.sh
new file mode 100755
index 000000000000..b9c9549c4c27
--- /dev/null
+++ b/samples/bpf/xdp2skb_meta.sh
@@ -0,0 +1,220 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
+#
+# Bash-shell example on using iproute2 tools 'tc' and 'ip' to load
+# eBPF programs, both for XDP and clsbpf. Shell script function
+# wrappers and even long options parsing is illustrated, for ease of
+# use.
+#
+# Related to sample/bpf/xdp2skb_meta_kern.c, which contains BPF-progs
+# that need to collaborate between XDP and TC hooks. Thus, it is
+# convenient that the same tool load both programs that need to work
+# together.
+#
+BPF_FILE=xdp2skb_meta_kern.o
+DIR=$(dirname $0)
+
+export TC=/usr/sbin/tc
+export IP=/usr/sbin/ip
+
+function usage() {
+ echo ""
+ echo "Usage: $0 [-vfh] --dev ethX"
+ echo " -d | --dev : Network device (required)"
+ echo " --flush : Cleanup flush TC and XDP progs"
+ echo " --list : (\$LIST) List TC and XDP progs"
+ echo " -v | --verbose : (\$VERBOSE) Verbose"
+ echo " --dry-run : (\$DRYRUN) Dry-run only (echo commands)"
+ echo ""
+}
+
+## -- General shell logging cmds --
+function err() {
+ local exitcode=$1
+ shift
+ echo "ERROR: $@" >&2
+ exit $exitcode
+}
+
+function info() {
+ if [[ -n "$VERBOSE" ]]; then
+ echo "# $@"
+ fi
+}
+
+## -- Helper function calls --
+
+# Wrapper call for TC and IP
+# - Will display the offending command on failure
+function _call_cmd() {
+ local cmd="$1"
+ local allow_fail="$2"
+ shift 2
+ if [[ -n "$VERBOSE" ]]; then
+ echo "$(basename $cmd) $@"
+ fi
+ if [[ -n "$DRYRUN" ]]; then
+ return
+ fi
+ $cmd "$@"
+ local status=$?
+ if (( $status != 0 )); then
+ if [[ "$allow_fail" == "" ]]; then
+ err 2 "Exec error($status) occurred cmd: \"$cmd $@\""
+ fi
+ fi
+}
+function call_tc() {
+ _call_cmd "$TC" "" "$@"
+}
+function call_tc_allow_fail() {
+ _call_cmd "$TC" "allow_fail" "$@"
+}
+function call_ip() {
+ _call_cmd "$IP" "" "$@"
+}
+
+## --- Parse command line arguments / parameters ---
+# Using external program "getopt" to get --long-options
+OPTIONS=$(getopt -o vfhd: \
+ --long verbose,flush,help,list,dev:,dry-run -- "$@")
+if (( $? != 0 )); then
+ err 4 "Error calling getopt"
+fi
+eval set -- "$OPTIONS"
+
+unset DEV
+unset FLUSH
+while true; do
+ case "$1" in
+ -d | --dev ) # device
+ DEV=$2
+ info "Device set to: DEV=$DEV" >&2
+ shift 2
+ ;;
+ -v | --verbose)
+ VERBOSE=yes
+ # info "Verbose mode: VERBOSE=$VERBOSE" >&2
+ shift
+ ;;
+ --dry-run )
+ DRYRUN=yes
+ VERBOSE=yes
+ info "Dry-run mode: enable VERBOSE and don't call TC+IP" >&2
+ shift
+ ;;
+ -f | --flush )
+ FLUSH=yes
+ shift
+ ;;
+ --list )
+ LIST=yes
+ shift
+ ;;
+ -- )
+ shift
+ break
+ ;;
+ -h | --help )
+ usage;
+ exit 0
+ ;;
+ * )
+ shift
+ break
+ ;;
+ esac
+done
+
+FILE="$DIR/$BPF_FILE"
+if [[ ! -e $FILE ]]; then
+ err 3 "Missing BPF object file ($FILE)"
+fi
+
+if [[ -z $DEV ]]; then
+ usage
+ err 2 "Please specify network device -- required option --dev"
+fi
+
+## -- Function calls --
+
+function list_tc()
+{
+ local device="$1"
+ shift
+ info "Listing current TC ingress rules"
+ call_tc filter show dev $device ingress
+}
+
+function list_xdp()
+{
+ local device="$1"
+ shift
+ info "Listing current XDP device($device) setting"
+ call_ip link show dev $device | grep --color=auto xdp
+}
+
+function flush_tc()
+{
+ local device="$1"
+ shift
+ info "Flush TC on device: $device"
+ call_tc_allow_fail filter del dev $device ingress
+ call_tc_allow_fail qdisc del dev $device clsact
+}
+
+function flush_xdp()
+{
+ local device="$1"
+ shift
+ info "Flush XDP on device: $device"
+ call_ip link set dev $device xdp off
+}
+
+function attach_tc_mark()
+{
+ local device="$1"
+ local file="$2"
+ local prog="tc_mark"
+ shift 2
+
+ # Re-attach clsact to clear/flush existing role
+ call_tc_allow_fail qdisc del dev $device clsact 2> /dev/null
+ call_tc qdisc add dev $device clsact
+
+ # Attach BPF prog
+ call_tc filter add dev $device ingress \
+ prio 1 handle 1 bpf da obj $file sec $prog
+}
+
+function attach_xdp_mark()
+{
+ local device="$1"
+ local file="$2"
+ local prog="xdp_mark"
+ shift 2
+
+ # Remove XDP prog in-case it's already loaded
+ # TODO: Need ip-link option to override/replace existing XDP prog
+ flush_xdp $device
+
+ # Attach XDP/BPF prog
+ call_ip link set dev $device xdp obj $file sec $prog
+}
+
+if [[ -n $FLUSH ]]; then
+ flush_tc $DEV
+ flush_xdp $DEV
+ exit 0
+fi
+
+if [[ -n $LIST ]]; then
+ list_tc $DEV
+ list_xdp $DEV
+ exit 0
+fi
+
+attach_tc_mark $DEV $FILE
+attach_xdp_mark $DEV $FILE
diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c
new file mode 100644
index 000000000000..0c12048ac79f
--- /dev/null
+++ b/samples/bpf/xdp2skb_meta_kern.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
+ *
+ * Example howto transfer info from XDP to SKB, e.g. skb->mark
+ * -----------------------------------------------------------
+ * This uses the XDP data_meta infrastructure, and is a cooperation
+ * between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook.
+ *
+ * Notice: This example does not use the BPF C-loader (bpf_load.c),
+ * but instead rely on the iproute2 TC tool for loading BPF-objects.
+ */
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/pkt_cls.h>
+
+#include "bpf_helpers.h"
+
+/*
+ * This struct is stored in the XDP 'data_meta' area, which is located
+ * just in-front-of the raw packet payload data. The meaning is
+ * specific to these two BPF programs that use it as a communication
+ * channel. XDP adjust/increase the area via a bpf-helper, and TC use
+ * boundary checks to see if data have been provided.
+ *
+ * The struct must be 4 byte aligned, which here is enforced by the
+ * struct __attribute__((aligned(4))).
+ */
+struct meta_info {
+ __u32 mark;
+} __attribute__((aligned(4)));
+
+SEC("xdp_mark")
+int _xdp_mark(struct xdp_md *ctx)
+{
+ struct meta_info *meta;
+ void *data, *data_end;
+ int ret;
+
+ /* Reserve space in-front of data pointer for our meta info.
+ * (Notice drivers not supporting data_meta will fail here!)
+ */
+ ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta));
+ if (ret < 0)
+ return XDP_ABORTED;
+
+ /* Notice: Kernel-side verifier requires that loading of
+ * ctx->data MUST happen _after_ helper bpf_xdp_adjust_meta(),
+ * as pkt-data pointers are invalidated. Helpers that require
+ * this are determined/marked by bpf_helper_changes_pkt_data()
+ */
+ data = (void *)(unsigned long)ctx->data;
+
+ /* Check data_meta have room for meta_info struct */
+ meta = (void *)(unsigned long)ctx->data_meta;
+ if (meta + 1 > data)
+ return XDP_ABORTED;
+
+ meta->mark = 42;
+
+ return XDP_PASS;
+}
+
+SEC("tc_mark")
+int _tc_mark(struct __sk_buff *ctx)
+{
+ void *data = (void *)(unsigned long)ctx->data;
+ void *data_end = (void *)(unsigned long)ctx->data_end;
+ void *data_meta = (void *)(unsigned long)ctx->data_meta;
+ struct meta_info *meta = data_meta;
+
+ /* Check XDP gave us some data_meta */
+ if (meta + 1 > data) {
+ ctx->mark = 41;
+ /* Skip "accept" if no data_meta is avail */
+ return TC_ACT_OK;
+ }
+
+ /* Hint: See func tc_cls_act_is_valid_access() for BPF_WRITE access */
+ ctx->mark = meta->mark; /* Transfer XDP-mark to SKB-mark */
+
+ return TC_ACT_OK;
+}
+
+/* Manually attaching these programs:
+export DEV=ixgbe2
+export FILE=xdp2skb_meta_kern.o
+
+# via TC command
+tc qdisc del dev $DEV clsact 2> /dev/null
+tc qdisc add dev $DEV clsact
+tc filter add dev $DEV ingress prio 1 handle 1 bpf da obj $FILE sec tc_mark
+tc filter show dev $DEV ingress
+
+# XDP via IP command:
+ip link set dev $DEV xdp off
+ip link set dev $DEV xdp obj $FILE sec xdp_mark
+
+# Use iptable to "see" if SKBs are marked
+iptables -I INPUT -p icmp -m mark --mark 41 # == 0x29
+iptables -I INPUT -p icmp -m mark --mark 42 # == 0x2a
+
+# Hint: catch XDP_ABORTED errors via
+perf record -e xdp:*
+perf script
+
+*/
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index 2fe2f761a0d0..211db8ded0de 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -1,6 +1,7 @@
-/* XDP monitor tool, based on tracepoints
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc.
*
- * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * XDP monitor tool, based on tracepoints
*/
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
@@ -104,7 +105,7 @@ struct xdp_exception_ctx {
SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
- u64 *cnt;;
+ u64 *cnt;
u32 key;
key = ctx->act;
@@ -118,3 +119,92 @@ int trace_xdp_exception(struct xdp_exception_ctx *ctx)
return 0;
}
+
+/* Common stats data record shared with _user.c */
+struct datarec {
+ u64 processed;
+ u64 dropped;
+ u64 info;
+};
+#define MAX_CPUS 64
+
+struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = MAX_CPUS,
+};
+
+struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = 1,
+};
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct cpumap_enqueue_ctx {
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int map_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ int cpu; // offset:16; size:4; signed:1;
+ unsigned int drops; // offset:20; size:4; signed:0;
+ unsigned int processed; // offset:24; size:4; signed:0;
+ int to_cpu; // offset:28; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_cpumap_enqueue")
+int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
+{
+ u32 to_cpu = ctx->to_cpu;
+ struct datarec *rec;
+
+ if (to_cpu >= MAX_CPUS)
+ return 1;
+
+ rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
+ if (!rec)
+ return 0;
+ rec->processed += ctx->processed;
+ rec->dropped += ctx->drops;
+
+ /* Record bulk events, then userspace can calc average bulk size */
+ if (ctx->processed > 0)
+ rec->info += 1;
+
+ return 0;
+}
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct cpumap_kthread_ctx {
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int map_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ int cpu; // offset:16; size:4; signed:1;
+ unsigned int drops; // offset:20; size:4; signed:0;
+ unsigned int processed; // offset:24; size:4; signed:0;
+ int sched; // offset:28; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_cpumap_kthread")
+int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
+{
+ struct datarec *rec;
+ u32 key = 0;
+
+ rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
+ if (!rec)
+ return 0;
+ rec->processed += ctx->processed;
+ rec->dropped += ctx->drops;
+
+ /* Count times kthread yielded CPU via schedule call */
+ if (ctx->sched)
+ rec->info++;
+
+ return 0;
+}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index eaba165b3549..eec14520d513 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -1,4 +1,5 @@
-/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
*/
static const char *__doc__=
"XDP monitor tool, based on tracepoints\n"
@@ -40,6 +41,9 @@ static const struct option long_options[] = {
{0, 0, NULL, 0 }
};
+/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
+#define EXIT_FAIL_MEM 5
+
static void usage(char *argv[])
{
int i;
@@ -108,23 +112,93 @@ static const char *action2str(int action)
return NULL;
}
+/* Common stats data record shared with _kern.c */
+struct datarec {
+ __u64 processed;
+ __u64 dropped;
+ __u64 info;
+};
+#define MAX_CPUS 64
+
+/* Userspace structs for collection of stats from maps */
struct record {
- __u64 counter;
__u64 timestamp;
+ struct datarec total;
+ struct datarec *cpu;
+};
+struct u64rec {
+ __u64 processed;
+};
+struct record_u64 {
+ /* record for _kern side __u64 values */
+ __u64 timestamp;
+ struct u64rec total;
+ struct u64rec *cpu;
};
struct stats_record {
- struct record xdp_redir[REDIR_RES_MAX];
- struct record xdp_exception[XDP_ACTION_MAX];
+ struct record_u64 xdp_redirect[REDIR_RES_MAX];
+ struct record_u64 xdp_exception[XDP_ACTION_MAX];
+ struct record xdp_cpumap_kthread;
+ struct record xdp_cpumap_enqueue[MAX_CPUS];
};
-static void stats_print_headers(bool err_only)
+static bool map_collect_record(int fd, __u32 key, struct record *rec)
{
- if (err_only)
- printf("\n%s\n", __doc_err_only__);
+ /* For percpu maps, userspace gets a value per possible CPU */
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct datarec values[nr_cpus];
+ __u64 sum_processed = 0;
+ __u64 sum_dropped = 0;
+ __u64 sum_info = 0;
+ int i;
+
+ if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+ fprintf(stderr,
+ "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+ return false;
+ }
+ /* Get time as close as possible to reading map contents */
+ rec->timestamp = gettime();
- printf("%-14s %-11s %-10s %-18s %-9s\n",
- "ACTION", "result", "pps ", "pps-human-readable", "measure-period");
+ /* Record and sum values from each CPU */
+ for (i = 0; i < nr_cpus; i++) {
+ rec->cpu[i].processed = values[i].processed;
+ sum_processed += values[i].processed;
+ rec->cpu[i].dropped = values[i].dropped;
+ sum_dropped += values[i].dropped;
+ rec->cpu[i].info = values[i].info;
+ sum_info += values[i].info;
+ }
+ rec->total.processed = sum_processed;
+ rec->total.dropped = sum_dropped;
+ rec->total.info = sum_info;
+ return true;
+}
+
+static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec)
+{
+ /* For percpu maps, userspace gets a value per possible CPU */
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct u64rec values[nr_cpus];
+ __u64 sum_total = 0;
+ int i;
+
+ if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+ fprintf(stderr,
+ "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+ return false;
+ }
+ /* Get time as close as possible to reading map contents */
+ rec->timestamp = gettime();
+
+ /* Record and sum values from each CPU */
+ for (i = 0; i < nr_cpus; i++) {
+ rec->cpu[i].processed = values[i].processed;
+ sum_total += values[i].processed;
+ }
+ rec->total.processed = sum_total;
+ return true;
}
static double calc_period(struct record *r, struct record *p)
@@ -139,77 +213,203 @@ static double calc_period(struct record *r, struct record *p)
return period_;
}
-static double calc_pps(struct record *r, struct record *p, double period)
+static double calc_period_u64(struct record_u64 *r, struct record_u64 *p)
+{
+ double period_ = 0;
+ __u64 period = 0;
+
+ period = r->timestamp - p->timestamp;
+ if (period > 0)
+ period_ = ((double) period / NANOSEC_PER_SEC);
+
+ return period_;
+}
+
+static double calc_pps(struct datarec *r, struct datarec *p, double period)
+{
+ __u64 packets = 0;
+ double pps = 0;
+
+ if (period > 0) {
+ packets = r->processed - p->processed;
+ pps = packets / period;
+ }
+ return pps;
+}
+
+static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period)
+{
+ __u64 packets = 0;
+ double pps = 0;
+
+ if (period > 0) {
+ packets = r->processed - p->processed;
+ pps = packets / period;
+ }
+ return pps;
+}
+
+static double calc_drop(struct datarec *r, struct datarec *p, double period)
+{
+ __u64 packets = 0;
+ double pps = 0;
+
+ if (period > 0) {
+ packets = r->dropped - p->dropped;
+ pps = packets / period;
+ }
+ return pps;
+}
+
+static double calc_info(struct datarec *r, struct datarec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
- packets = r->counter - p->counter;
+ packets = r->info - p->info;
pps = packets / period;
}
return pps;
}
-static void stats_print(struct stats_record *rec,
- struct stats_record *prev,
+static void stats_print(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
bool err_only)
{
- double period = 0, pps = 0;
- struct record *r, *p;
- int i = 0;
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ int rec_i = 0, i, to_cpu;
+ double t = 0, pps = 0;
- char *fmt = "%-14s %-11s %-10.0f %'-18.0f %f\n";
+ /* Header */
+ printf("%-15s %-7s %-12s %-12s %-9s\n",
+ "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info");
/* tracepoint: xdp:xdp_redirect_* */
if (err_only)
- i = REDIR_ERROR;
-
- for (; i < REDIR_RES_MAX; i++) {
- r = &rec->xdp_redir[i];
- p = &prev->xdp_redir[i];
-
- if (p->timestamp) {
- period = calc_period(r, p);
- pps = calc_pps(r, p, period);
+ rec_i = REDIR_ERROR;
+
+ for (; rec_i < REDIR_RES_MAX; rec_i++) {
+ struct record_u64 *rec, *prev;
+ char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
+ char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
+
+ rec = &stats_rec->xdp_redirect[rec_i];
+ prev = &stats_prev->xdp_redirect[rec_i];
+ t = calc_period_u64(rec, prev);
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct u64rec *r = &rec->cpu[i];
+ struct u64rec *p = &prev->cpu[i];
+
+ pps = calc_pps_u64(r, p, t);
+ if (pps > 0)
+ printf(fmt1, "XDP_REDIRECT", i,
+ rec_i ? 0.0: pps, rec_i ? pps : 0.0,
+ err2str(rec_i));
}
- printf(fmt, "XDP_REDIRECT", err2str(i), pps, pps, period);
+ pps = calc_pps_u64(&rec->total, &prev->total, t);
+ printf(fmt2, "XDP_REDIRECT", "total",
+ rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i));
}
/* tracepoint: xdp:xdp_exception */
- for (i = 0; i < XDP_ACTION_MAX; i++) {
- r = &rec->xdp_exception[i];
- p = &prev->xdp_exception[i];
- if (p->timestamp) {
- period = calc_period(r, p);
- pps = calc_pps(r, p, period);
+ for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
+ struct record_u64 *rec, *prev;
+ char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
+ char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
+
+ rec = &stats_rec->xdp_exception[rec_i];
+ prev = &stats_prev->xdp_exception[rec_i];
+ t = calc_period_u64(rec, prev);
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct u64rec *r = &rec->cpu[i];
+ struct u64rec *p = &prev->cpu[i];
+
+ pps = calc_pps_u64(r, p, t);
+ if (pps > 0)
+ printf(fmt1, "Exception", i,
+ 0.0, pps, err2str(rec_i));
}
+ pps = calc_pps_u64(&rec->total, &prev->total, t);
if (pps > 0)
- printf(fmt, action2str(i), "Exception",
- pps, pps, period);
+ printf(fmt2, "Exception", "total",
+ 0.0, pps, action2str(rec_i));
}
- printf("\n");
-}
-static __u64 get_key32_value64_percpu(int fd, __u32 key)
-{
- /* For percpu maps, userspace gets a value per possible CPU */
- unsigned int nr_cpus = bpf_num_possible_cpus();
- __u64 values[nr_cpus];
- __u64 sum = 0;
- int i;
-
- if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
- fprintf(stderr,
- "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
- return 0;
+ /* cpumap enqueue stats */
+ for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
+ char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
+ char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
+ struct record *rec, *prev;
+ char *info_str = "";
+ double drop, info;
+
+ rec = &stats_rec->xdp_cpumap_enqueue[to_cpu];
+ prev = &stats_prev->xdp_cpumap_enqueue[to_cpu];
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop(r, p, t);
+ info = calc_info(r, p, t);
+ if (info > 0) {
+ info_str = "bulk-average";
+ info = pps / info; /* calc average bulk size */
+ }
+ if (pps > 0)
+ printf(fmt1, "cpumap-enqueue",
+ i, to_cpu, pps, drop, info, info_str);
+ }
+ pps = calc_pps(&rec->total, &prev->total, t);
+ if (pps > 0) {
+ drop = calc_drop(&rec->total, &prev->total, t);
+ info = calc_info(&rec->total, &prev->total, t);
+ if (info > 0) {
+ info_str = "bulk-average";
+ info = pps / info; /* calc average bulk size */
+ }
+ printf(fmt2, "cpumap-enqueue",
+ "sum", to_cpu, pps, drop, info, info_str);
+ }
}
- /* Sum values from each CPU */
- for (i = 0; i < nr_cpus; i++) {
- sum += values[i];
+ /* cpumap kthread stats */
+ {
+ char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
+ char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
+ struct record *rec, *prev;
+ double drop, info;
+ char *i_str = "";
+
+ rec = &stats_rec->xdp_cpumap_kthread;
+ prev = &stats_prev->xdp_cpumap_kthread;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop(r, p, t);
+ info = calc_info(r, p, t);
+ if (info > 0)
+ i_str = "sched";
+ if (pps > 0)
+ printf(fmt1, "cpumap-kthread",
+ i, pps, drop, info, i_str);
+ }
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop(&rec->total, &prev->total, t);
+ info = calc_info(&rec->total, &prev->total, t);
+ if (info > 0)
+ i_str = "sched-sum";
+ printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
}
- return sum;
+
+ printf("\n");
}
static bool stats_collect(struct stats_record *rec)
@@ -222,25 +422,109 @@ static bool stats_collect(struct stats_record *rec)
*/
fd = map_data[0].fd; /* map0: redirect_err_cnt */
- for (i = 0; i < REDIR_RES_MAX; i++) {
- rec->xdp_redir[i].timestamp = gettime();
- rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i);
- }
+ for (i = 0; i < REDIR_RES_MAX; i++)
+ map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
fd = map_data[1].fd; /* map1: exception_cnt */
for (i = 0; i < XDP_ACTION_MAX; i++) {
- rec->xdp_exception[i].timestamp = gettime();
- rec->xdp_exception[i].counter = get_key32_value64_percpu(fd, i);
+ map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
}
+ fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
+ for (i = 0; i < MAX_CPUS; i++)
+ map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
+
+ fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
+ map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
+
return true;
}
+static void *alloc_rec_per_cpu(int record_size)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ void *array;
+ size_t size;
+
+ size = record_size * nr_cpus;
+ array = malloc(size);
+ memset(array, 0, size);
+ if (!array) {
+ fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
+ exit(EXIT_FAIL_MEM);
+ }
+ return array;
+}
+
+static struct stats_record *alloc_stats_record(void)
+{
+ struct stats_record *rec;
+ int rec_sz;
+ int i;
+
+ /* Alloc main stats_record structure */
+ rec = malloc(sizeof(*rec));
+ memset(rec, 0, sizeof(*rec));
+ if (!rec) {
+ fprintf(stderr, "Mem alloc error\n");
+ exit(EXIT_FAIL_MEM);
+ }
+
+ /* Alloc stats stored per CPU for each record */
+ rec_sz = sizeof(struct u64rec);
+ for (i = 0; i < REDIR_RES_MAX; i++)
+ rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+ rec_sz = sizeof(struct datarec);
+ rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
+
+ for (i = 0; i < MAX_CPUS; i++)
+ rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+ return rec;
+}
+
+static void free_stats_record(struct stats_record *r)
+{
+ int i;
+
+ for (i = 0; i < REDIR_RES_MAX; i++)
+ free(r->xdp_redirect[i].cpu);
+
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ free(r->xdp_exception[i].cpu);
+
+ free(r->xdp_cpumap_kthread.cpu);
+
+ for (i = 0; i < MAX_CPUS; i++)
+ free(r->xdp_cpumap_enqueue[i].cpu);
+
+ free(r);
+}
+
+/* Pointer swap trick */
+static inline void swap(struct stats_record **a, struct stats_record **b)
+{
+ struct stats_record *tmp;
+
+ tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
static void stats_poll(int interval, bool err_only)
{
- struct stats_record rec, prev;
+ struct stats_record *rec, *prev;
- memset(&rec, 0, sizeof(rec));
+ rec = alloc_stats_record();
+ prev = alloc_stats_record();
+ stats_collect(rec);
+
+ if (err_only)
+ printf("\n%s\n", __doc_err_only__);
/* Trick to pretty printf with thousands separators use %' */
setlocale(LC_NUMERIC, "en_US");
@@ -258,13 +542,15 @@ static void stats_poll(int interval, bool err_only)
fflush(stdout);
while (1) {
- memcpy(&prev, &rec, sizeof(rec));
- stats_collect(&rec);
- stats_print_headers(err_only);
- stats_print(&rec, &prev, err_only);
+ swap(&prev, &rec);
+ stats_collect(rec);
+ stats_print(rec, prev, err_only);
fflush(stdout);
sleep(interval);
}
+
+ free_stats_record(rec);
+ free_stats_record(prev);
}
static void print_bpf_prog_info(void)