diff options
Diffstat (limited to 'samples/bpf')
| -rw-r--r-- | samples/bpf/Makefile | 16 | ||||
| -rw-r--r-- | samples/bpf/bpf_helpers.h | 4 | ||||
| -rw-r--r-- | samples/bpf/bpf_load.c | 8 | ||||
| -rw-r--r-- | samples/bpf/sockex2_user.c | 3 | ||||
| -rw-r--r-- | samples/bpf/sockex3_user.c | 3 | ||||
| -rw-r--r-- | samples/bpf/test_cgrp2_array_pin.c | 109 | ||||
| -rwxr-xr-x | samples/bpf/test_cgrp2_tc.sh | 184 | ||||
| -rw-r--r-- | samples/bpf/test_cgrp2_tc_kern.c | 69 | ||||
| -rw-r--r-- | samples/bpf/test_probe_write_user_kern.c | 52 | ||||
| -rw-r--r-- | samples/bpf/test_probe_write_user_user.c | 78 | ||||
| -rw-r--r-- | samples/bpf/xdp1_kern.c | 93 | ||||
| -rw-r--r-- | samples/bpf/xdp1_user.c | 181 | ||||
| -rw-r--r-- | samples/bpf/xdp2_kern.c | 114 | 
13 files changed, 914 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0bf2478cb7df..90ebf7d35c07 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -14,12 +14,16 @@ hostprogs-y += tracex3  hostprogs-y += tracex4  hostprogs-y += tracex5  hostprogs-y += tracex6 +hostprogs-y += test_probe_write_user  hostprogs-y += trace_output  hostprogs-y += lathist  hostprogs-y += offwaketime  hostprogs-y += spintest  hostprogs-y += map_perf_test  hostprogs-y += test_overhead +hostprogs-y += test_cgrp2_array_pin +hostprogs-y += xdp1 +hostprogs-y += xdp2  test_verifier-objs := test_verifier.o libbpf.o  test_maps-objs := test_maps.o libbpf.o @@ -34,12 +38,17 @@ tracex3-objs := bpf_load.o libbpf.o tracex3_user.o  tracex4-objs := bpf_load.o libbpf.o tracex4_user.o  tracex5-objs := bpf_load.o libbpf.o tracex5_user.o  tracex6-objs := bpf_load.o libbpf.o tracex6_user.o +test_probe_write_user-objs := bpf_load.o libbpf.o test_probe_write_user_user.o  trace_output-objs := bpf_load.o libbpf.o trace_output_user.o  lathist-objs := bpf_load.o libbpf.o lathist_user.o  offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o  spintest-objs := bpf_load.o libbpf.o spintest_user.o  map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o  test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o +test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o +xdp1-objs := bpf_load.o libbpf.o xdp1_user.o +# reuse xdp1 source intentionally +xdp2-objs := bpf_load.o libbpf.o xdp1_user.o  # Tell kbuild to always build the programs  always := $(hostprogs-y) @@ -52,6 +61,7 @@ always += tracex3_kern.o  always += tracex4_kern.o  always += tracex5_kern.o  always += tracex6_kern.o +always += test_probe_write_user_kern.o  always += trace_output_kern.o  always += tcbpf1_kern.o  always += lathist_kern.o @@ -61,6 +71,9 @@ always += map_perf_test_kern.o  always += test_overhead_tp_kern.o  always += test_overhead_kprobe_kern.o  always += parse_varlen.o parse_simple.o parse_ldabs.o +always += test_cgrp2_tc_kern.o +always += xdp1_kern.o +always += xdp2_kern.o  HOSTCFLAGS += -I$(objtree)/usr/include @@ -75,12 +88,15 @@ HOSTLOADLIBES_tracex3 += -lelf  HOSTLOADLIBES_tracex4 += -lelf -lrt  HOSTLOADLIBES_tracex5 += -lelf  HOSTLOADLIBES_tracex6 += -lelf +HOSTLOADLIBES_test_probe_write_user += -lelf  HOSTLOADLIBES_trace_output += -lelf -lrt  HOSTLOADLIBES_lathist += -lelf  HOSTLOADLIBES_offwaketime += -lelf  HOSTLOADLIBES_spintest += -lelf  HOSTLOADLIBES_map_perf_test += -lelf -lrt  HOSTLOADLIBES_test_overhead += -lelf -lrt +HOSTLOADLIBES_xdp1 += -lelf +HOSTLOADLIBES_xdp2 += -lelf  # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:  #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 7904a2a493de..217c8d507f2e 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -41,6 +41,8 @@ static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data,  	(void *) BPF_FUNC_perf_event_output;  static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =  	(void *) BPF_FUNC_get_stackid; +static int (*bpf_probe_write_user)(void *dst, void *src, int size) = +	(void *) BPF_FUNC_probe_write_user;  /* llvm builtin functions that eBPF C program may use to   * emit BPF_LD_ABS and BPF_LD_IND instructions @@ -70,6 +72,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag  	(void *) BPF_FUNC_l3_csum_replace;  static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =  	(void *) BPF_FUNC_l4_csum_replace; +static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) = +	(void *) BPF_FUNC_skb_in_cgroup;  #if defined(__x86_64__) diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 022af71c2bb5..0cfda2320320 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -50,6 +50,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)  	bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;  	bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;  	bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; +	bool is_xdp = strncmp(event, "xdp", 3) == 0;  	enum bpf_prog_type prog_type;  	char buf[256];  	int fd, efd, err, id; @@ -66,6 +67,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)  		prog_type = BPF_PROG_TYPE_KPROBE;  	} else if (is_tracepoint) {  		prog_type = BPF_PROG_TYPE_TRACEPOINT; +	} else if (is_xdp) { +		prog_type = BPF_PROG_TYPE_XDP;  	} else {  		printf("Unknown event '%s'\n", event);  		return -1; @@ -79,6 +82,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)  	prog_fd[prog_cnt++] = fd; +	if (is_xdp) +		return 0; +  	if (is_socket) {  		event += 6;  		if (*event != '/') @@ -319,6 +325,7 @@ int load_bpf_file(char *path)  			if (memcmp(shname_prog, "kprobe/", 7) == 0 ||  			    memcmp(shname_prog, "kretprobe/", 10) == 0 ||  			    memcmp(shname_prog, "tracepoint/", 11) == 0 || +			    memcmp(shname_prog, "xdp", 3) == 0 ||  			    memcmp(shname_prog, "socket", 6) == 0)  				load_and_attach(shname_prog, insns, data_prog->d_size);  		} @@ -336,6 +343,7 @@ int load_bpf_file(char *path)  		if (memcmp(shname, "kprobe/", 7) == 0 ||  		    memcmp(shname, "kretprobe/", 10) == 0 ||  		    memcmp(shname, "tracepoint/", 11) == 0 || +		    memcmp(shname, "xdp", 3) == 0 ||  		    memcmp(shname, "socket", 6) == 0)  			load_and_attach(shname, data->d_buf, data->d_size);  	} diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index 29a276d766fc..8a4085c2d117 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -5,6 +5,7 @@  #include "bpf_load.h"  #include <unistd.h>  #include <arpa/inet.h> +#include <sys/resource.h>  struct pair {  	__u64 packets; @@ -13,11 +14,13 @@ struct pair {  int main(int ac, char **argv)  { +	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};  	char filename[256];  	FILE *f;  	int i, sock;  	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); +	setrlimit(RLIMIT_MEMLOCK, &r);  	if (load_bpf_file(filename)) {  		printf("%s", bpf_log_buf); diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 2617772d060d..d4184ab5f3ac 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -5,6 +5,7 @@  #include "bpf_load.h"  #include <unistd.h>  #include <arpa/inet.h> +#include <sys/resource.h>  struct flow_keys {  	__be32 src; @@ -23,11 +24,13 @@ struct pair {  int main(int argc, char **argv)  { +	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};  	char filename[256];  	FILE *f;  	int i, sock;  	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); +	setrlimit(RLIMIT_MEMLOCK, &r);  	if (load_bpf_file(filename)) {  		printf("%s", bpf_log_buf); diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c new file mode 100644 index 000000000000..70e86f7be69d --- /dev/null +++ b/samples/bpf/test_cgrp2_array_pin.c @@ -0,0 +1,109 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/unistd.h> +#include <linux/bpf.h> + +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> + +#include "libbpf.h" + +static void usage(void) +{ +	printf("Usage: test_cgrp2_array_pin [...]\n"); +	printf("       -F <file>   File to pin an BPF cgroup array\n"); +	printf("       -U <file>   Update an already pinned BPF cgroup array\n"); +	printf("       -v <value>  Full path of the cgroup2\n"); +	printf("       -h          Display this help\n"); +} + +int main(int argc, char **argv) +{ +	const char *pinned_file = NULL, *cg2 = NULL; +	int create_array = 1; +	int array_key = 0; +	int array_fd = -1; +	int cg2_fd = -1; +	int ret = -1; +	int opt; + +	while ((opt = getopt(argc, argv, "F:U:v:")) != -1) { +		switch (opt) { +		/* General args */ +		case 'F': +			pinned_file = optarg; +			break; +		case 'U': +			pinned_file = optarg; +			create_array = 0; +			break; +		case 'v': +			cg2 = optarg; +			break; +		default: +			usage(); +			goto out; +		} +	} + +	if (!cg2 || !pinned_file) { +		usage(); +		goto out; +	} + +	cg2_fd = open(cg2, O_RDONLY); +	if (cg2_fd < 0) { +		fprintf(stderr, "open(%s,...): %s(%d)\n", +			cg2, strerror(errno), errno); +		goto out; +	} + +	if (create_array) { +		array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY, +					  sizeof(uint32_t), sizeof(uint32_t), +					  1, 0); +		if (array_fd < 0) { +			fprintf(stderr, +				"bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n", +				strerror(errno), errno); +			goto out; +		} +	} else { +		array_fd = bpf_obj_get(pinned_file); +		if (array_fd < 0) { +			fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n", +				pinned_file, strerror(errno), errno); +			goto out; +		} +	} + +	ret = bpf_update_elem(array_fd, &array_key, &cg2_fd, 0); +	if (ret) { +		perror("bpf_update_elem"); +		goto out; +	} + +	if (create_array) { +		ret = bpf_obj_pin(array_fd, pinned_file); +		if (ret) { +			fprintf(stderr, "bpf_obj_pin(..., %s): %s(%d)\n", +				pinned_file, strerror(errno), errno); +			goto out; +		} +	} + +out: +	if (array_fd != -1) +		close(array_fd); +	if (cg2_fd != -1) +		close(cg2_fd); +	return ret; +} diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh new file mode 100755 index 000000000000..0b119eeaf85c --- /dev/null +++ b/samples/bpf/test_cgrp2_tc.sh @@ -0,0 +1,184 @@ +#!/bin/bash + +MY_DIR=$(dirname $0) +# Details on the bpf prog +BPF_CGRP2_ARRAY_NAME='test_cgrp2_array_pin' +BPF_PROG="$MY_DIR/test_cgrp2_tc_kern.o" +BPF_SECTION='filter' + +[ -z "$TC" ] && TC='tc' +[ -z "$IP" ] && IP='ip' + +# Names of the veth interface, net namespace...etc. +HOST_IFC='ve' +NS_IFC='vens' +NS='ns' + +find_mnt() { +    cat /proc/mounts | \ +	awk '{ if ($3 == "'$1'" && mnt == "") { mnt = $2 }} END { print mnt }' +} + +# Init cgroup2 vars +init_cgrp2_vars() { +    CGRP2_ROOT=$(find_mnt cgroup2) +    if [ -z "$CGRP2_ROOT" ] +    then +	CGRP2_ROOT='/mnt/cgroup2' +	MOUNT_CGRP2="yes" +    fi +    CGRP2_TC="$CGRP2_ROOT/tc" +    CGRP2_TC_LEAF="$CGRP2_TC/leaf" +} + +# Init bpf fs vars +init_bpf_fs_vars() { +    local bpf_fs_root=$(find_mnt bpf) +    [ -n "$bpf_fs_root" ] || return -1 +    BPF_FS_TC_SHARE="$bpf_fs_root/tc/globals" +} + +setup_cgrp2() { +    case $1 in +	start) +	    if [ "$MOUNT_CGRP2" == 'yes' ] +	    then +		[ -d $CGRP2_ROOT ] || mkdir -p $CGRP2_ROOT +		mount -t cgroup2 none $CGRP2_ROOT || return $? +	    fi +	    mkdir -p $CGRP2_TC_LEAF +	    ;; +	*) +	    rmdir $CGRP2_TC_LEAF && rmdir $CGRP2_TC +	    [ "$MOUNT_CGRP2" == 'yes' ] && umount $CGRP2_ROOT +	    ;; +    esac +} + +setup_bpf_cgrp2_array() { +    local bpf_cgrp2_array="$BPF_FS_TC_SHARE/$BPF_CGRP2_ARRAY_NAME" +    case $1 in +	start) +	    $MY_DIR/test_cgrp2_array_pin -U $bpf_cgrp2_array -v $CGRP2_TC +	    ;; +	*) +	    [ -d "$BPF_FS_TC_SHARE" ] && rm -f $bpf_cgrp2_array +	    ;; +    esac +} + +setup_net() { +    case $1 in +	start) +	    $IP link add $HOST_IFC type veth peer name $NS_IFC || return $? +	    $IP link set dev $HOST_IFC up || return $? +	    sysctl -q net.ipv6.conf.$HOST_IFC.accept_dad=0 + +	    $IP netns add ns || return $? +	    $IP link set dev $NS_IFC netns ns || return $? +	    $IP -n $NS link set dev $NS_IFC up || return $? +	    $IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.accept_dad=0 +	    $TC qdisc add dev $HOST_IFC clsact || return $? +	    $TC filter add dev $HOST_IFC egress bpf da obj $BPF_PROG sec $BPF_SECTION || return $? +	    ;; +	*) +	    $IP netns del $NS +	    $IP link del $HOST_IFC +	    ;; +    esac +} + +run_in_cgrp() { +    # Fork another bash and move it under the specified cgroup. +    # It makes the cgroup cleanup easier at the end of the test. +    cmd='echo $$ > ' +    cmd="$cmd $1/cgroup.procs; exec $2" +    bash -c "$cmd" +} + +do_test() { +    run_in_cgrp $CGRP2_TC_LEAF "ping -6 -c3 ff02::1%$HOST_IFC >& /dev/null" +    local dropped=$($TC -s qdisc show dev $HOST_IFC | tail -3 | \ +			   awk '/drop/{print substr($7, 0, index($7, ",")-1)}') +    if [[ $dropped -eq 0 ]] +    then +	echo "FAIL" +	return 1 +    else +	echo "Successfully filtered $dropped packets" +	return 0 +    fi +} + +do_exit() { +    if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ] +    then +	echo "------ DEBUG ------" +	echo "mount: "; mount | egrep '(cgroup2|bpf)'; echo +	echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo +	if [ -d "$BPF_FS_TC_SHARE" ] +	then +	    echo "$BPF_FS_TC_SHARE: "; ls -l $BPF_FS_TC_SHARE; echo +	fi +	echo "Host net:" +	$IP netns +	$IP link show dev $HOST_IFC +	$IP -6 a show dev $HOST_IFC +	$TC -s qdisc show dev $HOST_IFC +	echo +	echo "$NS net:" +	$IP -n $NS link show dev $NS_IFC +	$IP -n $NS -6 link show dev $NS_IFC +	echo "------ DEBUG ------" +	echo +    fi + +    if [ "$MODE" != 'nocleanup' ] +    then +	setup_net stop +	setup_bpf_cgrp2_array stop +	setup_cgrp2 stop +    fi +} + +init_cgrp2_vars +init_bpf_fs_vars + +while [[ $# -ge 1 ]] +do +    a="$1" +    case $a in +	debug) +	    DEBUG='yes' +	    shift 1 +	    ;; +	cleanup-only) +	    MODE='cleanuponly' +	    shift 1 +	    ;; +	no-cleanup) +	    MODE='nocleanup' +	    shift 1 +	    ;; +	*) +	    echo "test_cgrp2_tc [debug] [cleanup-only | no-cleanup]" +	    echo "  debug: Print cgrp and network setup details at the end of the test" +	    echo "  cleanup-only: Try to cleanup things from last test.  No test will be run" +	    echo "  no-cleanup: Run the test but don't do cleanup at the end" +	    echo "[Note: If no arg is given, it will run the test and do cleanup at the end]" +	    echo +	    exit -1 +	    ;; +    esac +done + +trap do_exit 0 + +[ "$MODE" == 'cleanuponly' ] && exit + +setup_cgrp2 start || exit $? +setup_net start || exit $? +init_bpf_fs_vars || exit $? +setup_bpf_cgrp2_array start || exit $? +do_test +echo diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c new file mode 100644 index 000000000000..2732c37c8d5b --- /dev/null +++ b/samples/bpf/test_cgrp2_tc_kern.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <uapi/linux/if_ether.h> +#include <uapi/linux/in6.h> +#include <uapi/linux/ipv6.h> +#include <uapi/linux/pkt_cls.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +/* copy of 'struct ethhdr' without __packed */ +struct eth_hdr { +	unsigned char   h_dest[ETH_ALEN]; +	unsigned char   h_source[ETH_ALEN]; +	unsigned short  h_proto; +}; + +#define PIN_GLOBAL_NS		2 +struct bpf_elf_map { +	__u32 type; +	__u32 size_key; +	__u32 size_value; +	__u32 max_elem; +	__u32 flags; +	__u32 id; +	__u32 pinning; +}; + +struct bpf_elf_map SEC("maps") test_cgrp2_array_pin = { +	.type		= BPF_MAP_TYPE_CGROUP_ARRAY, +	.size_key	= sizeof(uint32_t), +	.size_value	= sizeof(uint32_t), +	.pinning	= PIN_GLOBAL_NS, +	.max_elem	= 1, +}; + +SEC("filter") +int handle_egress(struct __sk_buff *skb) +{ +	void *data = (void *)(long)skb->data; +	struct eth_hdr *eth = data; +	struct ipv6hdr *ip6h = data + sizeof(*eth); +	void *data_end = (void *)(long)skb->data_end; +	char dont_care_msg[] = "dont care %04x %d\n"; +	char pass_msg[] = "pass\n"; +	char reject_msg[] = "reject\n"; + +	/* single length check */ +	if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) +		return TC_ACT_OK; + +	if (eth->h_proto != htons(ETH_P_IPV6) || +	    ip6h->nexthdr != IPPROTO_ICMPV6) { +		bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg), +				 eth->h_proto, ip6h->nexthdr); +		return TC_ACT_OK; +	} else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) { +		bpf_trace_printk(pass_msg, sizeof(pass_msg)); +		return TC_ACT_OK; +	} else { +		bpf_trace_printk(reject_msg, sizeof(reject_msg)); +		return TC_ACT_SHOT; +	} +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c new file mode 100644 index 000000000000..3a677c807044 --- /dev/null +++ b/samples/bpf/test_probe_write_user_kern.c @@ -0,0 +1,52 @@ +/* Copyright (c) 2016 Sargun Dhillon <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <uapi/linux/bpf.h> +#include <linux/version.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") dnat_map = { +	.type = BPF_MAP_TYPE_HASH, +	.key_size = sizeof(struct sockaddr_in), +	.value_size = sizeof(struct sockaddr_in), +	.max_entries = 256, +}; + +/* kprobe is NOT a stable ABI + * kernel functions can be removed, renamed or completely change semantics. + * Number of arguments and their positions can change, etc. + * In such case this bpf+kprobe example will no longer be meaningful + * + * This example sits on a syscall, and the syscall ABI is relatively stable + * of course, across platforms, and over time, the ABI may change. + */ +SEC("kprobe/sys_connect") +int bpf_prog1(struct pt_regs *ctx) +{ +	struct sockaddr_in new_addr, orig_addr = {}; +	struct sockaddr_in *mapped_addr; +	void *sockaddr_arg = (void *)PT_REGS_PARM2(ctx); +	int sockaddr_len = (int)PT_REGS_PARM3(ctx); + +	if (sockaddr_len > sizeof(orig_addr)) +		return 0; + +	if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0) +		return 0; + +	mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr); +	if (mapped_addr != NULL) { +		memcpy(&new_addr, mapped_addr, sizeof(new_addr)); +		bpf_probe_write_user(sockaddr_arg, &new_addr, +				     sizeof(new_addr)); +	} +	return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c new file mode 100644 index 000000000000..a44bf347bedd --- /dev/null +++ b/samples/bpf/test_probe_write_user_user.c @@ -0,0 +1,78 @@ +#include <stdio.h> +#include <assert.h> +#include <linux/bpf.h> +#include <unistd.h> +#include "libbpf.h" +#include "bpf_load.h" +#include <sys/socket.h> +#include <string.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +int main(int ac, char **argv) +{ +	int serverfd, serverconnfd, clientfd; +	socklen_t sockaddr_len; +	struct sockaddr serv_addr, mapped_addr, tmp_addr; +	struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in; +	char filename[256]; +	char *ip; + +	serv_addr_in = (struct sockaddr_in *)&serv_addr; +	mapped_addr_in = (struct sockaddr_in *)&mapped_addr; +	tmp_addr_in = (struct sockaddr_in *)&tmp_addr; + +	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + +	if (load_bpf_file(filename)) { +		printf("%s", bpf_log_buf); +		return 1; +	} + +	assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0); +	assert((clientfd = socket(AF_INET, SOCK_STREAM, 0)) > 0); + +	/* Bind server to ephemeral port on lo */ +	memset(&serv_addr, 0, sizeof(serv_addr)); +	serv_addr_in->sin_family = AF_INET; +	serv_addr_in->sin_port = 0; +	serv_addr_in->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + +	assert(bind(serverfd, &serv_addr, sizeof(serv_addr)) == 0); + +	sockaddr_len = sizeof(serv_addr); +	assert(getsockname(serverfd, &serv_addr, &sockaddr_len) == 0); +	ip = inet_ntoa(serv_addr_in->sin_addr); +	printf("Server bound to: %s:%d\n", ip, ntohs(serv_addr_in->sin_port)); + +	memset(&mapped_addr, 0, sizeof(mapped_addr)); +	mapped_addr_in->sin_family = AF_INET; +	mapped_addr_in->sin_port = htons(5555); +	mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255"); + +	assert(!bpf_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY)); + +	assert(listen(serverfd, 5) == 0); + +	ip = inet_ntoa(mapped_addr_in->sin_addr); +	printf("Client connecting to: %s:%d\n", +	       ip, ntohs(mapped_addr_in->sin_port)); +	assert(connect(clientfd, &mapped_addr, sizeof(mapped_addr)) == 0); + +	sockaddr_len = sizeof(tmp_addr); +	ip = inet_ntoa(tmp_addr_in->sin_addr); +	assert((serverconnfd = accept(serverfd, &tmp_addr, &sockaddr_len)) > 0); +	printf("Server received connection from: %s:%d\n", +	       ip, ntohs(tmp_addr_in->sin_port)); + +	sockaddr_len = sizeof(tmp_addr); +	assert(getpeername(clientfd, &tmp_addr, &sockaddr_len) == 0); +	ip = inet_ntoa(tmp_addr_in->sin_addr); +	printf("Client's peer address: %s:%d\n", +	       ip, ntohs(tmp_addr_in->sin_port)); + +	/* Is the server's getsockname = the socket getpeername */ +	assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0); + +	return 0; +} diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c new file mode 100644 index 000000000000..219742106bfd --- /dev/null +++ b/samples/bpf/xdp1_kern.c @@ -0,0 +1,93 @@ +/* Copyright (c) 2016 PLUMgrid + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") rxcnt = { +	.type = BPF_MAP_TYPE_PERCPU_ARRAY, +	.key_size = sizeof(u32), +	.value_size = sizeof(long), +	.max_entries = 256, +}; + +static int parse_ipv4(void *data, u64 nh_off, void *data_end) +{ +	struct iphdr *iph = data + nh_off; + +	if (iph + 1 > data_end) +		return 0; +	return iph->protocol; +} + +static int parse_ipv6(void *data, u64 nh_off, void *data_end) +{ +	struct ipv6hdr *ip6h = data + nh_off; + +	if (ip6h + 1 > data_end) +		return 0; +	return ip6h->nexthdr; +} + +SEC("xdp1") +int xdp_prog1(struct xdp_md *ctx) +{ +	void *data_end = (void *)(long)ctx->data_end; +	void *data = (void *)(long)ctx->data; +	struct ethhdr *eth = data; +	int rc = XDP_DROP; +	long *value; +	u16 h_proto; +	u64 nh_off; +	u32 ipproto; + +	nh_off = sizeof(*eth); +	if (data + nh_off > data_end) +		return rc; + +	h_proto = eth->h_proto; + +	if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { +		struct vlan_hdr *vhdr; + +		vhdr = data + nh_off; +		nh_off += sizeof(struct vlan_hdr); +		if (data + nh_off > data_end) +			return rc; +		h_proto = vhdr->h_vlan_encapsulated_proto; +	} +	if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { +		struct vlan_hdr *vhdr; + +		vhdr = data + nh_off; +		nh_off += sizeof(struct vlan_hdr); +		if (data + nh_off > data_end) +			return rc; +		h_proto = vhdr->h_vlan_encapsulated_proto; +	} + +	if (h_proto == htons(ETH_P_IP)) +		ipproto = parse_ipv4(data, nh_off, data_end); +	else if (h_proto == htons(ETH_P_IPV6)) +		ipproto = parse_ipv6(data, nh_off, data_end); +	else +		ipproto = 0; + +	value = bpf_map_lookup_elem(&rxcnt, &ipproto); +	if (value) +		*value += 1; + +	return rc; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c new file mode 100644 index 000000000000..a5e109e398a1 --- /dev/null +++ b/samples/bpf/xdp1_user.c @@ -0,0 +1,181 @@ +/* Copyright (c) 2016 PLUMgrid + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/bpf.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <unistd.h> +#include "bpf_load.h" +#include "libbpf.h" + +static int set_link_xdp_fd(int ifindex, int fd) +{ +	struct sockaddr_nl sa; +	int sock, seq = 0, len, ret = -1; +	char buf[4096]; +	struct nlattr *nla, *nla_xdp; +	struct { +		struct nlmsghdr  nh; +		struct ifinfomsg ifinfo; +		char             attrbuf[64]; +	} req; +	struct nlmsghdr *nh; +	struct nlmsgerr *err; + +	memset(&sa, 0, sizeof(sa)); +	sa.nl_family = AF_NETLINK; + +	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); +	if (sock < 0) { +		printf("open netlink socket: %s\n", strerror(errno)); +		return -1; +	} + +	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { +		printf("bind to netlink: %s\n", strerror(errno)); +		goto cleanup; +	} + +	memset(&req, 0, sizeof(req)); +	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); +	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; +	req.nh.nlmsg_type = RTM_SETLINK; +	req.nh.nlmsg_pid = 0; +	req.nh.nlmsg_seq = ++seq; +	req.ifinfo.ifi_family = AF_UNSPEC; +	req.ifinfo.ifi_index = ifindex; +	nla = (struct nlattr *)(((char *)&req) +				+ NLMSG_ALIGN(req.nh.nlmsg_len)); +	nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/; + +	nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN); +	nla_xdp->nla_type = 1/*IFLA_XDP_FD*/; +	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); +	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); +	nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len; + +	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); + +	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { +		printf("send to netlink: %s\n", strerror(errno)); +		goto cleanup; +	} + +	len = recv(sock, buf, sizeof(buf), 0); +	if (len < 0) { +		printf("recv from netlink: %s\n", strerror(errno)); +		goto cleanup; +	} + +	for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); +	     nh = NLMSG_NEXT(nh, len)) { +		if (nh->nlmsg_pid != getpid()) { +			printf("Wrong pid %d, expected %d\n", +			       nh->nlmsg_pid, getpid()); +			goto cleanup; +		} +		if (nh->nlmsg_seq != seq) { +			printf("Wrong seq %d, expected %d\n", +			       nh->nlmsg_seq, seq); +			goto cleanup; +		} +		switch (nh->nlmsg_type) { +		case NLMSG_ERROR: +			err = (struct nlmsgerr *)NLMSG_DATA(nh); +			if (!err->error) +				continue; +			printf("nlmsg error %s\n", strerror(-err->error)); +			goto cleanup; +		case NLMSG_DONE: +			break; +		} +	} + +	ret = 0; + +cleanup: +	close(sock); +	return ret; +} + +static int ifindex; + +static void int_exit(int sig) +{ +	set_link_xdp_fd(ifindex, -1); +	exit(0); +} + +/* simple per-protocol drop counter + */ +static void poll_stats(int interval) +{ +	unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); +	const unsigned int nr_keys = 256; +	__u64 values[nr_cpus], prev[nr_keys][nr_cpus]; +	__u32 key; +	int i; + +	memset(prev, 0, sizeof(prev)); + +	while (1) { +		sleep(interval); + +		for (key = 0; key < nr_keys; key++) { +			__u64 sum = 0; + +			assert(bpf_lookup_elem(map_fd[0], &key, values) == 0); +			for (i = 0; i < nr_cpus; i++) +				sum += (values[i] - prev[key][i]); +			if (sum) +				printf("proto %u: %10llu pkt/s\n", +				       key, sum / interval); +			memcpy(prev[key], values, sizeof(values)); +		} +	} +} + +int main(int ac, char **argv) +{ +	char filename[256]; + +	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + +	if (ac != 2) { +		printf("usage: %s IFINDEX\n", argv[0]); +		return 1; +	} + +	ifindex = strtoul(argv[1], NULL, 0); + +	if (load_bpf_file(filename)) { +		printf("%s", bpf_log_buf); +		return 1; +	} + +	if (!prog_fd[0]) { +		printf("load_bpf_file: %s\n", strerror(errno)); +		return 1; +	} + +	signal(SIGINT, int_exit); + +	if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) { +		printf("link set xdp fd failed\n"); +		return 1; +	} + +	poll_stats(2); + +	return 0; +} diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c new file mode 100644 index 000000000000..e01288867d15 --- /dev/null +++ b/samples/bpf/xdp2_kern.c @@ -0,0 +1,114 @@ +/* Copyright (c) 2016 PLUMgrid + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") rxcnt = { +	.type = BPF_MAP_TYPE_PERCPU_ARRAY, +	.key_size = sizeof(u32), +	.value_size = sizeof(long), +	.max_entries = 256, +}; + +static void swap_src_dst_mac(void *data) +{ +	unsigned short *p = data; +	unsigned short dst[3]; + +	dst[0] = p[0]; +	dst[1] = p[1]; +	dst[2] = p[2]; +	p[0] = p[3]; +	p[1] = p[4]; +	p[2] = p[5]; +	p[3] = dst[0]; +	p[4] = dst[1]; +	p[5] = dst[2]; +} + +static int parse_ipv4(void *data, u64 nh_off, void *data_end) +{ +	struct iphdr *iph = data + nh_off; + +	if (iph + 1 > data_end) +		return 0; +	return iph->protocol; +} + +static int parse_ipv6(void *data, u64 nh_off, void *data_end) +{ +	struct ipv6hdr *ip6h = data + nh_off; + +	if (ip6h + 1 > data_end) +		return 0; +	return ip6h->nexthdr; +} + +SEC("xdp1") +int xdp_prog1(struct xdp_md *ctx) +{ +	void *data_end = (void *)(long)ctx->data_end; +	void *data = (void *)(long)ctx->data; +	struct ethhdr *eth = data; +	int rc = XDP_DROP; +	long *value; +	u16 h_proto; +	u64 nh_off; +	u32 ipproto; + +	nh_off = sizeof(*eth); +	if (data + nh_off > data_end) +		return rc; + +	h_proto = eth->h_proto; + +	if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { +		struct vlan_hdr *vhdr; + +		vhdr = data + nh_off; +		nh_off += sizeof(struct vlan_hdr); +		if (data + nh_off > data_end) +			return rc; +		h_proto = vhdr->h_vlan_encapsulated_proto; +	} +	if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { +		struct vlan_hdr *vhdr; + +		vhdr = data + nh_off; +		nh_off += sizeof(struct vlan_hdr); +		if (data + nh_off > data_end) +			return rc; +		h_proto = vhdr->h_vlan_encapsulated_proto; +	} + +	if (h_proto == htons(ETH_P_IP)) +		ipproto = parse_ipv4(data, nh_off, data_end); +	else if (h_proto == htons(ETH_P_IPV6)) +		ipproto = parse_ipv6(data, nh_off, data_end); +	else +		ipproto = 0; + +	value = bpf_map_lookup_elem(&rxcnt, &ipproto); +	if (value) +		*value += 1; + +	if (ipproto == IPPROTO_UDP) { +		swap_src_dst_mac(data); +		rc = XDP_TX; +	} + +	return rc; +} + +char _license[] SEC("license") = "GPL";  |