aboutsummaryrefslogtreecommitdiff
path: root/samples/bpf/map_perf_test_user.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-02 16:40:27 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-02 16:40:27 -0700
commit8d65b08debc7e62b2c6032d7fe7389d895b92cbc (patch)
tree0c3141b60c3a03cc32742b5750c5e763b9dae489 /samples/bpf/map_perf_test_user.c
parent5a0387a8a8efb90ae7fea1e2e5c62de3efa74691 (diff)
parent5d15af6778b8e4ed1fd41b040283af278e7a9a72 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Millar: "Here are some highlights from the 2065 networking commits that happened this development cycle: 1) XDP support for IXGBE (John Fastabend) and thunderx (Sunil Kowuri) 2) Add a generic XDP driver, so that anyone can test XDP even if they lack a networking device whose driver has explicit XDP support (me). 3) Sparc64 now has an eBPF JIT too (me) 4) Add a BPF program testing framework via BPF_PROG_TEST_RUN (Alexei Starovoitov) 5) Make netfitler network namespace teardown less expensive (Florian Westphal) 6) Add symmetric hashing support to nft_hash (Laura Garcia Liebana) 7) Implement NAPI and GRO in netvsc driver (Stephen Hemminger) 8) Support TC flower offload statistics in mlxsw (Arkadi Sharshevsky) 9) Multiqueue support in stmmac driver (Joao Pinto) 10) Remove TCP timewait recycling, it never really could possibly work well in the real world and timestamp randomization really zaps any hint of usability this feature had (Soheil Hassas Yeganeh) 11) Support level3 vs level4 ECMP route hashing in ipv4 (Nikolay Aleksandrov) 12) Add socket busy poll support to epoll (Sridhar Samudrala) 13) Netlink extended ACK support (Johannes Berg, Pablo Neira Ayuso, and several others) 14) IPSEC hw offload infrastructure (Steffen Klassert)" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2065 commits) tipc: refactor function tipc_sk_recv_stream() tipc: refactor function tipc_sk_recvmsg() net: thunderx: Optimize page recycling for XDP net: thunderx: Support for XDP header adjustment net: thunderx: Add support for XDP_TX net: thunderx: Add support for XDP_DROP net: thunderx: Add basic XDP support net: thunderx: Cleanup receive buffer allocation net: thunderx: Optimize CQE_TX handling net: thunderx: Optimize RBDR descriptor handling net: thunderx: Support for page recycling ipx: call ipxitf_put() in ioctl error path net: sched: add helpers to handle extended actions qed*: Fix issues in the ptp filter config implementation. qede: Fix concurrency issue in PTP Tx path processing. stmmac: Add support for SIMATIC IOT2000 platform net: hns: fix ethtool_get_strings overflow in hns driver tcp: fix wraparound issue in tcp_lp bpf, arm64: fix jit branch offset related to ldimm64 bpf, arm64: implement jiting of BPF_XADD ...
Diffstat (limited to 'samples/bpf/map_perf_test_user.c')
-rw-r--r--samples/bpf/map_perf_test_user.c255
1 files changed, 202 insertions, 53 deletions
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 680260a91f50..6ac778153315 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -18,10 +18,14 @@
#include <string.h>
#include <time.h>
#include <sys/resource.h>
+#include <arpa/inet.h>
+#include <errno.h>
+
#include "libbpf.h"
#include "bpf_load.h"
-#define MAX_CNT 1000000
+#define TEST_BIT(t) (1U << (t))
+#define MAX_NR_CPUS 1024
static __u64 time_get_ns(void)
{
@@ -31,15 +35,44 @@ static __u64 time_get_ns(void)
return ts.tv_sec * 1000000000ull + ts.tv_nsec;
}
-#define HASH_PREALLOC (1 << 0)
-#define PERCPU_HASH_PREALLOC (1 << 1)
-#define HASH_KMALLOC (1 << 2)
-#define PERCPU_HASH_KMALLOC (1 << 3)
-#define LRU_HASH_PREALLOC (1 << 4)
-#define PERCPU_LRU_HASH_PREALLOC (1 << 5)
-#define LPM_KMALLOC (1 << 6)
+enum test_type {
+ HASH_PREALLOC,
+ PERCPU_HASH_PREALLOC,
+ HASH_KMALLOC,
+ PERCPU_HASH_KMALLOC,
+ LRU_HASH_PREALLOC,
+ NOCOMMON_LRU_HASH_PREALLOC,
+ LPM_KMALLOC,
+ HASH_LOOKUP,
+ ARRAY_LOOKUP,
+ INNER_LRU_HASH_PREALLOC,
+ NR_TESTS,
+};
+
+const char *test_map_names[NR_TESTS] = {
+ [HASH_PREALLOC] = "hash_map",
+ [PERCPU_HASH_PREALLOC] = "percpu_hash_map",
+ [HASH_KMALLOC] = "hash_map_alloc",
+ [PERCPU_HASH_KMALLOC] = "percpu_hash_map_alloc",
+ [LRU_HASH_PREALLOC] = "lru_hash_map",
+ [NOCOMMON_LRU_HASH_PREALLOC] = "nocommon_lru_hash_map",
+ [LPM_KMALLOC] = "lpm_trie_map_alloc",
+ [HASH_LOOKUP] = "hash_map",
+ [ARRAY_LOOKUP] = "array_map",
+ [INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map",
+};
static int test_flags = ~0;
+static uint32_t num_map_entries;
+static uint32_t inner_lru_hash_size;
+static int inner_lru_hash_idx = -1;
+static int array_of_lru_hashs_idx = -1;
+static uint32_t max_cnt = 1000000;
+
+static int check_test_flags(enum test_type t)
+{
+ return test_flags & TEST_BIT(t);
+}
static void test_hash_prealloc(int cpu)
{
@@ -47,34 +80,89 @@ static void test_hash_prealloc(int cpu)
int i;
start_time = time_get_ns();
- for (i = 0; i < MAX_CNT; i++)
+ for (i = 0; i < max_cnt; i++)
syscall(__NR_getuid);
printf("%d:hash_map_perf pre-alloc %lld events per sec\n",
- cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+ cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
}
-static void test_lru_hash_prealloc(int cpu)
+static void do_test_lru(enum test_type test, int cpu)
{
+ static int inner_lru_map_fds[MAX_NR_CPUS];
+
+ struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 };
+ const char *test_name;
__u64 start_time;
- int i;
+ int i, ret;
+
+ if (test == INNER_LRU_HASH_PREALLOC) {
+ int outer_fd = map_fd[array_of_lru_hashs_idx];
+
+ assert(cpu < MAX_NR_CPUS);
+
+ if (cpu) {
+ inner_lru_map_fds[cpu] =
+ bpf_create_map(BPF_MAP_TYPE_LRU_HASH,
+ sizeof(uint32_t), sizeof(long),
+ inner_lru_hash_size, 0);
+ if (inner_lru_map_fds[cpu] == -1) {
+ printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n",
+ strerror(errno), errno);
+ exit(1);
+ }
+ } else {
+ inner_lru_map_fds[cpu] = map_fd[inner_lru_hash_idx];
+ }
+
+ ret = bpf_map_update_elem(outer_fd, &cpu,
+ &inner_lru_map_fds[cpu],
+ BPF_ANY);
+ if (ret) {
+ printf("cannot update ARRAY_OF_LRU_HASHS with key:%u. %s(%d)\n",
+ cpu, strerror(errno), errno);
+ exit(1);
+ }
+ }
+
+ in6.sin6_addr.s6_addr16[0] = 0xdead;
+ in6.sin6_addr.s6_addr16[1] = 0xbeef;
+
+ if (test == LRU_HASH_PREALLOC) {
+ test_name = "lru_hash_map_perf";
+ in6.sin6_addr.s6_addr16[7] = 0;
+ } else if (test == NOCOMMON_LRU_HASH_PREALLOC) {
+ test_name = "nocommon_lru_hash_map_perf";
+ in6.sin6_addr.s6_addr16[7] = 1;
+ } else if (test == INNER_LRU_HASH_PREALLOC) {
+ test_name = "inner_lru_hash_map_perf";
+ in6.sin6_addr.s6_addr16[7] = 2;
+ } else {
+ assert(0);
+ }
start_time = time_get_ns();
- for (i = 0; i < MAX_CNT; i++)
- syscall(__NR_getpid);
- printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n",
- cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+ for (i = 0; i < max_cnt; i++) {
+ ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6));
+ assert(ret == -1 && errno == EBADF);
+ }
+ printf("%d:%s pre-alloc %lld events per sec\n",
+ cpu, test_name,
+ max_cnt * 1000000000ll / (time_get_ns() - start_time));
+}
+
+static void test_lru_hash_prealloc(int cpu)
+{
+ do_test_lru(LRU_HASH_PREALLOC, cpu);
}
-static void test_percpu_lru_hash_prealloc(int cpu)
+static void test_nocommon_lru_hash_prealloc(int cpu)
{
- __u64 start_time;
- int i;
+ do_test_lru(NOCOMMON_LRU_HASH_PREALLOC, cpu);
+}
- start_time = time_get_ns();
- for (i = 0; i < MAX_CNT; i++)
- syscall(__NR_getppid);
- printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n",
- cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+static void test_inner_lru_hash_prealloc(int cpu)
+{
+ do_test_lru(INNER_LRU_HASH_PREALLOC, cpu);
}
static void test_percpu_hash_prealloc(int cpu)
@@ -83,10 +171,10 @@ static void test_percpu_hash_prealloc(int cpu)
int i;
start_time = time_get_ns();
- for (i = 0; i < MAX_CNT; i++)
+ for (i = 0; i < max_cnt; i++)
syscall(__NR_geteuid);
printf("%d:percpu_hash_map_perf pre-alloc %lld events per sec\n",
- cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+ cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
}
static void test_hash_kmalloc(int cpu)
@@ -95,10 +183,10 @@ static void test_hash_kmalloc(int cpu)
int i;
start_time = time_get_ns();
- for (i = 0; i < MAX_CNT; i++)
+ for (i = 0; i < max_cnt; i++)
syscall(__NR_getgid);
printf("%d:hash_map_perf kmalloc %lld events per sec\n",
- cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+ cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
}
static void test_percpu_hash_kmalloc(int cpu)
@@ -107,10 +195,10 @@ static void test_percpu_hash_kmalloc(int cpu)
int i;
start_time = time_get_ns();
- for (i = 0; i < MAX_CNT; i++)
+ for (i = 0; i < max_cnt; i++)
syscall(__NR_getegid);
printf("%d:percpu_hash_map_perf kmalloc %lld events per sec\n",
- cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+ cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
}
static void test_lpm_kmalloc(int cpu)
@@ -119,40 +207,63 @@ static void test_lpm_kmalloc(int cpu)
int i;
start_time = time_get_ns();
- for (i = 0; i < MAX_CNT; i++)
+ for (i = 0; i < max_cnt; i++)
syscall(__NR_gettid);
printf("%d:lpm_perf kmalloc %lld events per sec\n",
- cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+ cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
}
-static void loop(int cpu)
+static void test_hash_lookup(int cpu)
{
- cpu_set_t cpuset;
-
- CPU_ZERO(&cpuset);
- CPU_SET(cpu, &cpuset);
- sched_setaffinity(0, sizeof(cpuset), &cpuset);
+ __u64 start_time;
+ int i;
- if (test_flags & HASH_PREALLOC)
- test_hash_prealloc(cpu);
+ start_time = time_get_ns();
+ for (i = 0; i < max_cnt; i++)
+ syscall(__NR_getpgid, 0);
+ printf("%d:hash_lookup %lld lookups per sec\n",
+ cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time));
+}
- if (test_flags & PERCPU_HASH_PREALLOC)
- test_percpu_hash_prealloc(cpu);
+static void test_array_lookup(int cpu)
+{
+ __u64 start_time;
+ int i;
- if (test_flags & HASH_KMALLOC)
- test_hash_kmalloc(cpu);
+ start_time = time_get_ns();
+ for (i = 0; i < max_cnt; i++)
+ syscall(__NR_getpgrp, 0);
+ printf("%d:array_lookup %lld lookups per sec\n",
+ cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time));
+}
- if (test_flags & PERCPU_HASH_KMALLOC)
- test_percpu_hash_kmalloc(cpu);
+typedef void (*test_func)(int cpu);
+const test_func test_funcs[] = {
+ [HASH_PREALLOC] = test_hash_prealloc,
+ [PERCPU_HASH_PREALLOC] = test_percpu_hash_prealloc,
+ [HASH_KMALLOC] = test_hash_kmalloc,
+ [PERCPU_HASH_KMALLOC] = test_percpu_hash_kmalloc,
+ [LRU_HASH_PREALLOC] = test_lru_hash_prealloc,
+ [NOCOMMON_LRU_HASH_PREALLOC] = test_nocommon_lru_hash_prealloc,
+ [LPM_KMALLOC] = test_lpm_kmalloc,
+ [HASH_LOOKUP] = test_hash_lookup,
+ [ARRAY_LOOKUP] = test_array_lookup,
+ [INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc,
+};
- if (test_flags & LRU_HASH_PREALLOC)
- test_lru_hash_prealloc(cpu);
+static void loop(int cpu)
+{
+ cpu_set_t cpuset;
+ int i;
- if (test_flags & PERCPU_LRU_HASH_PREALLOC)
- test_percpu_lru_hash_prealloc(cpu);
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ sched_setaffinity(0, sizeof(cpuset), &cpuset);
- if (test_flags & LPM_KMALLOC)
- test_lpm_kmalloc(cpu);
+ for (i = 0; i < NR_TESTS; i++) {
+ if (check_test_flags(i))
+ test_funcs[i](cpu);
+ }
}
static void run_perf_test(int tasks)
@@ -209,6 +320,38 @@ static void fill_lpm_trie(void)
assert(!r);
}
+static void fixup_map(struct bpf_map_def *map, const char *name, int idx)
+{
+ int i;
+
+ if (!strcmp("inner_lru_hash_map", name)) {
+ inner_lru_hash_idx = idx;
+ inner_lru_hash_size = map->max_entries;
+ }
+
+ if (!strcmp("array_of_lru_hashs", name)) {
+ if (inner_lru_hash_idx == -1) {
+ printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n");
+ exit(1);
+ }
+ map->inner_map_idx = inner_lru_hash_idx;
+ array_of_lru_hashs_idx = idx;
+ }
+
+ if (num_map_entries <= 0)
+ return;
+
+ inner_lru_hash_size = num_map_entries;
+
+ /* Only change the max_entries for the enabled test(s) */
+ for (i = 0; i < NR_TESTS; i++) {
+ if (!strcmp(test_map_names[i], name) &&
+ (check_test_flags(i))) {
+ map->max_entries = num_map_entries;
+ }
+ }
+}
+
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
@@ -224,7 +367,13 @@ int main(int argc, char **argv)
if (argc > 2)
num_cpu = atoi(argv[2]) ? : num_cpu;
- if (load_bpf_file(filename)) {
+ if (argc > 3)
+ num_map_entries = atoi(argv[3]);
+
+ if (argc > 4)
+ max_cnt = atoi(argv[4]);
+
+ if (load_bpf_file_fixup_map(filename, fixup_map)) {
printf("%s", bpf_log_buf);
return 1;
}