diff options
author | Joanne Koong <joannekoong@fb.com> | 2021-11-29 19:06:21 -0800 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2021-11-30 10:56:28 -0800 |
commit | f6e659b7f97c76d0471d12bf274ea2a097cf3c5c (patch) | |
tree | b8130dd0f6a15b2b28121d425169ef8f707382c2 /tools/testing/selftests/bpf/progs/pyperf.h | |
parent | 4e5070b64b375a9c1f570893cfceeba108382bef (diff) |
selftests/bpf: Measure bpf_loop verifier performance
This patch tests bpf_loop in pyperf and strobemeta, and measures the
verifier performance of replacing the traditional for loop
with bpf_loop.
The results are as follows:
~strobemeta~
Baseline
verification time 6808200 usec
stack depth 496
processed 554252 insns (limit 1000000) max_states_per_insn 16
total_states 15878 peak_states 13489 mark_read 3110
#192 verif_scale_strobemeta:OK (unrolled loop)
Using bpf_loop
verification time 31589 usec
stack depth 96+400
processed 1513 insns (limit 1000000) max_states_per_insn 2
total_states 106 peak_states 106 mark_read 60
#193 verif_scale_strobemeta_bpf_loop:OK
~pyperf600~
Baseline
verification time 29702486 usec
stack depth 368
processed 626838 insns (limit 1000000) max_states_per_insn 7
total_states 30368 peak_states 30279 mark_read 748
#182 verif_scale_pyperf600:OK (unrolled loop)
Using bpf_loop
verification time 148488 usec
stack depth 320+40
processed 10518 insns (limit 1000000) max_states_per_insn 10
total_states 705 peak_states 517 mark_read 38
#183 verif_scale_pyperf600_bpf_loop:OK
Using the bpf_loop helper led to approximately a 99% decrease
in the verification time and in the number of instructions.
Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211130030622.4131246-4-joannekoong@fb.com
Diffstat (limited to 'tools/testing/selftests/bpf/progs/pyperf.h')
-rw-r--r-- | tools/testing/selftests/bpf/progs/pyperf.h | 71 |
1 files changed, 70 insertions, 1 deletions
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 2fb7adafb6b6..1ed28882daf3 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -159,6 +159,59 @@ struct { __uint(value_size, sizeof(long long) * 127); } stackmap SEC(".maps"); +#ifdef USE_BPF_LOOP +struct process_frame_ctx { + int cur_cpu; + int32_t *symbol_counter; + void *frame_ptr; + FrameData *frame; + PidData *pidData; + Symbol *sym; + Event *event; + bool done; +}; + +#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) + +static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx) +{ + int zero = 0; + void *frame_ptr = ctx->frame_ptr; + PidData *pidData = ctx->pidData; + FrameData *frame = ctx->frame; + int32_t *symbol_counter = ctx->symbol_counter; + int cur_cpu = ctx->cur_cpu; + Event *event = ctx->event; + Symbol *sym = ctx->sym; + + if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) { + int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; + int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym); + + if (!symbol_id) { + bpf_map_update_elem(&symbolmap, sym, &zero, 0); + symbol_id = bpf_map_lookup_elem(&symbolmap, sym); + if (!symbol_id) { + ctx->done = true; + return 1; + } + } + if (*symbol_id == new_symbol_id) + (*symbol_counter)++; + + barrier_var(i); + if (i >= STACK_MAX_LEN) + return 1; + + event->stack[i] = *symbol_id; + + event->stack_len = i + 1; + frame_ptr = frame->f_back; + } + return 0; +} +#endif /* USE_BPF_LOOP */ + #ifdef GLOBAL_FUNC __noinline #elif defined(SUBPROGS) @@ -228,11 +281,26 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); if (symbol_counter == NULL) return 0; +#ifdef USE_BPF_LOOP + struct process_frame_ctx ctx = { + .cur_cpu = cur_cpu, + .symbol_counter = symbol_counter, + .frame_ptr = frame_ptr, + .frame = &frame, + .pidData = pidData, + .sym = &sym, + .event = event, + }; + + bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0); + if (ctx.done) + return 0; +#else #ifdef NO_UNROLL #pragma clang loop unroll(disable) #else #pragma clang loop unroll(full) -#endif +#endif /* NO_UNROLL */ /* Unwind python stack */ for (int i = 0; i < STACK_MAX_LEN; ++i) { if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { @@ -251,6 +319,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) frame_ptr = frame.f_back; } } +#endif /* USE_BPF_LOOP */ event->stack_complete = frame_ptr == NULL; } else { event->stack_complete = 1; |