diff options
author | Alexei Starovoitov <[email protected]> | 2023-12-06 17:17:21 -0800 |
---|---|---|
committer | Alexei Starovoitov <[email protected]> | 2023-12-06 17:17:21 -0800 |
commit | 2146f7fe6e028a3905f0658a1a0d8ef7c115d6c1 (patch) | |
tree | 9ed227cd85ea4549462a4fecca5b1bada101d3c1 | |
parent | 7065eefb38f16c91e9ace36fb7c873e4c9857c27 (diff) | |
parent | 3ba026fca8786161b0c4d75be396e61d6816e0a1 (diff) |
Merge branch 'allocate-bpf-trampoline-on-bpf_prog_pack'
Song Liu says:
====================
Allocate bpf trampoline on bpf_prog_pack
This set enables allocating bpf trampoline from bpf_prog_pack on x86. The
majority of this work, however, is the refactoring of trampoline code.
This is needed because we need to handle 4 archs and 2 users (trampoline
and struct_ops).
1/7 through 6/7 refactors trampoline code. A few helpers are added.
7/7 finally let bpf trampoline on x86 use bpf_prog_pack.
Changes in v7:
1. Use kvmalloc for rw_image in x86/arch_prepare_bpf_trampoline. (Alexei)
2. Add comment to explain why we cannot use kvmalloc in
x86/arch_bpf_trampoline_size. (Alexei)
Changes in v6:
1. Rebase.
2. Add Acked-by and Tested-by from Jiri Olsa and Björn Töpel.
Changes in v5:
1. Adjust size of trampoline ksym. (Jiri)
2. Use "unsigned int size" arg in image management helpers.(Daniel)
Changes in v4:
1. Dropped 1/8 in v3, which is already merged in bpf-next.
2. Add Reviewed-by from Björn Töpel.
Changes in v3:
1. Fix bug in s390. (Thanks to Ilya Leoshkevich).
2. Fix build error in riscv. (kernel test robot).
Changes in v2:
1. Add missing changes in net/bpf/bpf_dummy_struct_ops.c.
2. Reduce one dry run in arch_prepare_bpf_trampoline. (Xu Kuohai)
3. Other small fixes.
====================
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
-rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 55 | ||||
-rw-r--r-- | arch/riscv/net/bpf_jit_comp64.c | 25 | ||||
-rw-r--r-- | arch/s390/net/bpf_jit_comp.c | 56 | ||||
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 130 | ||||
-rw-r--r-- | include/linux/bpf.h | 14 | ||||
-rw-r--r-- | include/linux/filter.h | 2 | ||||
-rw-r--r-- | kernel/bpf/bpf_struct_ops.c | 19 | ||||
-rw-r--r-- | kernel/bpf/core.c | 21 | ||||
-rw-r--r-- | kernel/bpf/dispatcher.c | 7 | ||||
-rw-r--r-- | kernel/bpf/trampoline.c | 101 | ||||
-rw-r--r-- | net/bpf/bpf_dummy_struct_ops.c | 7 |
11 files changed, 305 insertions, 132 deletions
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 7d4af64e3982..8955da5c47cf 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1828,7 +1828,7 @@ static void restore_args(struct jit_ctx *ctx, int args_off, int nregs) * */ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, - struct bpf_tramp_links *tlinks, void *orig_call, + struct bpf_tramp_links *tlinks, void *func_addr, int nregs, u32 flags) { int i; @@ -1926,7 +1926,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, if (flags & BPF_TRAMP_F_IP_ARG) { /* save ip address of the traced function */ - emit_addr_mov_i64(A64_R(10), (const u64)orig_call, ctx); + emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx); emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx); } @@ -2026,18 +2026,10 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, return ctx->idx; } -int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, - void *image_end, const struct btf_func_model *m, - u32 flags, struct bpf_tramp_links *tlinks, - void *orig_call) +static int btf_func_model_nregs(const struct btf_func_model *m) { - int i, ret; int nregs = m->nr_args; - int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE; - struct jit_ctx ctx = { - .image = NULL, - .idx = 0, - }; + int i; /* extra registers needed for struct argument */ for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) { @@ -2046,22 +2038,49 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, nregs += (m->arg_size[i] + 7) / 8 - 1; } + return nregs; +} + +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr) +{ + struct jit_ctx ctx = { + .image = NULL, + .idx = 0, + }; + struct bpf_tramp_image im; + int nregs, ret; + + nregs = btf_func_model_nregs(m); /* the first 8 registers are used for arguments */ if (nregs > 8) return -ENOTSUPP; - ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags); + ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags); if (ret < 0) return ret; - if (ret > max_insns) - return -EFBIG; + return ret < 0 ? ret : ret * AARCH64_INSN_SIZE; +} - ctx.image = image; - ctx.idx = 0; +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, + void *image_end, const struct btf_func_model *m, + u32 flags, struct bpf_tramp_links *tlinks, + void *func_addr) +{ + int ret, nregs; + struct jit_ctx ctx = { + .image = image, + .idx = 0, + }; + + nregs = btf_func_model_nregs(m); + /* the first 8 registers are used for arguments */ + if (nregs > 8) + return -ENOTSUPP; jit_fill_hole(image, (unsigned int)(image_end - image)); - ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags); + ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags); if (ret > 0 && validate_code(&ctx) < 0) ret = -EINVAL; diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 8581693e62d3..58dc64dd94a8 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1029,23 +1029,28 @@ out: return ret; } -int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, - void *image_end, const struct btf_func_model *m, - u32 flags, struct bpf_tramp_links *tlinks, - void *func_addr) +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr) { - int ret; + struct bpf_tramp_image im; struct rv_jit_context ctx; + int ret; ctx.ninsns = 0; ctx.insns = NULL; ctx.ro_insns = NULL; - ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx); - if (ret < 0) - return ret; + ret = __arch_prepare_bpf_trampoline(&im, m, tlinks, func_addr, flags, &ctx); - if (ninsns_rvoff(ret) > (long)image_end - (long)image) - return -EFBIG; + return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns); +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, + void *image_end, const struct btf_func_model *m, + u32 flags, struct bpf_tramp_links *tlinks, + void *func_addr) +{ + int ret; + struct rv_jit_context ctx; ctx.ninsns = 0; /* diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index bf06b7283f0c..cc129617480a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2637,6 +2637,21 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, return 0; } +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *orig_call) +{ + struct bpf_tramp_image im; + struct bpf_tramp_jit tjit; + int ret; + + memset(&tjit, 0, sizeof(tjit)); + + ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags, + tlinks, orig_call); + + return ret < 0 ? ret : tjit.common.prg; +} + int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, @@ -2644,30 +2659,27 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, { struct bpf_tramp_jit tjit; int ret; - int i; - for (i = 0; i < 2; i++) { - if (i == 0) { - /* Compute offsets, check whether the code fits. */ - memset(&tjit, 0, sizeof(tjit)); - } else { - /* Generate the code. */ - tjit.common.prg = 0; - tjit.common.prg_buf = image; - } - ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags, - tlinks, func_addr); - if (ret < 0) - return ret; - if (tjit.common.prg > (char *)image_end - (char *)image) - /* - * Use the same error code as for exceeding - * BPF_MAX_TRAMP_LINKS. - */ - return -E2BIG; - } + /* Compute offsets, check whether the code fits. */ + memset(&tjit, 0, sizeof(tjit)); + ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags, + tlinks, func_addr); + + if (ret < 0) + return ret; + if (tjit.common.prg > (char *)image_end - (char *)image) + /* + * Use the same error code as for exceeding + * BPF_MAX_TRAMP_LINKS. + */ + return -E2BIG; + + tjit.common.prg = 0; + tjit.common.prg_buf = image; + ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags, + tlinks, func_addr); - return tjit.common.prg; + return ret < 0 ? ret : tjit.common.prg; } bool bpf_jit_supports_subprog_tailcalls(void) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8c10d9abc239..af4a5de7d93a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2198,7 +2198,8 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, struct bpf_tramp_link *l, int stack_size, - int run_ctx_off, bool save_ret) + int run_ctx_off, bool save_ret, + void *image, void *rw_image) { u8 *prog = *pprog; u8 *jmp_insn; @@ -2226,7 +2227,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, else EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); - if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog)) + if (emit_rsb_call(&prog, bpf_trampoline_enter(p), image + (prog - (u8 *)rw_image))) return -EINVAL; /* remember prog start time returned by __bpf_prog_enter */ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); @@ -2250,7 +2251,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, (long) p->insnsi >> 32, (u32) (long) p->insnsi); /* call JITed bpf program or interpreter */ - if (emit_rsb_call(&prog, p->bpf_func, prog)) + if (emit_rsb_call(&prog, p->bpf_func, image + (prog - (u8 *)rw_image))) return -EINVAL; /* @@ -2277,7 +2278,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off); else EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); - if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog)) + if (emit_rsb_call(&prog, bpf_trampoline_exit(p), image + (prog - (u8 *)rw_image))) return -EINVAL; *pprog = prog; @@ -2312,14 +2313,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, struct bpf_tramp_links *tl, int stack_size, - int run_ctx_off, bool save_ret) + int run_ctx_off, bool save_ret, + void *image, void *rw_image) { int i; u8 *prog = *pprog; for (i = 0; i < tl->nr_links; i++) { if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, - run_ctx_off, save_ret)) + run_ctx_off, save_ret, image, rw_image)) return -EINVAL; } *pprog = prog; @@ -2328,7 +2330,8 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, struct bpf_tramp_links *tl, int stack_size, - int run_ctx_off, u8 **branches) + int run_ctx_off, u8 **branches, + void *image, void *rw_image) { u8 *prog = *pprog; int i; @@ -2339,7 +2342,8 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, emit_mov_imm32(&prog, false, BPF_REG_0, 0); emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); for (i = 0; i < tl->nr_links; i++) { - if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true)) + if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true, + image, rw_image)) return -EINVAL; /* mod_ret prog stored return value into [rbp - 8]. Emit: @@ -2422,10 +2426,11 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, * add rsp, 8 // skip eth_type_trans's frame * ret // return to its caller */ -int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, - const struct btf_func_model *m, u32 flags, - struct bpf_tramp_links *tlinks, - void *func_addr) +static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image, + void *rw_image_end, void *image, + const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, + void *func_addr) { int i, ret, nr_regs = m->nr_args, stack_size = 0; int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off; @@ -2521,7 +2526,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i orig_call += X86_PATCH_SIZE; } - prog = image; + prog = rw_image; EMIT_ENDBR(); /* @@ -2563,7 +2568,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_CALL_ORIG) { /* arg1: mov rdi, im */ emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); - if (emit_rsb_call(&prog, __bpf_tramp_enter, prog)) { + if (emit_rsb_call(&prog, __bpf_tramp_enter, + image + (prog - (u8 *)rw_image))) { ret = -EINVAL; goto cleanup; } @@ -2571,7 +2577,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (fentry->nr_links) if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off, - flags & BPF_TRAMP_F_RET_FENTRY_RET)) + flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image)) return -EINVAL; if (fmod_ret->nr_links) { @@ -2581,7 +2587,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i return -ENOMEM; if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off, - run_ctx_off, branches)) { + run_ctx_off, branches, image, rw_image)) { ret = -EINVAL; goto cleanup; } @@ -2602,14 +2608,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i EMIT2(0xff, 0xd3); /* call *rbx */ } else { /* call original function */ - if (emit_rsb_call(&prog, orig_call, prog)) { + if (emit_rsb_call(&prog, orig_call, image + (prog - (u8 *)rw_image))) { ret = -EINVAL; goto cleanup; } } /* remember return value in a stack for bpf prog to access */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); - im->ip_after_call = prog; + im->ip_after_call = image + (prog - (u8 *)rw_image); memcpy(prog, x86_nops[5], X86_PATCH_SIZE); prog += X86_PATCH_SIZE; } @@ -2625,12 +2631,13 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i * aligned address of do_fexit. */ for (i = 0; i < fmod_ret->nr_links; i++) - emit_cond_near_jump(&branches[i], prog, branches[i], - X86_JNE); + emit_cond_near_jump(&branches[i], image + (prog - (u8 *)rw_image), + image + (branches[i] - (u8 *)rw_image), X86_JNE); } if (fexit->nr_links) - if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) { + if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, + false, image, rw_image)) { ret = -EINVAL; goto cleanup; } @@ -2643,10 +2650,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i * restored to R0. */ if (flags & BPF_TRAMP_F_CALL_ORIG) { - im->ip_epilogue = prog; + im->ip_epilogue = image + (prog - (u8 *)rw_image); /* arg1: mov rdi, im */ emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); - if (emit_rsb_call(&prog, __bpf_tramp_exit, prog)) { + if (emit_rsb_call(&prog, __bpf_tramp_exit, image + (prog - (u8 *)rw_image))) { ret = -EINVAL; goto cleanup; } @@ -2665,19 +2672,90 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_SKIP_FRAME) /* skip our return address and return to parent */ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ - emit_return(&prog, prog); + emit_return(&prog, image + (prog - (u8 *)rw_image)); /* Make sure the trampoline generation logic doesn't overflow */ - if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { + if (WARN_ON_ONCE(prog > (u8 *)rw_image_end - BPF_INSN_SAFETY)) { ret = -EFAULT; goto cleanup; } - ret = prog - (u8 *)image; + ret = prog - (u8 *)rw_image + BPF_INSN_SAFETY; cleanup: kfree(branches); return ret; } +void *arch_alloc_bpf_trampoline(unsigned int size) +{ + return bpf_prog_pack_alloc(size, jit_fill_hole); +} + +void arch_free_bpf_trampoline(void *image, unsigned int size) +{ + bpf_prog_pack_free(image, size); +} + +void arch_protect_bpf_trampoline(void *image, unsigned int size) +{ +} + +void arch_unprotect_bpf_trampoline(void *image, unsigned int size) +{ +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, + const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, + void *func_addr) +{ + void *rw_image, *tmp; + int ret; + u32 size = image_end - image; + + /* rw_image doesn't need to be in module memory range, so we can + * use kvmalloc. + */ + rw_image = kvmalloc(size, GFP_KERNEL); + if (!rw_image) + return -ENOMEM; + + ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m, + flags, tlinks, func_addr); + if (ret < 0) + goto out; + + tmp = bpf_arch_text_copy(image, rw_image, size); + if (IS_ERR(tmp)) + ret = PTR_ERR(tmp); +out: + kvfree(rw_image); + return ret; +} + +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr) +{ + struct bpf_tramp_image im; + void *image; + int ret; + + /* Allocate a temporary buffer for __arch_prepare_bpf_trampoline(). + * This will NOT cause fragmentation in direct map, as we do not + * call set_memory_*() on this buffer. + * + * We cannot use kvmalloc here, because we need image to be in + * module memory range. + */ + image = bpf_jit_alloc_exec(PAGE_SIZE); + if (!image) + return -ENOMEM; + + ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image, + m, flags, tlinks, func_addr); + bpf_jit_free_exec(image); + return ret; +} + static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image, u8 *buf) { u8 *jg_reloc, *prog = *pprog; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7a483f6b6d5f..c1a06263a4f3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1098,10 +1098,17 @@ struct bpf_tramp_run_ctx; * fexit = a set of program to run after original function */ struct bpf_tramp_image; -int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, - void *orig_call); + void *func_addr); +void *arch_alloc_bpf_trampoline(unsigned int size); +void arch_free_bpf_trampoline(void *image, unsigned int size); +void arch_protect_bpf_trampoline(void *image, unsigned int size); +void arch_unprotect_bpf_trampoline(void *image, unsigned int size); +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr); + u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start, @@ -1134,6 +1141,7 @@ enum bpf_tramp_prog_type { struct bpf_tramp_image { void *image; + int size; struct bpf_ksym ksym; struct percpu_ref pcref; void *ip_after_call; @@ -1318,7 +1326,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); /* Called only from JIT-enabled code, so there's no need for stubs. */ -void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); +void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); diff --git a/include/linux/filter.h b/include/linux/filter.h index 14354605ad26..12d907f17d36 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1067,7 +1067,7 @@ struct bpf_binary_header * bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns); -void bpf_prog_pack_free(struct bpf_binary_header *hdr); +void bpf_prog_pack_free(void *ptr, u32 size); static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) { diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index db6176fb64dc..4d53c53fc5aa 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -355,6 +355,7 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, void *image, void *image_end) { u32 flags; + int size; tlinks[BPF_TRAMP_FENTRY].links[0] = link; tlinks[BPF_TRAMP_FENTRY].nr_links = 1; @@ -362,6 +363,12 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, * and it must be used alone. */ flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0; + + size = arch_bpf_trampoline_size(model, flags, tlinks, NULL); + if (size < 0) + return size; + if (size > (unsigned long)image_end - (unsigned long)image) + return -E2BIG; return arch_prepare_bpf_trampoline(NULL, image, image_end, model, flags, tlinks, NULL); } @@ -515,7 +522,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, if (err) goto reset_unlock; } - set_memory_rox((long)st_map->image, 1); + arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE); /* Let bpf_link handle registration & unregistration. * * Pair with smp_load_acquire() during lookup_elem(). @@ -524,7 +531,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto unlock; } - set_memory_rox((long)st_map->image, 1); + arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE); err = st_ops->reg(kdata); if (likely(!err)) { /* This refcnt increment on the map here after @@ -547,8 +554,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, * there was a race in registering the struct_ops (under the same name) to * a sub-system through different struct_ops's maps. */ - set_memory_nx((long)st_map->image, 1); - set_memory_rw((long)st_map->image, 1); + arch_unprotect_bpf_trampoline(st_map->image, PAGE_SIZE); reset_unlock: bpf_struct_ops_map_put_progs(st_map); @@ -616,7 +622,7 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map) bpf_struct_ops_map_put_progs(st_map); bpf_map_area_free(st_map->links); if (st_map->image) { - bpf_jit_free_exec(st_map->image); + arch_free_bpf_trampoline(st_map->image, PAGE_SIZE); bpf_jit_uncharge_modmem(PAGE_SIZE); } bpf_map_area_free(st_map->uvalue); @@ -691,7 +697,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) return ERR_PTR(ret); } - st_map->image = bpf_jit_alloc_exec(PAGE_SIZE); + st_map->image = arch_alloc_bpf_trampoline(PAGE_SIZE); if (!st_map->image) { /* __bpf_struct_ops_map_free() uses st_map->image as flag * for "charged or not". In this case, we need to unchange @@ -711,7 +717,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) } mutex_init(&st_map->lock); - set_vm_flush_reset_perms(st_map->image); bpf_map_init_from_attr(map, attr); return map; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ced511f44174..c34513d645c4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -928,20 +928,20 @@ out: return ptr; } -void bpf_prog_pack_free(struct bpf_binary_header *hdr) +void bpf_prog_pack_free(void *ptr, u32 size) { struct bpf_prog_pack *pack = NULL, *tmp; unsigned int nbits; unsigned long pos; mutex_lock(&pack_mutex); - if (hdr->size > BPF_PROG_PACK_SIZE) { - bpf_jit_free_exec(hdr); + if (size > BPF_PROG_PACK_SIZE) { + bpf_jit_free_exec(ptr); goto out; } list_for_each_entry(tmp, &pack_list, list) { - if ((void *)hdr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > (void *)hdr) { + if (ptr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > ptr) { pack = tmp; break; } @@ -950,10 +950,10 @@ void bpf_prog_pack_free(struct bpf_binary_header *hdr) if (WARN_ONCE(!pack, "bpf_prog_pack bug\n")) goto out; - nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size); - pos = ((unsigned long)hdr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT; + nbits = BPF_PROG_SIZE_TO_NBITS(size); + pos = ((unsigned long)ptr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT; - WARN_ONCE(bpf_arch_text_invalidate(hdr, hdr->size), + WARN_ONCE(bpf_arch_text_invalidate(ptr, size), "bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n"); bitmap_clear(pack->bitmap, pos, nbits); @@ -1100,8 +1100,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr, *rw_header = kvmalloc(size, GFP_KERNEL); if (!*rw_header) { - bpf_arch_text_copy(&ro_header->size, &size, sizeof(size)); - bpf_prog_pack_free(ro_header); + bpf_prog_pack_free(ro_header, size); bpf_jit_uncharge_modmem(size); return NULL; } @@ -1132,7 +1131,7 @@ int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, kvfree(rw_header); if (IS_ERR(ptr)) { - bpf_prog_pack_free(ro_header); + bpf_prog_pack_free(ro_header, ro_header->size); return PTR_ERR(ptr); } return 0; @@ -1153,7 +1152,7 @@ void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, { u32 size = ro_header->size; - bpf_prog_pack_free(ro_header); + bpf_prog_pack_free(ro_header, size); kvfree(rw_header); bpf_jit_uncharge_modmem(size); } diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index fa3e9225aedc..70fb82bf1637 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -150,14 +150,11 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, goto out; d->rw_image = bpf_jit_alloc_exec(PAGE_SIZE); if (!d->rw_image) { - u32 size = PAGE_SIZE; - - bpf_arch_text_copy(d->image, &size, sizeof(size)); - bpf_prog_pack_free((struct bpf_binary_header *)d->image); + bpf_prog_pack_free(d->image, PAGE_SIZE); d->image = NULL; goto out; } - bpf_image_ksym_add(d->image, &d->ksym); + bpf_image_ksym_add(d->image, PAGE_SIZE, &d->ksym); } prev_num_progs = d->num_progs; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index e97aeda3a86b..d382f5ebe06c 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -115,10 +115,10 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog) (ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC); } -void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym) +void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym) { ksym->start = (unsigned long) data; - ksym->end = ksym->start + PAGE_SIZE; + ksym->end = ksym->start + size; bpf_ksym_add(ksym); perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start, PAGE_SIZE, false, ksym->name); @@ -254,8 +254,8 @@ bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_a static void bpf_tramp_image_free(struct bpf_tramp_image *im) { bpf_image_ksym_del(&im->ksym); - bpf_jit_free_exec(im->image); - bpf_jit_uncharge_modmem(PAGE_SIZE); + arch_free_bpf_trampoline(im->image, im->size); + bpf_jit_uncharge_modmem(im->size); percpu_ref_exit(&im->pcref); kfree_rcu(im, rcu); } @@ -349,7 +349,7 @@ static void bpf_tramp_image_put(struct bpf_tramp_image *im) call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks); } -static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key) +static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size) { struct bpf_tramp_image *im; struct bpf_ksym *ksym; @@ -360,15 +360,15 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key) if (!im) goto out; - err = bpf_jit_charge_modmem(PAGE_SIZE); + err = bpf_jit_charge_modmem(size); if (err) goto out_free_im; + im->size = size; err = -ENOMEM; - im->image = image = bpf_jit_alloc_exec(PAGE_SIZE); + im->image = image = arch_alloc_bpf_trampoline(size); if (!image) goto out_uncharge; - set_vm_flush_reset_perms(image); err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL); if (err) @@ -377,13 +377,13 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key) ksym = &im->ksym; INIT_LIST_HEAD_RCU(&ksym->lnode); snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key); - bpf_image_ksym_add(image, ksym); + bpf_image_ksym_add(image, size, ksym); return im; out_free_image: - bpf_jit_free_exec(im->image); + arch_free_bpf_trampoline(im->image, im->size); out_uncharge: - bpf_jit_uncharge_modmem(PAGE_SIZE); + bpf_jit_uncharge_modmem(size); out_free_im: kfree(im); out: @@ -396,7 +396,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut struct bpf_tramp_links *tlinks; u32 orig_flags = tr->flags; bool ip_arg = false; - int err, total; + int err, total, size; tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg); if (IS_ERR(tlinks)) @@ -409,12 +409,6 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut goto out; } - im = bpf_tramp_image_alloc(tr->key); - if (IS_ERR(im)) { - err = PTR_ERR(im); - goto out; - } - /* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */ tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX); @@ -438,13 +432,31 @@ again: tr->flags |= BPF_TRAMP_F_ORIG_STACK; #endif - err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE, + size = arch_bpf_trampoline_size(&tr->func.model, tr->flags, + tlinks, tr->func.addr); + if (size < 0) { + err = size; + goto out; + } + + if (size > PAGE_SIZE) { + err = -E2BIG; + goto out; + } + + im = bpf_tramp_image_alloc(tr->key, size); + if (IS_ERR(im)) { + err = PTR_ERR(im); + goto out; + } + + err = arch_prepare_bpf_trampoline(im, im->image, im->image + size, &tr->func.model, tr->flags, tlinks, tr->func.addr); if (err < 0) goto out_free; - set_memory_rox((long)im->image, 1); + arch_protect_bpf_trampoline(im->image, im->size); WARN_ON(tr->cur_image && total == 0); if (tr->cur_image) @@ -464,9 +476,8 @@ again: tr->fops->func = NULL; tr->fops->trampoline = 0; - /* reset im->image memory attr for arch_prepare_bpf_trampoline */ - set_memory_nx((long)im->image, 1); - set_memory_rw((long)im->image, 1); + /* free im memory and reallocate later */ + bpf_tramp_image_free(im); goto again; } #endif @@ -1032,10 +1043,50 @@ bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog) } int __weak -arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, +arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, - void *orig_call) + void *func_addr) +{ + return -ENOTSUPP; +} + +void * __weak arch_alloc_bpf_trampoline(unsigned int size) +{ + void *image; + + if (WARN_ON_ONCE(size > PAGE_SIZE)) + return NULL; + image = bpf_jit_alloc_exec(PAGE_SIZE); + if (image) + set_vm_flush_reset_perms(image); + return image; +} + +void __weak arch_free_bpf_trampoline(void *image, unsigned int size) +{ + WARN_ON_ONCE(size > PAGE_SIZE); + /* bpf_jit_free_exec doesn't need "size", but + * bpf_prog_pack_free() needs it. + */ + bpf_jit_free_exec(image); +} + +void __weak arch_protect_bpf_trampoline(void *image, unsigned int size) +{ + WARN_ON_ONCE(size > PAGE_SIZE); + set_memory_rox((long)image, 1); +} + +void __weak arch_unprotect_bpf_trampoline(void *image, unsigned int size) +{ + WARN_ON_ONCE(size > PAGE_SIZE); + set_memory_nx((long)image, 1); + set_memory_rw((long)image, 1); +} + +int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr) { return -ENOTSUPP; } diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index 5918d1b32e19..2748f9d77b18 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -101,12 +101,11 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, goto out; } - image = bpf_jit_alloc_exec(PAGE_SIZE); + image = arch_alloc_bpf_trampoline(PAGE_SIZE); if (!image) { err = -ENOMEM; goto out; } - set_vm_flush_reset_perms(image); link = kzalloc(sizeof(*link), GFP_USER); if (!link) { @@ -124,7 +123,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, if (err < 0) goto out; - set_memory_rox((long)image, 1); + arch_protect_bpf_trampoline(image, PAGE_SIZE); prog_ret = dummy_ops_call_op(image, args); err = dummy_ops_copy_args(args); @@ -134,7 +133,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, err = -EFAULT; out: kfree(args); - bpf_jit_free_exec(image); + arch_free_bpf_trampoline(image, PAGE_SIZE); if (link) bpf_link_put(&link->link); kfree(tlinks); |