From e2c95a61656d29ceaac97b6a975c8a1f26e26f15 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 26 Nov 2018 14:05:38 +0100 Subject: bpf, ppc64: generalize fetching subprog into bpf_jit_get_func_addr Make fetching of the BPF call address from ppc64 JIT generic. ppc64 was using a slightly different variant rather than through the insns' imm field encoding as the target address would not fit into that space. Therefore, the target subprog number was encoded into the insns' offset and fetched through fp->aux->func[off]->bpf_func instead. Given there are other JITs with this issue and the mechanism of fetching the address is JIT-generic, move it into the core as a helper instead. On the JIT side, we get information on whether the retrieved address is a fixed one, that is, not changing through JIT passes, or a dynamic one. For the former, JITs can optimize their imm emission because this doesn't change jump offsets throughout JIT process. Signed-off-by: Daniel Borkmann Reviewed-by: Sandipan Das Tested-by: Sandipan Das Signed-off-by: Alexei Starovoitov --- arch/powerpc/net/bpf_jit_comp64.c | 57 ++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/net/bpf_jit_comp64.c') diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 50b129785aee..17482f5de3e2 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -166,7 +166,33 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) PPC_BLR(); } -static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func) +static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, + u64 func) +{ +#ifdef PPC64_ELF_ABI_v1 + /* func points to the function descriptor */ + PPC_LI64(b2p[TMP_REG_2], func); + /* Load actual entry point from function descriptor */ + PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); + /* ... and move it to LR */ + PPC_MTLR(b2p[TMP_REG_1]); + /* + * Load TOC from function descriptor at offset 8. + * We can clobber r2 since we get called through a + * function pointer (so caller will save/restore r2) + * and since we don't use a TOC ourself. + */ + PPC_BPF_LL(2, b2p[TMP_REG_2], 8); +#else + /* We can clobber r12 */ + PPC_FUNC_ADDR(12, func); + PPC_MTLR(12); +#endif + PPC_BLRL(); +} + +static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, + u64 func) { unsigned int i, ctx_idx = ctx->idx; @@ -273,7 +299,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, { const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; - int i; + int i, ret; /* Start of epilogue code - will only be valid 2nd pass onwards */ u32 exit_addr = addrs[flen]; @@ -284,8 +310,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 src_reg = b2p[insn[i].src_reg]; s16 off = insn[i].off; s32 imm = insn[i].imm; + bool func_addr_fixed; + u64 func_addr; u64 imm64; - u8 *func; u32 true_cond; u32 tmp_idx; @@ -711,23 +738,15 @@ emit_clear: case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - /* bpf function call */ - if (insn[i].src_reg == BPF_PSEUDO_CALL) - if (!extra_pass) - func = NULL; - else if (fp->aux->func && off < fp->aux->func_cnt) - /* use the subprog id from the off - * field to lookup the callee address - */ - func = (u8 *) fp->aux->func[off]->bpf_func; - else - return -EINVAL; - /* kernel helper call */ - else - func = (u8 *) __bpf_call_base + imm; - - bpf_jit_emit_func_call(image, ctx, (u64)func); + ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + if (func_addr_fixed) + bpf_jit_emit_func_call_hlp(image, ctx, func_addr); + else + bpf_jit_emit_func_call_rel(image, ctx, func_addr); /* move return value from r3 to BPF_REG_0 */ PPC_MR(b2p[BPF_REG_0], 3); break; -- cgit From 025dceb0fab31c912c41b8f32577432231d83e6b Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 3 Dec 2018 17:51:04 +0530 Subject: bpf: powerpc64: optimize JIT passes for bpf function calls Once the JITed images for each function in a multi-function program are generated after the first three JIT passes, we only need to fix the target address for the branch instruction corresponding to each bpf-to-bpf function call. This introduces the following optimizations for reducing the work done by the JIT compiler when handling multi-function programs: [1] Instead of doing two extra passes to fix the bpf function calls, do just one as that would be sufficient. [2] During the extra pass, only overwrite the instruction sequences for the bpf-to-bpf function calls as everything else would still remain exactly the same. This also reduces the number of writes to the JITed image. [3] Do not regenerate the prologue and the epilogue during the extra pass as that would be redundant. Signed-off-by: Sandipan Das Signed-off-by: Daniel Borkmann --- arch/powerpc/net/bpf_jit_comp64.c | 66 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'arch/powerpc/net/bpf_jit_comp64.c') diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 17482f5de3e2..9393e231cbc2 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -891,6 +891,55 @@ cond_branch: return 0; } +/* Fix the branch target addresses for subprog calls */ +static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) +{ + const struct bpf_insn *insn = fp->insnsi; + bool func_addr_fixed; + u64 func_addr; + u32 tmp_idx; + int i, ret; + + for (i = 0; i < fp->len; i++) { + /* + * During the extra pass, only the branch target addresses for + * the subprog calls need to be fixed. All other instructions + * can left untouched. + * + * The JITed image length does not change because we already + * ensure that the JITed instruction sequence for these calls + * are of fixed length by padding them with NOPs. + */ + if (insn[i].code == (BPF_JMP | BPF_CALL) && + insn[i].src_reg == BPF_PSEUDO_CALL) { + ret = bpf_jit_get_func_addr(fp, &insn[i], true, + &func_addr, + &func_addr_fixed); + if (ret < 0) + return ret; + + /* + * Save ctx->idx as this would currently point to the + * end of the JITed image and set it to the offset of + * the instruction sequence corresponding to the + * subprog call temporarily. + */ + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + /* + * Restore ctx->idx here. This is safe as the length + * of the JITed sequence remains unchanged. + */ + ctx->idx = tmp_idx; + } + } + + return 0; +} + struct powerpc64_jit_data { struct bpf_binary_header *header; u32 *addrs; @@ -989,6 +1038,22 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) skip_init_ctx: code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); + if (extra_pass) { + /* + * Do not touch the prologue and epilogue as they will remain + * unchanged. Only fix the branch target address for subprog + * calls in the body. + * + * This does not change the offsets and lengths of the subprog + * call instruction sequences and hence, the size of the JITed + * image as well. + */ + bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + + /* There is no need to perform the usual passes. */ + goto skip_codegen_passes; + } + /* Code generation passes 1-2 */ for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. */ @@ -1002,6 +1067,7 @@ skip_init_ctx: proglen - (cgctx.idx * 4), cgctx.seen); } +skip_codegen_passes: if (bpf_jit_enable > 1) /* * Note that we output the base address of the code_base -- cgit