From 86322ba9571a36b0b149a5f27fc3423c2dadbe39 Mon Sep 17 00:00:00 2001
From: Sabyasachi Gupta
Date: Sat, 3 Nov 2018 10:54:52 +0530
Subject: arch/sparc: Use kzalloc_node

Replaced kmalloc_node + memset with kzalloc_node.

Signed-off-by: Sabyasachi Gupta
Signed-off-by: David S. Miller
---
 arch/sparc/kernel/iommu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/sparc')

diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 40d008b0bd3e..05eb016fc41b 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -108,10 +108,9 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
 	/* Allocate and initialize the free area map.  */
 	sz = num_tsb_entries / 8;
 	sz = (sz + 7UL) & ~7UL;
-	iommu->tbl.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
+	iommu->tbl.map = kzalloc_node(sz, GFP_KERNEL, numa_node);
 	if (!iommu->tbl.map)
 		return -ENOMEM;
-	memset(iommu->tbl.map, 0, sz);
 
 	iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
 			    (tlb_type != hypervisor ? iommu_flushall : NULL),
-- cgit
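For illustration, a minimal sketch of the pattern this patch applies (the alloc_free_area_map() helper is hypothetical, not kernel code): kzalloc_node() is kmalloc_node() plus an implicit zero fill, so the explicit memset() becomes redundant.

#include <linux/slab.h>

/* Hypothetical helper mirroring the patched allocation: kzalloc_node()
 * returns memory that is already zeroed, so no separate memset() of the
 * bitmap is needed before handing it to the IOMMU pool code.
 */
static unsigned long *alloc_free_area_map(size_t sz, int numa_node)
{
	return kzalloc_node(sz, GFP_KERNEL, numa_node);
}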
From c44768a33da81b4a0986e79bbf0588f1a0651dec Mon Sep 17 00:00:00 2001
From: David Miller
Date: Mon, 26 Nov 2018 13:03:46 -0800
Subject: sparc: Fix JIT fused branch convergence.

On T4 and later sparc64 cpus we can use the fused compare and branch
instruction.  However, it can only be used if the branch destination
is in the range of a signed 10-bit immediate offset.  This amounts to
1024 instructions forwards or backwards.

After the commit referenced in the Fixes: tag, the largest possible
size program seen by the JIT explodes by a significant factor.

As a result, convergence takes many more passes since the expanded
"BPF_LDX | BPF_MSH | BPF_B" code sequence, for example, contains
several embedded branch on condition instructions.  On each pass,
as new fused compare and branch instances suddenly become valid,
thousands more come into range for the next pass.  And so on and
so forth.

This is most greatly exemplified by "BPF_MAXINSNS: exec all MSH",
which takes 35 passes to converge and shrinks the image by about 64K.

To decrease the cost of this number of convergence passes, do the
convergence pass before we have the program image allocated, just
like other JITs (such as x86) do.

Fixes: e0cea7ce988c ("bpf: implement ld_abs/ld_ind in native bpf")
Signed-off-by: David S. Miller
Signed-off-by: Alexei Starovoitov
---
 arch/sparc/net/bpf_jit_comp_64.c | 77 +++++++++++++++++++++++++---------------
 1 file changed, 49 insertions(+), 28 deletions(-)

(limited to 'arch/sparc')

diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 222785af550b..7217d6359643 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1425,12 +1425,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	struct bpf_prog *tmp, *orig_prog = prog;
 	struct sparc64_jit_data *jit_data;
 	struct bpf_binary_header *header;
+	u32 prev_image_size, image_size;
 	bool tmp_blinded = false;
 	bool extra_pass = false;
 	struct jit_ctx ctx;
-	u32 image_size;
 	u8 *image_ptr;
-	int pass;
+	int pass, i;
 
 	if (!prog->jit_requested)
 		return orig_prog;
@@ -1461,61 +1461,82 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		header = jit_data->header;
 		extra_pass = true;
 		image_size = sizeof(u32) * ctx.idx;
+		prev_image_size = image_size;
+		pass = 1;
 		goto skip_init_ctx;
 	}
 
 	memset(&ctx, 0, sizeof(ctx));
 	ctx.prog = prog;
 
-	ctx.offset = kcalloc(prog->len, sizeof(unsigned int), GFP_KERNEL);
+	ctx.offset = kmalloc_array(prog->len, sizeof(unsigned int), GFP_KERNEL);
 	if (ctx.offset == NULL) {
 		prog = orig_prog;
 		goto out_off;
 	}
 
-	/* Fake pass to detect features used, and get an accurate assessment
-	 * of what the final image size will be.
+	/* Longest sequence emitted is for bswap32, 12 instructions.  Pre-cook
+	 * the offset array so that we converge faster.
 	 */
-	if (build_body(&ctx)) {
-		prog = orig_prog;
-		goto out_off;
-	}
-	build_prologue(&ctx);
-	build_epilogue(&ctx);
+	for (i = 0; i < prog->len; i++)
+		ctx.offset[i] = i * (12 * 4);
 
-	/* Now we know the actual image size. */
-	image_size = sizeof(u32) * ctx.idx;
-	header = bpf_jit_binary_alloc(image_size, &image_ptr,
-				      sizeof(u32), jit_fill_hole);
-	if (header == NULL) {
-		prog = orig_prog;
-		goto out_off;
-	}
-
-	ctx.image = (u32 *)image_ptr;
-skip_init_ctx:
-	for (pass = 1; pass < 3; pass++) {
+	prev_image_size = ~0U;
+	for (pass = 1; pass < 40; pass++) {
 		ctx.idx = 0;
 
 		build_prologue(&ctx);
-
 		if (build_body(&ctx)) {
-			bpf_jit_binary_free(header);
 			prog = orig_prog;
 			goto out_off;
 		}
-
 		build_epilogue(&ctx);
 
 		if (bpf_jit_enable > 1)
-			pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass,
-				image_size - (ctx.idx * 4),
+			pr_info("Pass %d: size = %u, seen = [%c%c%c%c%c%c]\n", pass,
+				ctx.idx * 4,
 				ctx.tmp_1_used ? '1' : ' ',
 				ctx.tmp_2_used ? '2' : ' ',
 				ctx.tmp_3_used ? '3' : ' ',
 				ctx.saw_frame_pointer ? 'F' : ' ',
 				ctx.saw_call ? 'C' : ' ',
 				ctx.saw_tail_call ? 'T' : ' ');
+
+		if (ctx.idx * 4 == prev_image_size)
+			break;
+		prev_image_size = ctx.idx * 4;
+		cond_resched();
+	}
+
+	/* Now we know the actual image size. */
+	image_size = sizeof(u32) * ctx.idx;
+	header = bpf_jit_binary_alloc(image_size, &image_ptr,
+				      sizeof(u32), jit_fill_hole);
+	if (header == NULL) {
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	ctx.image = (u32 *)image_ptr;
+skip_init_ctx:
+	ctx.idx = 0;
+
+	build_prologue(&ctx);
+
+	if (build_body(&ctx)) {
+		bpf_jit_binary_free(header);
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	build_epilogue(&ctx);
+
+	if (ctx.idx * 4 != prev_image_size) {
+		pr_err("bpf_jit: Failed to converge, prev_size=%u size=%d\n",
+		       prev_image_size, ctx.idx * 4);
+		bpf_jit_binary_free(header);
+		prog = orig_prog;
+		goto out_off;
 	}
 
 	if (bpf_jit_enable > 1)
-- cgit
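The resulting flow is: run size-only passes over the program until the emitted length stops changing, then allocate the image once and do a single real emission pass. A condensed sketch of that fixed-point loop follows; the jit_ctx/build_*() names mirror the sparc JIT, but this is an illustration of the pattern, not the exact kernel code.

/* Illustrative fixed-point sizing loop, assuming a jit_ctx shaped like
 * the sparc JIT's.  Each pass re-measures the program; as offsets
 * tighten, more fused compare-and-branch forms fit within their signed
 * 10-bit displacement, which can shrink the next pass again.
 */
static int jit_size_converge(struct jit_ctx *ctx, u32 *image_size)
{
	u32 prev_size = ~0U;
	int pass;

	for (pass = 1; pass < 40; pass++) {
		ctx->idx = 0;			/* measure only, no image yet */
		build_prologue(ctx);
		if (build_body(ctx))
			return -EINVAL;
		build_epilogue(ctx);

		if (ctx->idx * 4 == prev_size)	/* size stable: converged */
			break;
		prev_size = ctx->idx * 4;
		cond_resched();			/* large programs, many passes */
	}

	*image_size = ctx->idx * 4;		/* now safe to allocate once */
	return 0;
}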
From e2ac579a7a18bcd9e8cf14cf42eac0b8a2ba6c4b Mon Sep 17 00:00:00 2001
From: David Miller
Date: Mon, 26 Nov 2018 14:52:18 -0800
Subject: sparc: Correct ctx->saw_frame_pointer logic.

We need to initialize the frame pointer register not just if it is
seen as a source operand, but also if it is seen as the destination
operand of a store or an atomic instruction (which effectively is a
source operand).

This is exercised by test_verifier's "non-invalid fp arithmetic" test.

Signed-off-by: David S. Miller
Signed-off-by: Alexei Starovoitov
---
 arch/sparc/net/bpf_jit_comp_64.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch/sparc')

diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 7217d6359643..ec4da4dc98f1 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1270,6 +1270,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		const u8 tmp2 = bpf2sparc[TMP_REG_2];
 		u32 opcode = 0, rs2;
 
+		if (insn->dst_reg == BPF_REG_FP)
+			ctx->saw_frame_pointer = true;
+
 		ctx->tmp_2_used = true;
 		emit_loadimm(imm, tmp2, ctx);
 
@@ -1308,6 +1311,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		const u8 tmp = bpf2sparc[TMP_REG_1];
 		u32 opcode = 0, rs2;
 
+		if (insn->dst_reg == BPF_REG_FP)
+			ctx->saw_frame_pointer = true;
+
 		switch (BPF_SIZE(code)) {
 		case BPF_W:
 			opcode = ST32;
@@ -1340,6 +1346,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		const u8 tmp2 = bpf2sparc[TMP_REG_2];
 		const u8 tmp3 = bpf2sparc[TMP_REG_3];
 
+		if (insn->dst_reg == BPF_REG_FP)
+			ctx->saw_frame_pointer = true;
+
 		ctx->tmp_1_used = true;
 		ctx->tmp_2_used = true;
 		ctx->tmp_3_used = true;
@@ -1360,6 +1369,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		const u8 tmp2 = bpf2sparc[TMP_REG_2];
 		const u8 tmp3 = bpf2sparc[TMP_REG_3];
 
+		if (insn->dst_reg == BPF_REG_FP)
+			ctx->saw_frame_pointer = true;
+
 		ctx->tmp_1_used = true;
 		ctx->tmp_2_used = true;
 		ctx->tmp_3_used = true;
-- cgit
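The underlying point generalizes: a BPF store writes through dst_reg, so an instruction like *(u64 *)(FP - 8) = imm mentions the frame pointer only in dst_reg, and a checker that looks at src_reg alone misses it. A hedged sketch of such a scan (illustrative only, not the kernel's code; prog_touches_fp() is a hypothetical name):

#include <linux/bpf.h>
#include <linux/filter.h>

/* Illustrative scan: does this program touch the frame pointer?
 * For ST/STX instructions (stores and atomics) dst_reg holds the
 * address base, i.e. it is effectively a source operand and must
 * be checked in addition to src_reg.
 */
static bool prog_touches_fp(const struct bpf_insn *insn, int len)
{
	int i;

	for (i = 0; i < len; i++) {
		u8 class = BPF_CLASS(insn[i].code);

		if (insn[i].src_reg == BPF_REG_FP)
			return true;
		if ((class == BPF_ST || class == BPF_STX) &&
		    insn[i].dst_reg == BPF_REG_FP)
			return true;
	}
	return false;
}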
From 2b9034b5eaddc09c0e9529b93446eb975f97f814 Mon Sep 17 00:00:00 2001
From: David Miller
Date: Mon, 26 Nov 2018 21:55:09 -0800
Subject: sparc: Adjust bpf JIT prologue for PSEUDO calls.

Move all arguments into output registers from input registers.

This path is exercised by test_verifier.c's "calls: two calls with
args" test.  Adjust BPF_TAILCALL_PROLOGUE_SKIP as needed.

Let's also make the prologue length a constant size regardless of the
combination of ->saw_frame_pointer and ->saw_tail_call settings.

Signed-off-by: David S. Miller
Signed-off-by: Daniel Borkmann
---
 arch/sparc/net/bpf_jit_comp_64.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch/sparc')

diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index ec4da4dc98f1..5fda4f7bf15d 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -791,7 +791,7 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
 }
 
 /* Just skip the save instruction and the ctx register move.  */
-#define BPF_TAILCALL_PROLOGUE_SKIP	16
+#define BPF_TAILCALL_PROLOGUE_SKIP	32
 #define BPF_TAILCALL_CNT_SP_OFF	(STACK_BIAS + 128)
 
 static void build_prologue(struct jit_ctx *ctx)
@@ -824,9 +824,15 @@ static void build_prologue(struct jit_ctx *ctx)
 		const u8 vfp = bpf2sparc[BPF_REG_FP];
 
 		emit(ADD | IMMED | RS1(FP) | S13(STACK_BIAS) | RD(vfp), ctx);
+	} else {
+		emit_nop(ctx);
 	}
 
 	emit_reg_move(I0, O0, ctx);
+	emit_reg_move(I1, O1, ctx);
+	emit_reg_move(I2, O2, ctx);
+	emit_reg_move(I3, O3, ctx);
+	emit_reg_move(I4, O4, ctx);
 	/* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */
 }
-- cgit

From f6f8c1c09c224076a6c9ca3d97c24b81698f98f8 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell
Date: Mon, 26 Nov 2018 17:59:40 +1100
Subject: sparc: suppress the implicit-fallthrough warning

sparc builds with -Werror.

Signed-off-by: Stephen Rothwell
Signed-off-by: David S. Miller
---
 arch/sparc/kernel/signal_64.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/sparc')

diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index e9de1803a22e..ca70787efd8e 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -533,6 +533,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 		regs->tpc -= 4;
 		regs->tnpc -= 4;
 		pt_regs_clear_syscall(regs);
+		/* fall through */
 	case ERESTART_RESTARTBLOCK:
 		regs->u_regs[UREG_G1] = __NR_restart_syscall;
 		regs->tpc -= 4;
-- cgit

From e945067d95cc99a6e4601b6bda665f30c7917c3b Mon Sep 17 00:00:00 2001
From: Stephen Rothwell
Date: Mon, 26 Nov 2018 18:11:46 +1100
Subject: sparc32: suppress an implicit-fallthrough warning

Signed-off-by: Stephen Rothwell
Signed-off-by: David S. Miller
---
 arch/sparc/kernel/signal32.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/sparc')

diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index 4c5b3fcbed94..e800ce13cc6e 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -683,6 +683,7 @@ void do_signal32(struct pt_regs * regs)
 		regs->tpc -= 4;
 		regs->tnpc -= 4;
 		pt_regs_clear_syscall(regs);
+		/* fall through */
 	case ERESTART_RESTARTBLOCK:
 		regs->u_regs[UREG_G1] = __NR_restart_syscall;
 		regs->tpc -= 4;
-- cgit
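These two fixes, and the one that follows, all add the comment form that GCC's -Wimplicit-fallthrough heuristic recognizes; since sparc builds with -Werror, each unannotated fall-through was a build failure. A small standalone illustration (hypothetical code, not from the kernel; compile with gcc -Wimplicit-fallthrough -Werror):

#include <stdio.h>

/* The two cases deliberately share work, loosely mirroring the
 * ERESTARTNOINTR -> ERESTART_RESTARTBLOCK fall-through in the signal
 * code.  Without the marker comment before "case 2:", gcc warns about
 * the implicit fall-through, and -Werror turns that into an error.
 */
static int restart_steps(int err)
{
	int steps = 0;

	switch (err) {
	case 1:			/* think ERESTARTNOINTR */
		steps++;	/* back the PC up to retry the syscall */
		/* fall through */
	case 2:			/* think ERESTART_RESTARTBLOCK */
		steps++;	/* load the restart syscall number */
		break;
	}
	return steps;
}

int main(void)
{
	printf("%d %d\n", restart_steps(1), restart_steps(2));	/* prints "2 1" */
	return 0;
}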
From e399ef194171717aefa78838d02b1e92267be2b1 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell
Date: Mon, 26 Nov 2018 18:45:26 +1100
Subject: sparc32: suppress another implicit-fallthrough warning

Signed-off-by: Stephen Rothwell
Signed-off-by: David S. Miller
---
 arch/sparc/kernel/signal_32.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/sparc')

diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 5665261cee37..83953780ca01 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -508,6 +508,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 		regs->pc -= 4;
 		regs->npc -= 4;
 		pt_regs_clear_syscall(regs);
+		/* fall through */
 	case ERESTART_RESTARTBLOCK:
 		regs->u_regs[UREG_G1] = __NR_restart_syscall;
 		regs->pc -= 4;
-- cgit

From 9c4bf5e0db164f330a2d3e128e9832661f69f0e9 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)"
Date: Sun, 18 Nov 2018 17:37:40 -0500
Subject: sparc/function_graph: Simplify with function_graph_enter()

The function_graph_enter() function calls the function graph hook and
manages the shadow stack, simplifying the work done in the
architecture-dependent prepare_ftrace_return().

Have sparc use the new code, and remove the shadow stack management
as well as having to set up the trace structure.

This is needed to prepare for a fix of a design bug on how the
curr_ret_stack is used.

Cc: "David S. Miller"
Cc: sparclinux@vger.kernel.org
Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu
Signed-off-by: Steven Rostedt (VMware)
---
 arch/sparc/kernel/ftrace.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

(limited to 'arch/sparc')

diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
index 915dda4ae412..684b84ce397f 100644
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -126,20 +126,11 @@ unsigned long prepare_ftrace_return(unsigned long parent,
 				    unsigned long frame_pointer)
 {
 	unsigned long return_hooker = (unsigned long) &return_to_handler;
-	struct ftrace_graph_ent trace;
 
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		return parent + 8UL;
 
-	trace.func = self_addr;
-	trace.depth = current->curr_ret_stack + 1;
-
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace))
-		return parent + 8UL;
-
-	if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
-				     frame_pointer, NULL) == -EBUSY)
+	if (function_graph_enter(parent, self_addr, frame_pointer, NULL))
 		return parent + 8UL;
 
 	return return_hooker;
-- cgit
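As the commit message describes, function_graph_enter() folds the entry-hook call and the return-stack push into one core helper, so each architecture's prepare_ftrace_return() reduces to a single call. A rough sketch of what such a helper consolidates, reconstructed from the lines this patch deletes (an approximation and an assumption, not the exact kernel implementation, which also unwinds the shadow stack when the entry hook rejects the trace):

/* Approximate consolidation of the per-arch boilerplate removed above:
 * set up the trace entry, ask the entry hook whether to trace, and
 * push the return address onto the shadow stack.  Nonzero means the
 * caller should skip installing the return hook.
 */
int function_graph_enter(unsigned long ret, unsigned long func,
			 unsigned long frame_pointer, unsigned long *retp)
{
	struct ftrace_graph_ent trace;

	trace.func = func;
	trace.depth = current->curr_ret_stack + 1;

	/* Only trace if the calling function expects to */
	if (!ftrace_graph_entry(&trace))
		return -EBUSY;

	return ftrace_push_return_trace(ret, func, &trace.depth,
					frame_pointer, retp);
}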