author     Alexei Starovoitov <[email protected]>  2020-09-29 16:47:39 -0700
committer  Alexei Starovoitov <[email protected]>  2020-09-29 16:47:39 -0700
commit     67e4ca74953ea27e2c0dac6907fb4271bd6d5d87
tree       8d408caa5ccf28ae27a7004b0c8ada8cc42c7366
parent     6458bde368cee77e798d05cccd2316db4d748c41
parent     4d0b8c0b46a5e6f23ab8301780d689072c6c91fc
Merge branch 'bpf, x64: optimize JIT's pro/epilogue'
Maciej Fijalkowski says:

====================

Hi!

This small set can be considered a follow-up to the recent addition of
support for tail calls in BPF subprograms, and focuses on optimizing the
x64 JIT's prologue and epilogue sections.

It turns out that popping the tail call counter is no longer needed, and
the %rsp handling can be skipped entirely when the stack depth is 0. See
the individual commit messages for the longer explanations.

Thank you,
Maciej

====================

Signed-off-by: Alexei Starovoitov <[email protected]>
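To make the prologue change concrete, here is a minimal, self-contained sketch of the idea, assuming simplified emit8()/emit32() helpers standing in for the kernel's EMIT1()/EMIT3_off32() macros (an illustration, not the JIT's actual code): the 7-byte "sub rsp, imm32" is emitted only when the program really uses stack, so a zero-depth program gets a shorter prologue.

/* Illustrative sketch only: emit8()/emit32() are assumed stand-ins for
 * the kernel's EMIT*() macros, writing raw instruction bytes into a
 * buffer.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static void emit8(uint8_t **prog, uint8_t byte)
{
	*(*prog)++ = byte;
}

static void emit32(uint8_t **prog, uint32_t imm)
{
	for (int i = 0; i < 4; i++)
		emit8(prog, (imm >> (i * 8)) & 0xff);
}

static void emit_prologue_sketch(uint8_t **pprog, uint32_t stack_depth,
				 bool tail_call_reachable)
{
	uint8_t *prog = *pprog;

	emit8(&prog, 0x55);				/* push rbp */
	emit8(&prog, 0x48);				/* mov rbp, rsp */
	emit8(&prog, 0x89);
	emit8(&prog, 0xE5);
	if (stack_depth) {				/* skipped when depth == 0 */
		emit8(&prog, 0x48);			/* sub rsp, imm32 */
		emit8(&prog, 0x81);
		emit8(&prog, 0xEC);
		emit32(&prog, (stack_depth + 7) & ~7u);	/* round up to 8 */
	}
	if (tail_call_reachable)
		emit8(&prog, 0x50);			/* push rax (tail call counter) */
	*pprog = prog;
}

int main(void)
{
	uint8_t buf[32], *p = buf;

	emit_prologue_sketch(&p, 0, true);
	printf("prologue size with stack_depth == 0: %td bytes\n", p - buf);
	return 0;
}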
-rw-r--r--  arch/x86/net/bpf_jit_comp.c  35
1 file changed, 23 insertions(+), 12 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 26f43279b78b..796506dcfc42 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -281,7 +281,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
/* sub rsp, rounded_stack_depth */
- EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
+ if (stack_depth)
+ EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
if (tail_call_reachable)
EMIT1(0x50); /* push rax */
*pprog = prog;
@@ -407,9 +408,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
int pop_bytes = 0;
- int off1 = 49;
- int off2 = 38;
- int off3 = 16;
+ int off1 = 42;
+ int off2 = 31;
+ int off3 = 9;
int cnt = 0;
/* count the additional bytes used for popping callee regs from stack
@@ -421,6 +422,12 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
off2 += pop_bytes;
off3 += pop_bytes;
+ if (stack_depth) {
+ off1 += 7;
+ off2 += 7;
+ off3 += 7;
+ }
+
/*
* rdi - pointer to ctx
* rsi - pointer to bpf_array
@@ -465,8 +472,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
prog = *pprog;
EMIT1(0x58); /* pop rax */
- EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */
- round_up(stack_depth, 8));
+ if (stack_depth)
+ EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */
+ round_up(stack_depth, 8));
/* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */
EMIT4(0x48, 0x8B, 0x49, /* mov rcx, qword ptr [rcx + 32] */
@@ -491,7 +499,7 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
int pop_bytes = 0;
- int off1 = 27;
+ int off1 = 20;
int poke_off;
int cnt = 0;
@@ -506,10 +514,14 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
* total bytes for:
* - nop5/ jmpq $off
* - pop callee regs
- * - sub rsp, $val
+ * - sub rsp, $val if depth > 0
* - pop rax
*/
- poke_off = X86_PATCH_SIZE + pop_bytes + 7 + 1;
+ poke_off = X86_PATCH_SIZE + pop_bytes + 1;
+ if (stack_depth) {
+ poke_off += 7;
+ off1 += 7;
+ }
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -533,7 +545,8 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
pop_callee_regs(pprog, callee_regs_used);
prog = *pprog;
EMIT1(0x58); /* pop rax */
- EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
+ if (stack_depth)
+ EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
@@ -1441,8 +1454,6 @@ emit_jmp:
/* Update cleanup_addr */
ctx->cleanup_addr = proglen;
pop_callee_regs(&prog, callee_regs_used);
- if (tail_call_reachable)
- EMIT1(0x59); /* pop rcx, get rid of tail_call_cnt */
EMIT1(0xC9); /* leave */
EMIT1(0xC3); /* ret */
break;
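A note on the offset arithmetic in the tail-call hunks above: "add rsp, imm32" encodes to 7 bytes (3 opcode bytes plus a 4-byte immediate), which is why the hard-coded jump offsets drop by 7 (49 to 42, 38 to 31, 16 to 9, 27 to 20) and why 7 is added back only when stack_depth is non-zero. The helpers below are hypothetical (they are not part of bpf_jit_comp.c) and simply mirror that bookkeeping for the direct-call emitter.

/* Hypothetical helpers mirroring the offset bookkeeping of
 * emit_bpf_tail_call_direct() after this series: the 7-byte
 * "add rsp, imm32" may or may not be present, so both the jump
 * displacement and the poke target move by 7 accordingly.
 */
#include <stdbool.h>
#include <stdio.h>

#define X86_PATCH_SIZE 5			/* nop5 / jmpq rel32 */

static int direct_jump_offset(int pop_bytes, bool has_stack)
{
	int off1 = 20;				/* new base from this patch */

	off1 += pop_bytes;			/* callee-saved register pops */
	if (has_stack)
		off1 += 7;			/* add rsp, imm32 = 7 bytes */
	return off1;
}

static int direct_poke_offset(int pop_bytes, bool has_stack)
{
	int poke_off = X86_PATCH_SIZE + pop_bytes + 1;	/* + pop rax */

	if (has_stack)
		poke_off += 7;			/* rsp adjustment present */
	return poke_off;
}

int main(void)
{
	printf("off1=%d poke_off=%d (no regs, no stack)\n",
	       direct_jump_offset(0, false), direct_poke_offset(0, false));
	printf("off1=%d poke_off=%d (no regs, with stack)\n",
	       direct_jump_offset(0, true), direct_poke_offset(0, true));
	return 0;
}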