| author | Linus Torvalds <[email protected]> | 2021-11-01 13:24:43 -0700 | 
|---|---|---|
| committer | Linus Torvalds <[email protected]> | 2021-11-01 13:24:43 -0700 | 
| commit | 43aa0a195f06101bcb5d8d711bba0dd24b33a1a0 (patch) | |
| tree | 0236661db875f519cc80e11fde210fdfc9b2be76 /arch/x86/net/bpf_jit_comp.c | |
| parent | 595b28fb0c8949463d8ec1e485f36d17c870ddb2 (diff) | |
| parent | 87c87ecd00c54ecd677798cb49ef27329e0fab41 (diff) | |
Merge tag 'objtool-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull objtool updates from Thomas Gleixner:
 - Improve retpoline code patching by separating it from alternatives,
   which reduces the memory footprint and allows better optimizations
   in the actual runtime patching.
 - Add proper retpoline support for x86/BPF (a sketch of the emitted
   sequences follows this list)
 - Address noinstr warnings in x86/kvm, lockdep and paravirtualization
   code
 - Add support to handle pv_ops indirect calls in the noinstr analysis
 - Classify symbols upfront and cache the result to avoid redundant
   str*cmp() invocations.
 - Add a CFI hash to reduce memory consumption, which also reduces
   runtime on an allyesconfig by ~50%
 - Adjust XEN code to make objtool handling more robust and, as a side
   effect, prevent text fragmentation due to the placement of the
   hypercall page.
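The x86/BPF retpoline item lands in the diff below as emit_indirect_jump(), which picks one of three jump sequences at JIT time. What follows is a minimal standalone sketch of that selection, not the kernel code itself: the flat byte buffer and thunk_rel32() are hypothetical stand-ins for the kernel's EMIT*() macros and its rel32 relocation to __x86_indirect_thunk_array[reg].

```c
/*
 * Sketch of the three indirect-jump sequences emit_indirect_jump() can
 * produce, per the hunk in the diff below. Byte encodings match the
 * diff; the buffer handling is a stand-in for the EMIT*() macros.
 */
#include <stdint.h>
#include <string.h>

static int32_t thunk_rel32(int reg, const uint8_t *site)
{
	(void)reg; (void)site;
	return 0;	/* a real JIT computes thunk - (site + 5) */
}

static uint8_t *emit_indirect(uint8_t *p, int reg, int amd, int retpoline)
{
	if (amd) {
		/* lfence; jmp *%reg: the fence alone suffices on AMD */
		static const uint8_t lfence[] = { 0x0F, 0xAE, 0xE8 };
		memcpy(p, lfence, sizeof(lfence));
		p += sizeof(lfence);
		*p++ = 0xFF;
		*p++ = 0xE0 + reg;	/* jmp *%reg, reg 0..7 */
	} else if (retpoline) {
		/* rel32 jump into the per-register retpoline thunk */
		int32_t rel = thunk_rel32(reg, p);
		*p++ = 0xE9;
		memcpy(p, &rel, sizeof(rel));
		p += sizeof(rel);
	} else {
		/* no mitigation needed: plain indirect jump */
		*p++ = 0xFF;
		*p++ = 0xE0 + reg;
	}
	return p;
}
```

On AMD parts an LFENCE in front of the indirect jump is treated as sufficient serialization, which is why that path skips the retpoline thunk entirely.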
* tag 'objtool-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (41 commits)
  bpf,x86: Respect X86_FEATURE_RETPOLINE*
  bpf,x86: Simplify computing label offsets
  x86,bugs: Unconditionally allow spectre_v2=retpoline,amd
  x86/alternative: Add debug prints to apply_retpolines()
  x86/alternative: Try inline spectre_v2=retpoline,amd
  x86/alternative: Handle Jcc __x86_indirect_thunk_\reg
  x86/alternative: Implement .retpoline_sites support
  x86/retpoline: Create a retpoline thunk array
  x86/retpoline: Move the retpoline thunk declarations to nospec-branch.h
  x86/asm: Fixup odd GEN-for-each-reg.h usage
  x86/asm: Fix register order
  x86/retpoline: Remove unused replacement symbols
  objtool,x86: Replace alternatives with .retpoline_sites
  objtool: Shrink struct instruction
  objtool: Explicitly avoid self modifying code in .altinstr_replacement
  objtool: Classify symbols
  objtool: Support pv_ops indirect calls for noinstr
  x86/xen: Rework the xen_{cpu,irq,mmu}_ops arrays
  x86/xen: Mark xen_force_evtchn_callback() noinstr
  x86/xen: Make irq_disable() noinstr
  ...
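Among the commits above, "x86/retpoline: Create a retpoline thunk array" is what lets the JIT in the diff below address a thunk as __x86_indirect_thunk_array[reg] instead of naming one symbol per register. A sketch of why plain indexing works, assuming the thunks are emitted back to back and padded to a fixed size; THUNK_SIZE mirrors the kernel's RETPOLINE_THUNK_SIZE, but thunk_array here is a local stand-in, not the real assembly symbol.

```c
/*
 * Fixed-size, back-to-back thunks let an array-of-bytes typedef turn
 * symbol arithmetic into plain indexing. Illustrative stand-in only.
 */
#include <stdint.h>
#include <stdio.h>

#define THUNK_SIZE 32
typedef uint8_t thunk_t[THUNK_SIZE];

static thunk_t thunk_array[16];	/* one slot per GP register */

int main(void)
{
	int reg = 1;	/* rcx, the register the BPF tail call uses below */

	/* indexing steps by sizeof(thunk_t) == 32 bytes per register */
	printf("thunk for reg %d at byte offset %zu\n", reg,
	       (size_t)((uint8_t *)&thunk_array[reg] - (uint8_t *)thunk_array));
	return 0;
}
```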
Diffstat (limited to 'arch/x86/net/bpf_jit_comp.c')
| -rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 160 | 
1 file changed, 59 insertions, 101 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 9ea57389c554..39c802525fce 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -15,7 +15,6 @@
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
 #include <asm/text-patching.h>
-#include <asm/asm-prototypes.h>
 
 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -225,6 +224,14 @@ static void jit_fill_hole(void *area, unsigned int size)
 
 struct jit_context {
 	int cleanup_addr; /* Epilogue code offset */
+
+	/*
+	 * Program specific offsets of labels in the code; these rely on the
+	 * JIT doing at least 2 passes, recording the position on the first
+	 * pass, only to generate the correct offset on the second pass.
+	 */
+	int tail_call_direct_label;
+	int tail_call_indirect_label;
 };
 
 /* Maximum number of bytes emitted while JITing one eBPF insn */
@@ -380,20 +387,23 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 	return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
 }
 
-static int get_pop_bytes(bool *callee_regs_used)
+#define EMIT_LFENCE()	EMIT3(0x0F, 0xAE, 0xE8)
+
+static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
 {
-	int bytes = 0;
+	u8 *prog = *pprog;
 
-	if (callee_regs_used[3])
-		bytes += 2;
-	if (callee_regs_used[2])
-		bytes += 2;
-	if (callee_regs_used[1])
-		bytes += 2;
-	if (callee_regs_used[0])
-		bytes += 1;
+#ifdef CONFIG_RETPOLINE
+	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+		EMIT_LFENCE();
+		EMIT2(0xFF, 0xE0 + reg);
+	} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
+		emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
+	} else
+#endif
+	EMIT2(0xFF, 0xE0 + reg);
 
-	return bytes;
+	*pprog = prog;
 }
 
 /*
@@ -411,29 +421,12 @@ static int get_pop_bytes(bool *callee_regs_used)
  * out:
  */
 static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
-					u32 stack_depth)
+					u32 stack_depth, u8 *ip,
+					struct jit_context *ctx)
 {
 	int tcc_off = -4 - round_up(stack_depth, 8);
-	u8 *prog = *pprog;
-	int pop_bytes = 0;
-	int off1 = 42;
-	int off2 = 31;
-	int off3 = 9;
-
-	/* count the additional bytes used for popping callee regs from stack
-	 * that need to be taken into account for each of the offsets that
-	 * are used for bailing out of the tail call
-	 */
-	pop_bytes = get_pop_bytes(callee_regs_used);
-	off1 += pop_bytes;
-	off2 += pop_bytes;
-	off3 += pop_bytes;
-
-	if (stack_depth) {
-		off1 += 7;
-		off2 += 7;
-		off3 += 7;
-	}
+	u8 *prog = *pprog, *start = *pprog;
+	int offset;
 
 	/*
 	 * rdi - pointer to ctx
@@ -448,8 +441,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	EMIT2(0x89, 0xD2);                        /* mov edx, edx */
 	EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
 	      offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
-	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
+
+	offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+	EMIT2(X86_JBE, offset);                   /* jbe out */
 
 	/*
 	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -457,8 +451,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	 */
 	EMIT2_off32(0x8B, 0x85, tcc_off);         /* mov eax, dword ptr [rbp - tcc_off] */
 	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
-	EMIT2(X86_JA, OFFSET2);                   /* ja out */
+
+	offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+	EMIT2(X86_JA, offset);                    /* ja out */
 	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
 	EMIT2_off32(0x89, 0x85, tcc_off);         /* mov dword ptr [rbp - tcc_off], eax */
@@ -471,12 +466,11 @@
 	 *	goto out;
 	 */
 	EMIT3(0x48, 0x85, 0xC9);                  /* test rcx,rcx */
-#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
-	EMIT2(X86_JE, OFFSET3);                   /* je out */
-	*pprog = prog;
-	pop_callee_regs(pprog, callee_regs_used);
-	prog = *pprog;
+
+	offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+	EMIT2(X86_JE, offset);                    /* je out */
+
+	pop_callee_regs(&prog, callee_regs_used);
 
 	EMIT1(0x58);                              /* pop rax */
 	if (stack_depth)
@@ -493,41 +487,21 @@
 	 * rdi == ctx (1st arg)
 	 * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
	 */
-	RETPOLINE_RCX_BPF_JIT();
+	emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
 
 	/* out: */
+	ctx->tail_call_indirect_label = prog - start;
 	*pprog = prog;
 }
 
 static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
-				      u8 **pprog, int addr, u8 *image,
-				      bool *callee_regs_used, u32 stack_depth)
+				      u8 **pprog, u8 *ip,
+				      bool *callee_regs_used, u32 stack_depth,
+				      struct jit_context *ctx)
 {
 	int tcc_off = -4 - round_up(stack_depth, 8);
-	u8 *prog = *pprog;
-	int pop_bytes = 0;
-	int off1 = 20;
-	int poke_off;
-
-	/* count the additional bytes used for popping callee regs to stack
-	 * that need to be taken into account for jump offset that is used for
-	 * bailing out from of the tail call when limit is reached
-	 */
-	pop_bytes = get_pop_bytes(callee_regs_used);
-	off1 += pop_bytes;
-
-	/*
-	 * total bytes for:
-	 * - nop5/ jmpq $off
-	 * - pop callee regs
-	 * - sub rsp, $val if depth > 0
-	 * - pop rax
-	 */
-	poke_off = X86_PATCH_SIZE + pop_bytes + 1;
-	if (stack_depth) {
-		poke_off += 7;
-		off1 += 7;
-	}
+	u8 *prog = *pprog, *start = *pprog;
+	int offset;
 
 	/*
 	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -535,28 +509,30 @@
 	 */
 	EMIT2_off32(0x8B, 0x85, tcc_off);             /* mov eax, dword ptr [rbp - tcc_off] */
 	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);         /* cmp eax, MAX_TAIL_CALL_CNT */
-	EMIT2(X86_JA, off1);                          /* ja out */
+
+	offset = ctx->tail_call_direct_label - (prog + 2 - start);
+	EMIT2(X86_JA, offset);                        /* ja out */
 	EMIT3(0x83, 0xC0, 0x01);                      /* add eax, 1 */
 	EMIT2_off32(0x89, 0x85, tcc_off);             /* mov dword ptr [rbp - tcc_off], eax */
 
-	poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
+	poke->tailcall_bypass = ip + (prog - start);
 	poke->adj_off = X86_TAIL_CALL_OFFSET;
-	poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
+	poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
 	poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;
 
 	emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
 		  poke->tailcall_bypass);
 
-	*pprog = prog;
-	pop_callee_regs(pprog, callee_regs_used);
-	prog = *pprog;
+	pop_callee_regs(&prog, callee_regs_used);
 
 	EMIT1(0x58);                                  /* pop rax */
 	if (stack_depth)
 		EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
 
 	memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
 	prog += X86_PATCH_SIZE;
+
 	/* out: */
+	ctx->tail_call_direct_label = prog - start;
 
 	*pprog = prog;
 }
@@ -1222,8 +1198,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			/* speculation barrier */
 		case BPF_ST | BPF_NOSPEC:
 			if (boot_cpu_has(X86_FEATURE_XMM2))
-				/* Emit 'lfence' */
-				EMIT3(0x0F, 0xAE, 0xE8);
+				EMIT_LFENCE();
 			break;
 
 			/* ST: *(u8*)(dst_reg + off) = imm */
@@ -1412,13 +1387,16 @@ st:			if (is_imm8(insn->off))
 		case BPF_JMP | BPF_TAIL_CALL:
 			if (imm32)
 				emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
-							  &prog, addrs[i], image,
+							  &prog, image + addrs[i - 1],
 							  callee_regs_used,
-							  bpf_prog->aux->stack_depth);
+							  bpf_prog->aux->stack_depth,
+							  ctx);
 			else
 				emit_bpf_tail_call_indirect(&prog,
 							    callee_regs_used,
-							    bpf_prog->aux->stack_depth);
+							    bpf_prog->aux->stack_depth,
+							    image + addrs[i - 1],
+							    ctx);
 			break;
 
 			/* cond jump */
@@ -2124,24 +2102,6 @@ cleanup:
 	return ret;
 }
 
-static int emit_fallback_jump(u8 **pprog)
-{
-	u8 *prog = *pprog;
-	int err = 0;
-
-#ifdef CONFIG_RETPOLINE
-	/* Note that this assumes the the compiler uses external
-	 * thunks for indirect calls. Both clang and GCC use the same
-	 * naming convention for external thunks.
-	 */
-	err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
-#else
-	EMIT2(0xFF, 0xE2);	/* jmp rdx */
-#endif
-	*pprog = prog;
-	return err;
-}
-
 static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
 {
 	u8 *jg_reloc, *prog = *pprog;
@@ -2163,9 +2123,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
 		if (err)
 			return err;
 
-		err = emit_fallback_jump(&prog);	/* jmp thunk/indirect */
-		if (err)
-			return err;
+		emit_indirect_jump(&prog, 2 /* rdx */, prog);
 
 		*pprog = prog;
 		return 0;
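The tail_call_direct_label/tail_call_indirect_label fields added to jit_context above work because the JIT runs multiple passes: a jump emitted on pass N uses the label position recorded on pass N-1, so once consecutive passes converge on the same layout the emitted offsets are exact. A toy model of that fixed point, where emit_program() and its single out: label are hypothetical stand-ins for do_jit() and the tail-call bailout label:

```c
/*
 * Toy model of the two-pass label scheme behind tail_call_*_label.
 * Emits a 2-byte conditional jump whose rel8 comes from the label
 * recorded on the previous pass, then records where "out:" landed.
 */
#include <stdio.h>

struct jit_ctx {
	int out_label;	/* offset of "out:", recorded on the previous pass */
};

static int emit_program(struct jit_ctx *ctx, int body_len)
{
	int pc = 0;

	pc += 2;				/* like EMIT2(X86_JBE, offset) */
	int offset = ctx->out_label - pc;	/* stale on pass 1, exact on pass 2 */

	pc += body_len;				/* the code being jumped over */
	ctx->out_label = pc;			/* record the real "out:" position */

	printf("jbe rel8 = %d, out: at %d\n", offset, ctx->out_label);
	return pc;
}

int main(void)
{
	struct jit_ctx ctx = { 0 };

	for (int pass = 1; pass <= 2; pass++) {
		printf("pass %d: ", pass);
		emit_program(&ctx, 40);
	}
	return 0;
}
```

Pass 1 emits a stale offset but records where out: really is; pass 2 re-emits using the recorded position, and since the layout no longer changes the offset is now exact, which is the convergence the jit_context comment describes.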