diff options
Diffstat (limited to 'kernel/bpf/core.c')
| -rw-r--r-- | kernel/bpf/core.c | 95 |
1 files changed, 74 insertions, 21 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 5e31ee9f7512..0a28a8095d3e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -32,6 +32,8 @@ #include <linux/perf_event.h> #include <linux/extable.h> #include <linux/log2.h> + +#include <asm/barrier.h> #include <asm/unaligned.h> /* Registers */ @@ -1360,11 +1362,13 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) } /** - * __bpf_prog_run - run eBPF program on a given context + * ___bpf_prog_run - run eBPF program on a given context * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers * @insn: is the array of eBPF instructions * * Decode and execute eBPF instructions. + * + * Return: whatever value is in %BPF_R0 at program exit */ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) { @@ -1377,6 +1381,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) /* Non-UAPI available opcodes. */ [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS, [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL, + [BPF_ST | BPF_NOSPEC] = &&ST_NOSPEC, [BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B, [BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H, [BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W, @@ -1392,29 +1397,54 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) select_insn: goto *jumptable[insn->code]; - /* ALU */ -#define ALU(OPCODE, OP) \ - ALU64_##OPCODE##_X: \ - DST = DST OP SRC; \ - CONT; \ - ALU_##OPCODE##_X: \ - DST = (u32) DST OP (u32) SRC; \ - CONT; \ - ALU64_##OPCODE##_K: \ - DST = DST OP IMM; \ - CONT; \ - ALU_##OPCODE##_K: \ - DST = (u32) DST OP (u32) IMM; \ + /* Explicitly mask the register-based shift amounts with 63 or 31 + * to avoid undefined behavior. Normally this won't affect the + * generated code, for example, in case of native 64 bit archs such + * as x86-64 or arm64, the compiler is optimizing the AND away for + * the interpreter. In case of JITs, each of the JIT backends compiles + * the BPF shift operations to machine instructions which produce + * implementation-defined results in such a case; the resulting + * contents of the register may be arbitrary, but program behaviour + * as a whole remains defined. In other words, in case of JIT backends, + * the AND must /not/ be added to the emitted LSH/RSH/ARSH translation. + */ + /* ALU (shifts) */ +#define SHT(OPCODE, OP) \ + ALU64_##OPCODE##_X: \ + DST = DST OP (SRC & 63); \ + CONT; \ + ALU_##OPCODE##_X: \ + DST = (u32) DST OP ((u32) SRC & 31); \ + CONT; \ + ALU64_##OPCODE##_K: \ + DST = DST OP IMM; \ + CONT; \ + ALU_##OPCODE##_K: \ + DST = (u32) DST OP (u32) IMM; \ + CONT; + /* ALU (rest) */ +#define ALU(OPCODE, OP) \ + ALU64_##OPCODE##_X: \ + DST = DST OP SRC; \ + CONT; \ + ALU_##OPCODE##_X: \ + DST = (u32) DST OP (u32) SRC; \ + CONT; \ + ALU64_##OPCODE##_K: \ + DST = DST OP IMM; \ + CONT; \ + ALU_##OPCODE##_K: \ + DST = (u32) DST OP (u32) IMM; \ CONT; - ALU(ADD, +) ALU(SUB, -) ALU(AND, &) ALU(OR, |) - ALU(LSH, <<) - ALU(RSH, >>) ALU(XOR, ^) ALU(MUL, *) + SHT(LSH, <<) + SHT(RSH, >>) +#undef SHT #undef ALU ALU_NEG: DST = (u32) -DST; @@ -1439,13 +1469,13 @@ select_insn: insn++; CONT; ALU_ARSH_X: - DST = (u64) (u32) (((s32) DST) >> SRC); + DST = (u64) (u32) (((s32) DST) >> (SRC & 31)); CONT; ALU_ARSH_K: DST = (u64) (u32) (((s32) DST) >> IMM); CONT; ALU64_ARSH_X: - (*(s64 *) &DST) >>= SRC; + (*(s64 *) &DST) >>= (SRC & 63); CONT; ALU64_ARSH_K: (*(s64 *) &DST) >>= IMM; @@ -1596,7 +1626,21 @@ out: COND_JMP(s, JSGE, >=) COND_JMP(s, JSLE, <=) #undef COND_JMP - /* STX and ST and LDX*/ + /* ST, STX and LDX*/ + ST_NOSPEC: + /* Speculation barrier for mitigating Speculative Store Bypass. + * In case of arm64, we rely on the firmware mitigation as + * controlled via the ssbd kernel parameter. Whenever the + * mitigation is enabled, it works for all of the kernel code + * with no need to provide any additional instructions here. + * In case of x86, we use 'lfence' insn for mitigation. We + * reuse preexisting logic from Spectre v1 mitigation that + * happens to produce the required code on x86 for v4 as well. + */ +#ifdef CONFIG_X86 + barrier_nospec(); +#endif + CONT; #define LDST(SIZEOP, SIZE) \ STX_MEM_##SIZEOP: \ *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \ @@ -1836,6 +1880,9 @@ static void bpf_prog_select_func(struct bpf_prog *fp) * * Try to JIT eBPF program, if JIT is not available, use interpreter. * The BPF program will be executed via BPF_PROG_RUN() macro. + * + * Return: the &fp argument along with &err set to 0 for success or + * a negative errno code on failure */ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) { @@ -2211,8 +2258,14 @@ static void bpf_prog_free_deferred(struct work_struct *work) #endif if (aux->dst_trampoline) bpf_trampoline_put(aux->dst_trampoline); - for (i = 0; i < aux->func_cnt; i++) + for (i = 0; i < aux->func_cnt; i++) { + /* We can just unlink the subprog poke descriptor table as + * it was originally linked to the main program and is also + * released along with it. + */ + aux->func[i]->aux->poke_tab = NULL; bpf_jit_free(aux->func[i]); + } if (aux->func_cnt) { kfree(aux->func); bpf_prog_unlock_free(aux->prog); |