aboutsummaryrefslogtreecommitdiff
path: root/kernel/bpf/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/core.c')
-rw-r--r--kernel/bpf/core.c310
1 files changed, 288 insertions, 22 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 503d4211988a..ad5f55922a13 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -28,6 +28,9 @@
#include <linux/moduleloader.h>
#include <linux/bpf.h>
#include <linux/frame.h>
+#include <linux/rbtree_latch.h>
+#include <linux/kallsyms.h>
+#include <linux/rcupdate.h>
#include <asm/unaligned.h>
@@ -73,8 +76,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
- gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog_aux *aux;
struct bpf_prog *fp;
@@ -95,6 +97,8 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
fp->aux = aux;
fp->aux->prog = fp;
+ INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode);
+
return fp;
}
EXPORT_SYMBOL_GPL(bpf_prog_alloc);
@@ -102,8 +106,7 @@ EXPORT_SYMBOL_GPL(bpf_prog_alloc);
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
- gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *fp;
u32 pages, delta;
int ret;
@@ -290,6 +293,202 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
}
#ifdef CONFIG_BPF_JIT
+static __always_inline void
+bpf_get_prog_addr_region(const struct bpf_prog *prog,
+ unsigned long *symbol_start,
+ unsigned long *symbol_end)
+{
+ const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
+ unsigned long addr = (unsigned long)hdr;
+
+ WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
+
+ *symbol_start = addr;
+ *symbol_end = addr + hdr->pages * PAGE_SIZE;
+}
+
+static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
+{
+ BUILD_BUG_ON(sizeof("bpf_prog_") +
+ sizeof(prog->tag) * 2 + 1 > KSYM_NAME_LEN);
+
+ sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
+ sym = bin2hex(sym, prog->tag, sizeof(prog->tag));
+ *sym = 0;
+}
+
+static __always_inline unsigned long
+bpf_get_prog_addr_start(struct latch_tree_node *n)
+{
+ unsigned long symbol_start, symbol_end;
+ const struct bpf_prog_aux *aux;
+
+ aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
+ bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
+
+ return symbol_start;
+}
+
+static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
+ struct latch_tree_node *b)
+{
+ return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b);
+}
+
+static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
+{
+ unsigned long val = (unsigned long)key;
+ unsigned long symbol_start, symbol_end;
+ const struct bpf_prog_aux *aux;
+
+ aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
+ bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
+
+ if (val < symbol_start)
+ return -1;
+ if (val >= symbol_end)
+ return 1;
+
+ return 0;
+}
+
+static const struct latch_tree_ops bpf_tree_ops = {
+ .less = bpf_tree_less,
+ .comp = bpf_tree_comp,
+};
+
+static DEFINE_SPINLOCK(bpf_lock);
+static LIST_HEAD(bpf_kallsyms);
+static struct latch_tree_root bpf_tree __cacheline_aligned;
+
+int bpf_jit_kallsyms __read_mostly;
+
+static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
+{
+ WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
+ list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms);
+ latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
+}
+
+static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
+{
+ if (list_empty(&aux->ksym_lnode))
+ return;
+
+ latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
+ list_del_rcu(&aux->ksym_lnode);
+}
+
+static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
+{
+ return fp->jited && !bpf_prog_was_classic(fp);
+}
+
+static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
+{
+ return list_empty(&fp->aux->ksym_lnode) ||
+ fp->aux->ksym_lnode.prev == LIST_POISON2;
+}
+
+void bpf_prog_kallsyms_add(struct bpf_prog *fp)
+{
+ if (!bpf_prog_kallsyms_candidate(fp) ||
+ !capable(CAP_SYS_ADMIN))
+ return;
+
+ spin_lock_bh(&bpf_lock);
+ bpf_prog_ksym_node_add(fp->aux);
+ spin_unlock_bh(&bpf_lock);
+}
+
+void bpf_prog_kallsyms_del(struct bpf_prog *fp)
+{
+ if (!bpf_prog_kallsyms_candidate(fp))
+ return;
+
+ spin_lock_bh(&bpf_lock);
+ bpf_prog_ksym_node_del(fp->aux);
+ spin_unlock_bh(&bpf_lock);
+}
+
+static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
+{
+ struct latch_tree_node *n;
+
+ if (!bpf_jit_kallsyms_enabled())
+ return NULL;
+
+ n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
+ return n ?
+ container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
+ NULL;
+}
+
+const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
+ unsigned long *off, char *sym)
+{
+ unsigned long symbol_start, symbol_end;
+ struct bpf_prog *prog;
+ char *ret = NULL;
+
+ rcu_read_lock();
+ prog = bpf_prog_kallsyms_find(addr);
+ if (prog) {
+ bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end);
+ bpf_get_prog_name(prog, sym);
+
+ ret = sym;
+ if (size)
+ *size = symbol_end - symbol_start;
+ if (off)
+ *off = addr - symbol_start;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+bool is_bpf_text_address(unsigned long addr)
+{
+ bool ret;
+
+ rcu_read_lock();
+ ret = bpf_prog_kallsyms_find(addr) != NULL;
+ rcu_read_unlock();
+
+ return ret;
+}
+
+int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
+ char *sym)
+{
+ unsigned long symbol_start, symbol_end;
+ struct bpf_prog_aux *aux;
+ unsigned int it = 0;
+ int ret = -ERANGE;
+
+ if (!bpf_jit_kallsyms_enabled())
+ return ret;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) {
+ if (it++ != symnum)
+ continue;
+
+ bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
+ bpf_get_prog_name(aux->prog, sym);
+
+ *value = symbol_start;
+ *type = BPF_SYM_ELF_TYPE;
+
+ ret = 0;
+ break;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
struct bpf_binary_header *
bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
unsigned int alignment,
@@ -326,6 +525,24 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr)
module_memfree(hdr);
}
+/* This symbol is only overridden by archs that have different
+ * requirements than the usual eBPF JITs, f.e. when they only
+ * implement cBPF JIT, do not set images read-only, etc.
+ */
+void __weak bpf_jit_free(struct bpf_prog *fp)
+{
+ if (fp->jited) {
+ struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
+
+ bpf_jit_binary_unlock_ro(hdr);
+ bpf_jit_binary_free(hdr);
+
+ WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
+ }
+
+ bpf_prog_unlock_free(fp);
+}
+
int bpf_jit_harden __read_mostly;
static int bpf_jit_blind_insn(const struct bpf_insn *from,
@@ -436,8 +653,7 @@ out:
static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
- gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *fp;
fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL);
@@ -547,10 +763,10 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
*
* Decode and execute eBPF instructions.
*/
-static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
+static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
+ u64 *stack)
{
- u64 stack[MAX_BPF_STACK / sizeof(u64)];
- u64 regs[MAX_BPF_REG], tmp;
+ u64 tmp;
static const void *jumptable[256] = {
[0 ... 255] = &&default_label,
/* Now overwrite non-defaults ... */
@@ -608,7 +824,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
/* Call instruction */
[BPF_JMP | BPF_CALL] = &&JMP_CALL,
- [BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL,
+ [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
/* Jumps */
[BPF_JMP | BPF_JA] = &&JMP_JA,
[BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
@@ -658,9 +874,6 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
#define CONT ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })
- FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
- ARG1 = (u64) (unsigned long) ctx;
-
select_insn:
goto *jumptable[insn->code];
@@ -939,12 +1152,12 @@ out:
LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
off = IMM;
load_word:
- /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
- * only appearing in the programs where ctx ==
- * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
- * == BPF_R6, bpf_convert_filter() saves it in BPF_R6,
- * internal BPF verifier will check that BPF_R6 ==
- * ctx.
+ /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
+ * appearing in the programs where ctx == skb
+ * (see may_access_skb() in the verifier). All programs
+ * keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6,
+ * bpf_convert_filter() saves it in BPF_R6, internal BPF
+ * verifier will check that BPF_R6 == ctx.
*
* BPF_ABS and BPF_IND are wrappers of function calls,
* so they scratch BPF_R1-BPF_R5 registers, preserve
@@ -1003,7 +1216,39 @@ load_byte:
WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
return 0;
}
-STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */
+STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
+
+#define PROG_NAME(stack_size) __bpf_prog_run##stack_size
+#define DEFINE_BPF_PROG_RUN(stack_size) \
+static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
+{ \
+ u64 stack[stack_size / sizeof(u64)]; \
+ u64 regs[MAX_BPF_REG]; \
+\
+ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
+ ARG1 = (u64) (unsigned long) ctx; \
+ return ___bpf_prog_run(regs, insn, stack); \
+}
+
+#define EVAL1(FN, X) FN(X)
+#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
+#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
+#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
+#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
+#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
+
+EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192);
+EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384);
+EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512);
+
+#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),
+
+static unsigned int (*interpreters[])(const void *ctx,
+ const struct bpf_insn *insn) = {
+EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
+EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
+EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
+};
bool bpf_prog_array_compatible(struct bpf_array *array,
const struct bpf_prog *fp)
@@ -1052,7 +1297,9 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
*/
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
- fp->bpf_func = (void *) __bpf_prog_run;
+ u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+
+ fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
/* eBPF JITs can rewrite the program in case constant
* blinding is active. However, in case of error during
@@ -1154,12 +1401,22 @@ const struct bpf_func_proto bpf_tail_call_proto = {
.arg3_type = ARG_ANYTHING,
};
-/* For classic BPF JITs that don't implement bpf_int_jit_compile(). */
+/* Stub for JITs that only support cBPF. eBPF programs are interpreted.
+ * It is encouraged to implement bpf_int_jit_compile() instead, so that
+ * eBPF and implicitly also cBPF can get JITed!
+ */
struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
{
return prog;
}
+/* Stub for JITs that support eBPF. All cBPF code gets transformed into
+ * eBPF by the kernel and is later compiled by bpf_int_jit_compile().
+ */
+void __weak bpf_jit_compile(struct bpf_prog *prog)
+{
+}
+
bool __weak bpf_helper_changes_pkt_data(void *func)
{
return false;
@@ -1173,3 +1430,12 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
{
return -EFAULT;
}
+
+/* All definitions of tracepoints related to BPF. */
+#define CREATE_TRACE_POINTS
+#include <linux/bpf_trace.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu);