aboutsummaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorAlexei Starovoitov <[email protected]>2023-09-16 09:34:23 -0700
committerAlexei Starovoitov <[email protected]>2023-09-16 09:36:44 -0700
commitec6f1b4db95b7eedb3fe85f4f14e08fa0e9281c3 (patch)
tree2094312e53ff2b27e6aa2cb71fd29cc5b41175c4 /include/linux
parentc4ab64e6da42030c866f0589d1bfc13a037432dd (diff)
parentd2a93715bfb0655a63bb1687f43f48eb2e61717b (diff)
Merge branch 'exceptions-1-2'
Kumar Kartikeya Dwivedi says: ==================== Exceptions - 1/2 This series implements the _first_ part of the runtime and verifier support needed to enable BPF exceptions. Exceptions thrown from programs are processed as an immediate exit from the program, which unwinds all the active stack frames until the main stack frame, and returns to the BPF program's caller. The ability to perform this unwinding safely allows the program to test conditions that are always true at runtime but which the verifier has no visibility into. Thus, it also reduces verification effort by safely terminating redundant paths that can be taken within a program. The patches to perform runtime resource cleanup during the frame-by-frame unwinding will be posted as a follow-up to this set. It must be noted that exceptions are not an error handling mechanism for unlikely runtime conditions, but a way to safely terminate the execution of a program in presence of conditions that should never occur at runtime. They are meant to serve higher-level primitives such as program assertions. The following kfuncs and macros are introduced: Assertion macros are also introduced, please see patch 13 for their documentation. /* Description * Throw a BPF exception from the program, immediately terminating its * execution and unwinding the stack. The supplied 'cookie' parameter * will be the return value of the program when an exception is thrown, * and the default exception callback is used. Otherwise, if an exception * callback is set using the '__exception_cb(callback)' declaration tag * on the main program, the 'cookie' parameter will be the callback's only * input argument. * * Thus, in case of default exception callback, 'cookie' is subjected to * constraints on the program's return value (as with R0 on exit). * Otherwise, the return value of the marked exception callback will be * subjected to the same checks. * * Note that throwing an exception with lingering resources (locks, * references, etc.) 
will lead to a verification error. * * Note that callbacks *cannot* call this helper. * Returns * Never. * Throws * An exception with the specified 'cookie' value. */ extern void bpf_throw(u64 cookie) __ksym; /* This macro must be used to mark the exception callback corresponding to the * main program. For example: * * int exception_cb(u64 cookie) { * return cookie; * } * * SEC("tc") * __exception_cb(exception_cb) * int main_prog(struct __sk_buff *ctx) { * ... * return TC_ACT_OK; * } * * Here, exception callback for the main program will be 'exception_cb'. Note * that this attribute can only be used once, and multiple exception callbacks * specified for the main program will lead to verification error. */ #define __exception_cb(name) __attribute__((btf_decl_tag("exception_callback:" #name))) As such, a program can only install an exception handler once for the lifetime of a BPF program, and this handler cannot be changed at runtime. The purpose of the handler is to simply interpret the cookie value supplied by the bpf_throw call, and execute user-defined logic corresponding to it. The primary purpose of allowing a handler is to control the return value of the program. The default handler returns the cookie value passed to bpf_throw when an exception is thrown. Fixing the handler for the lifetime of the program eliminates tricky and expensive handling in case of runtime changes of the handler callback when programs begin to nest, where it becomes more complex to save and restore the active handler at runtime. This version of offline unwinding based BPF exceptions is truly zero overhead, with the exception of generation of a default callback which contains a few instructions to return a default return value (0) when no exception callback is supplied by the user.
Callbacks are disallowed from throwing BPF exceptions for now, since such exceptions need to cross the callback helper boundary (and therefore must care about unwinding kernel state), however it is possible to lift this restriction in the future follow-up. Exceptions terminate propagating at program boundaries, hence both BPF_PROG_TYPE_EXT and tail call targets return to their caller context the return value of the exception callback, in the event that they throw an exception. Thus, exceptions do not cross extension or tail call boundary. However, this is mostly an implementation choice, and can be changed to suit more user-friendly semantics. Changelog: ---------- v2 -> v3 v2: https://lore.kernel.org/bpf/[email protected] * Add Dave's Acked-by. * Address all comments from Alexei. * Use bpf_is_subprog to check for main prog in bpf_stack_walker. * Drop accidental leftover hunk in libbpf patch. * Split libbpf patch's refactoring to aid review * Disable fentry/fexit in addition to freplace for exception cb. * Add selftests for fentry/fexit/freplace on exception cb and main prog. * Use btf_find_by_name_kind in bpf_find_exception_callback_insn_off (Martin) * Split KASAN patch into two to aid backporting (Andrey) * Move exception callback append step to bpf_object__relocate (Andrii) * Ensure that the exception callback name is unique (Andrii) * Keep ASM implementation of assertion macros instead of C, as it does not achieve intended results for bpf_assert_range and other cases. v1 -> v2 v1: https://lore.kernel.org/bpf/[email protected] * Address all comments from Alexei. * Fix a few bugs and corner cases in the implementations found during testing. Also add new selftests for these cases. * Reinstate patch to consider ksym.end part of the program (but reworked to cover other corner cases). * Implement new style of tagging exception callbacks, add libbpf support for the new declaration tag. * Limit support to 64-bit integer types for assertion macros.
The compiler ends up performing shifts or bitwise and operations when finally making use of the value, which defeats the purpose of the macro. On noalu32 mode, the shifts may also happen before use, hurting reliability. * Comprehensively test assertion macros and their side effects on the verifier state, register bounds, etc. * Fix a KASAN false positive warning. RFC v1 -> v1 RFC v1: https://lore.kernel.org/bpf/[email protected] * Completely rework the unwinding infrastructure to use offline unwinding support. * Remove the runtime exception state and program rewriting code. * Make bpf_set_exception_callback idempotent to avoid vexing synchronization and state clobbering issues in presence of program nesting. * Disable bpf_throw within callback functions, for now. * Allow bpf_throw in tail call programs and extension programs, removing limitations of rewrite based unwinding. * Expand selftests. ==================== Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Alexei Starovoitov <[email protected]>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/bpf.h13
-rw-r--r--include/linux/bpf_verifier.h8
-rw-r--r--include/linux/filter.h8
-rw-r--r--include/linux/kasan.h2
4 files changed, 29 insertions, 2 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b9e573159432..30063a760b5a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1389,6 +1389,7 @@ struct bpf_prog_aux {
u32 stack_depth;
u32 id;
u32 func_cnt; /* used by non-func prog as the number of func progs */
+ u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
u32 attach_btf_id; /* in-kernel BTF type id to attach to */
u32 ctx_arg_info_size;
@@ -1409,6 +1410,8 @@ struct bpf_prog_aux {
bool sleepable;
bool tail_call_reachable;
bool xdp_has_frags;
+ bool exception_cb;
+ bool exception_boundary;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
/* function name for valid attach_btf_id */
@@ -1431,6 +1434,7 @@ struct bpf_prog_aux {
int cgroup_atype; /* enum cgroup_bpf_attach_type */
struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
char name[BPF_OBJ_NAME_LEN];
+ unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp);
#ifdef CONFIG_SECURITY
void *security;
#endif
@@ -2418,9 +2422,11 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *regs);
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *reg);
+ struct bpf_reg_state *reg, bool is_ex_cb);
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
struct btf *btf, const struct btf_type *t);
+const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
+ int comp_idx, const char *tag_key);
struct bpf_prog *bpf_prog_by_id(u32 id);
struct bpf_link *bpf_link_by_id(u32 id);
@@ -3194,4 +3200,9 @@ static inline gfp_t bpf_memcg_flags(gfp_t flags)
return flags;
}
+static inline bool bpf_is_subprog(const struct bpf_prog *prog)
+{
+ return prog->aux->func_idx != 0;
+}
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index a3236651ec64..94ec766432f5 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -300,6 +300,7 @@ struct bpf_func_state {
bool in_callback_fn;
struct tnum callback_ret_range;
bool in_async_callback_fn;
+ bool in_exception_callback_fn;
/* The following fields should be last. See copy_func_state() */
int acquired_refs;
@@ -541,7 +542,9 @@ struct bpf_subprog_info {
bool has_tail_call;
bool tail_call_reachable;
bool has_ld_abs;
+ bool is_cb;
bool is_async_cb;
+ bool is_exception_cb;
};
struct bpf_verifier_env;
@@ -588,6 +591,8 @@ struct bpf_verifier_env {
u32 used_map_cnt; /* number of used maps */
u32 used_btf_cnt; /* number of used BTF objects */
u32 id_gen; /* used to generate unique reg IDs */
+ u32 hidden_subprog_cnt; /* number of hidden subprogs */
+ int exception_callback_subprog;
bool explore_alu_limits;
bool allow_ptr_leaks;
bool allow_uninit_stack;
@@ -595,10 +600,11 @@ struct bpf_verifier_env {
bool bypass_spec_v1;
bool bypass_spec_v4;
bool seen_direct_write;
+ bool seen_exception;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
const struct bpf_line_info *prev_linfo;
struct bpf_verifier_log log;
- struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
+ struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 2]; /* max + 2 for the fake and exception subprogs */
union {
struct bpf_idmap idmap_scratch;
struct bpf_idset idset_scratch;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0138832ad571..27406aee2d40 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -954,6 +954,8 @@ bool bpf_jit_needs_zext(void);
bool bpf_jit_supports_subprog_tailcalls(void);
bool bpf_jit_supports_kfunc_call(void);
bool bpf_jit_supports_far_kfunc_call(void);
+bool bpf_jit_supports_exceptions(void);
+void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
bool bpf_helper_changes_pkt_data(void *func);
static inline bool bpf_dump_raw_ok(const struct cred *cred)
@@ -1169,6 +1171,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
bool is_bpf_text_address(unsigned long addr);
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *sym);
+struct bpf_prog *bpf_prog_ksym_find(unsigned long addr);
static inline const char *
bpf_address_lookup(unsigned long addr, unsigned long *size,
@@ -1236,6 +1239,11 @@ static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value,
return -ERANGE;
}
+static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
+{
+ return NULL;
+}
+
static inline const char *
bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 819b6bc8ac08..7a463f814db2 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -283,8 +283,10 @@ static inline bool kasan_check_byte(const void *address)
#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK)
void kasan_unpoison_task_stack(struct task_struct *task);
+asmlinkage void kasan_unpoison_task_stack_below(const void *watermark);
#else
static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+static inline void kasan_unpoison_task_stack_below(const void *watermark) {}
#endif
#ifdef CONFIG_KASAN_GENERIC