author    Alexei Starovoitov <[email protected]>  2022-11-17 19:11:34 -0800
committer Alexei Starovoitov <[email protected]>  2022-11-17 19:22:15 -0800
commit    db6bf999544c8c8dcae093e91eba4570647874b1 (patch)
tree      fc2189d72c73316748a1a856eb6c5d3aa39f8ab8 /include
parent    98b2afc8a67f651ed01fc7d5a7e2528e63dd4e08 (diff)
parent    0a2f85a1be4328d29aefa54684d10c23a3298fef (diff)
Merge branch 'Allocated objects, BPF linked lists'
Kumar Kartikeya Dwivedi says:

====================

This series introduces user-defined BPF objects of a type in program
BTF. This allows BPF programs to allocate their own objects, build their
own object hierarchies, and use the basic building blocks provided by
the BPF runtime to build their own data structures flexibly.

Then, we introduce support for single ownership BPF linked lists, which
can be put inside BPF maps or allocated objects, and hold such allocated
objects as elements. It works as an intrusive collection, which is done
to allow making allocated objects part of multiple data structures at
the same time in the future.

The eventual goal of this and future patches is to allow one to do some
limited form of kernel-style programming in BPF C, and allow programmers
to build their own complex data structures flexibly out of basic
building blocks. The key difference will be that such programs are
verified to be safe, preserve runtime integrity of the system, and are
proven to be bug-free as far as the invariants of BPF-specific APIs are
concerned.

One immediate use case for the infrastructure this series introduces is
managing percpu NMI-safe linked lists inside BPF programs. Another use
case in the near future is linking kernel structures like XDP frames
and sk_buffs directly into user data structures (rbtree, pifomap, etc.)
for packet queueing. This will follow the single ownership concept
included in this series.

The user has complete control of the internal locking, and hence also
the batching of operations for each critical section.

The features are (two usage sketches follow after this log):

 - Allocated objects.
 - bpf_obj_new and bpf_obj_drop to allocate and free them.
 - Single ownership BPF linked lists.
   - Support for them in BPF maps.
   - Support for them in allocated objects.
 - Global spin locks.
 - Spin locks inside allocated objects.

Some other notable things:

 - Completely static verification of locking.
 - Kfunc argument handling has been completely reworked.
 - Argument rewriting support for kfuncs.
 - A new bpf_experimental.h header as a dumping ground for these APIs.

Any functionality exposed in this series is NOT part of UAPI. It is only
available through use of kfuncs, and structs that can be added to a map
value may also change their size or name in the future. Hence, every
feature in this series must be considered experimental.

Follow-ups:
-----------
 * Support for kptrs (local and kernel) in local storage and percpu maps
   + kptr tests
 * Fixes for helper access checks rebasing on top of this series

Next steps:
-----------
 * NMI-safe percpu single ownership linked lists (using local_t
   protection).
 * Lockless linked lists.
 * Allow RCU-protected BPF allocated objects. This then allows
   RCU-protected list lookups, since spinlock protection for readers
   does not scale.
 * Introduce bpf_refcount for local kptrs, shared ownership.
 * Introduce shared ownership linked lists.
 * Documentation.

Changelog:
----------
v9 -> v10
v9: https://lore.kernel.org/bpf/[email protected]

 * Deduplicate code to find btf_record of reg (Alexei)
 * Add linked_list test to DENYLIST.aarch64 (Alexei)
 * Disable some linked list tests for now so that they compile with
   clang nightly (Alexei)

v8 -> v9
v8: https://lore.kernel.org/bpf/[email protected]

 * Fix up commit log of patch 2, simplify patch 3
 * Explain the implicit requirement of bpf_list_head requiring map BTF
   to match in btf_record_equal in a separate patch.

v7 -> v8
v7: https://lore.kernel.org/bpf/[email protected]

 * Fix early return in map_check_btf (Dan Carpenter)
 * Fix two memory leak bugs in local storage maps, outer maps
 * Address comments from Alexei and Dave
 * More local kptr -> allocated object renaming
 * Use krealloc with NULL instead of kmalloc + krealloc
 * Drop WARN_ON_ONCE for field_offs parsing
 * Combine kfunc add + remove patches into one
 * Drop STRONG suffix from KF_ARG_PTR_TO_KPTR
 * Rename is_kfunc_arg_ret_buf_size to is_kfunc_arg_scalar_with_name
 * Remove redundant check for reg->type and arg type in it
 * Drop void * ret type check
 * Remove code duplication in checks for NULL pointer with offset != 0
 * Fix two bpf_list_node typos
 * Improve log message for bpf_list_head operations
 * Improve comments for active_lock struct
 * Improve comments for implementation details of process_spin_lock
 * Add Dave's acks

v6 -> v7
v6: https://lore.kernel.org/bpf/[email protected]

 * Fix uninitialized variable warning (Dan Carpenter, Kernel Test Robot)
 * One more local_kptr renaming

v5 -> v6
v5: https://lore.kernel.org/bpf/[email protected]

 * Replace (i && !off) check with next_off, include test (Andrii)
 * Drop local kptrs naming (Andrii, Alexei)
 * Drop reg->precise == EXACT patch (Andrii)
 * Add comment about ptr member of struct active_lock (Andrii)
 * Use btf__new_empty + btf__add_xxx APIs (Andrii)
 * Address other misc nits from Andrii

v4 -> v5
v4: https://lore.kernel.org/bpf/[email protected]

 * Add a lot more selftests (failure, success, runtime, BTF)
 * Make sure series is bisect friendly
 * Move list draining out of spin lock
   * This exposed an issue where bpf_mem_free can now be called in
     map_free path without migrate_disable, also fixed that.
 * Rename MEM_ALLOC -> MEM_RINGBUF, MEM_TYPE_LOCAL -> MEM_ALLOC (Alexei)
 * Group lock identity into a struct active_lock { ptr, id } (Dave)
 * Split set_release_on_unlock logic into separate patch (Alexei)

v3 -> v4
v3: https://lore.kernel.org/bpf/[email protected]

 * Fix compiler error for !CONFIG_BPF_SYSCALL (Kernel Test Robot)
 * Fix error due to BUILD_BUG_ON on 32-bit platforms (Kernel Test Robot)

v2 -> v3
v2: https://lore.kernel.org/bpf/[email protected]

 * Add ack from Dave for patch 5
 * Rename btf_type_fields -> btf_record, btf_type_fields_off ->
   btf_field_offs, rename functions similarly (Alexei)
 * Remove 'kind' component from contains declaration tag (Alexei)
 * Move bpf_list_head, bpf_list_node definitions to UAPI bpf.h (Alexei)
 * Add note in commit log about modifying btf_struct_access API (Dave)
 * Downgrade WARN_ON_ONCE to verbose(env, "...") and return -EFAULT
   (Dave)
 * Add type_is_local_kptr wrapper to avoid noisy checks (Dave)
 * Remove unused flags parameter from bpf_kptr_new (Alexei)
 * Rename bpf_kptr_new -> bpf_obj_new, bpf_kptr_drop -> bpf_obj_drop
   (Alexei)
 * Reword comment in ref_obj_id_set_release_on_unlock (Dave)
 * Fix return type of ref_obj_id_set_release_on_unlock (Dave)
 * Introduce is_bpf_list_api_kfunc to dedup checks (Dave)
 * Disallow BPF_WRITE to untrusted local kptrs
 * Add details about soundness of check_reg_allocation_locked logic
 * List untrusted local kptrs for PROBE_MEM handling

v1 -> v2
v1: https://lore.kernel.org/bpf/[email protected]

 * Rebase on bpf-next to resolve merge conflict in DENYLIST.s390x
 * Fix a couple of mental lapses in bpf_list_head_free

RFC v1 -> v1
RFC v1: https://lore.kernel.org/bpf/[email protected]

 * Mostly a complete rewrite of BTF parsing, refactor existing code
   (Kartikeya)
 * Rebase kfunc rewrite for bpf-next, add support for more changes
 * Cache type metadata in BTF to avoid recomputation inside verifier
   (Kartikeya)
 * Remove __kernel tag, make things similar to map values, reserve bpf_
   prefix
 * bpf_kptr_new, bpf_kptr_drop
 * Rename precision state enum values (Alexei)
 * Drop explicit constructor/destructor support (Alexei)
 * Rewrite code for constructing/destructing objects and offload to
   runtime
 * Minimize duplication in bpf_map_value_off_desc handling (Alexei)
 * Expose global memory allocator (Alexei)
 * Address other nits from Alexei
 * Split out local kptrs in maps, more kptrs in maps support into a
   follow-up

Links:
------
 * Dave's BPF RB-Tree RFC series
   v1 (Discussion thread)
     https://lore.kernel.org/bpf/[email protected]
   v2 (With support for static locks)
     https://lore.kernel.org/bpf/[email protected]
 * BPF Linked Lists Discussion
   https://lore.kernel.org/bpf/CAP01T74U30+yeBHEgmgzTJ-XYxZ0zj71kqCDJtTH9YQNfTK+Xw@mail.gmail.com
 * BPF Memory Allocator from Alexei
   https://lore.kernel.org/bpf/[email protected]
 * BPF Memory Allocator UAPI Discussion
   https://lore.kernel.org/bpf/[email protected]

====================

Signed-off-by: Alexei Starovoitov <[email protected]>
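
A first usage sketch: allocating an object with bpf_obj_new and moving
it through a global linked list protected by a global spin lock. This is
modeled on the selftests added by this series; the __contains and
private helper macros are inlined here for self-containment (in the tree
they live in bpf_experimental.h and the selftest headers), and
container_of is spelled out rather than assumed to exist:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "bpf_experimental.h" /* bpf_obj_new, bpf_obj_drop, list kfuncs */

/* Tag which (type, member) pair a bpf_list_head may contain. */
#define __contains(name, node) \
	__attribute__((btf_decl_tag("contains:" #name ":" #node)))
/* Place lock and list head in the same named .data section (same map). */
#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
#define container_of(ptr, type, member) \
	((type *)((void *)(ptr) - offsetof(type, member)))

struct node_data {
	long key;
	struct bpf_list_node node;
};

/* Global lock protecting a global list head; both must live in the
 * same datasec so the verifier can pair them.
 */
private(A) struct bpf_spin_lock glock;
private(A) struct bpf_list_head ghead __contains(node_data, node);

SEC("tc")
int list_push_pop(void *ctx)
{
	struct bpf_list_node *ln;
	struct node_data *n;

	n = bpf_obj_new(typeof(*n)); /* we own the allocated object */
	if (!n)
		return 0;
	n->key = 42;

	bpf_spin_lock(&glock);
	bpf_list_push_front(&ghead, &n->node); /* ownership moves to the list */
	bpf_spin_unlock(&glock);

	bpf_spin_lock(&glock);
	ln = bpf_list_pop_front(&ghead); /* ownership moves back to us */
	bpf_spin_unlock(&glock);
	if (!ln)
		return 0;
	n = container_of(ln, struct node_data, node);
	bpf_obj_drop(n); /* an owned reference must be freed or stored */
	return 0;
}

char _license[] SEC("license") = "GPL";

Single ownership is visible in the comments above: push consumes the
reference, pop returns one, and the verifier rejects programs that touch
a node after pushing it or leak one after popping it.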
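A second sketch shows the same building blocks embedded in a map value
(embedding them in another allocated object works the same way, which is
how object hierarchies are built). This again follows the selftest
conventions from the sketch above; the map and program names are
illustrative only:

struct map_value {
	struct bpf_spin_lock lock;
	struct bpf_list_head head __contains(node_data, node);
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, int);
	__type(value, struct map_value);
	__uint(max_entries, 1);
} array_map SEC(".maps");

SEC("tc")
int map_list_push(void *ctx)
{
	struct node_data *n;
	struct map_value *v;
	int key = 0;

	v = bpf_map_lookup_elem(&array_map, &key);
	if (!v)
		return 0;
	n = bpf_obj_new(typeof(*n));
	if (!n)
		return 0;
	/* The verifier statically checks that v->head is only operated on
	 * while v->lock, the lock in the same map value, is held.
	 */
	bpf_spin_lock(&v->lock);
	bpf_list_push_front(&v->head, &n->node);
	bpf_spin_unlock(&v->lock);
	return 0;
}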
Diffstat (limited to 'include')
-rw-r--r--  include/linux/bpf.h           51
-rw-r--r--  include/linux/bpf_verifier.h  25
-rw-r--r--  include/linux/btf.h           67
3 files changed, 120 insertions, 23 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e60a5c052473..8b32376ce746 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -54,6 +54,8 @@ struct cgroup;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
extern struct kobject *btf_kobj;
+extern struct bpf_mem_alloc bpf_global_ma;
+extern bool bpf_global_ma_set;
typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
@@ -177,6 +179,7 @@ enum btf_field_type {
BPF_KPTR_REF = (1 << 3),
BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF,
BPF_LIST_HEAD = (1 << 4),
+ BPF_LIST_NODE = (1 << 5),
};
struct btf_field_kptr {
@@ -190,6 +193,7 @@ struct btf_field_list_head {
struct btf *btf;
u32 value_btf_id;
u32 node_offset;
+ struct btf_record *value_rec;
};
struct btf_field {
@@ -277,6 +281,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
return "kptr";
case BPF_LIST_HEAD:
return "bpf_list_head";
+ case BPF_LIST_NODE:
+ return "bpf_list_node";
default:
WARN_ON_ONCE(1);
return "unknown";
@@ -295,6 +301,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
return sizeof(u64);
case BPF_LIST_HEAD:
return sizeof(struct bpf_list_head);
+ case BPF_LIST_NODE:
+ return sizeof(struct bpf_list_node);
default:
WARN_ON_ONCE(1);
return 0;
@@ -313,6 +321,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
return __alignof__(u64);
case BPF_LIST_HEAD:
return __alignof__(struct bpf_list_head);
+ case BPF_LIST_NODE:
+ return __alignof__(struct bpf_list_node);
default:
WARN_ON_ONCE(1);
return 0;
@@ -326,16 +336,19 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f
return rec->field_mask & type;
}
-static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
+static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj)
{
- if (!IS_ERR_OR_NULL(map->record)) {
- struct btf_field *fields = map->record->fields;
- u32 cnt = map->record->cnt;
- int i;
+ int i;
- for (i = 0; i < cnt; i++)
- memset(dst + fields[i].offset, 0, btf_field_type_size(fields[i].type));
- }
+ if (!foffs)
+ return;
+ for (i = 0; i < foffs->cnt; i++)
+ memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]);
+}
+
+static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
+{
+ bpf_obj_init(map->field_offs, dst);
}
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
@@ -525,6 +538,11 @@ enum bpf_type_flag {
/* Size is known at compile time. */
MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS),
+ /* MEM is of an allocated object of type in program BTF. This is used to
+ * tag PTR_TO_BTF_ID allocated using bpf_obj_new.
+ */
+ MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
@@ -2096,22 +2114,11 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
const char *func_name,
struct btf_func_model *m);
-struct bpf_kfunc_arg_meta {
- u64 r0_size;
- bool r0_rdonly;
- int ref_obj_id;
- u32 flags;
-};
-
struct bpf_reg_state;
int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *regs);
int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *regs);
-int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
- const struct btf *btf, u32 func_id,
- struct bpf_reg_state *regs,
- struct bpf_kfunc_arg_meta *meta);
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *reg);
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
@@ -2792,4 +2799,10 @@ struct bpf_key {
bool has_ref;
};
#endif /* CONFIG_KEYS */
+
+static inline bool type_is_alloc(u32 type)
+{
+ return type & MEM_ALLOC;
+}
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 1a32baa78ce2..23f30c685f28 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -223,6 +223,11 @@ struct bpf_reference_state {
* exiting a callback function.
*/
int callback_ref;
+ /* Mark the reference state to release the registers sharing the same id
+ * on bpf_spin_unlock (for nodes that we will lose ownership to but are
+ * safe to access inside the critical section).
+ */
+ bool release_on_unlock;
};
/* state of the program:
@@ -323,7 +328,21 @@ struct bpf_verifier_state {
u32 branches;
u32 insn_idx;
u32 curframe;
- u32 active_spin_lock;
+ /* For every reg representing a map value or allocated object pointer,
+ * we consider the tuple of (ptr, id) for them to be unique in verifier
* context and consider them to not alias each other for the purposes of
+ * tracking lock state.
+ */
+ struct {
+ /* This can either be reg->map_ptr or reg->btf. If ptr is NULL,
+ * there's no active lock held, and other fields have no
+ * meaning. If non-NULL, it indicates that a lock is held and
+ * id member has the reg->id of the register which can be >= 0.
+ */
+ void *ptr;
+ /* This will be reg->id */
+ u32 id;
+ } active_lock;
bool speculative;
/* first and last insn idx of this verifier state */
@@ -419,6 +438,8 @@ struct bpf_insn_aux_data {
*/
struct bpf_loop_inline_state loop_inline_state;
};
+ u64 obj_new_size; /* remember the size of type passed to bpf_obj_new to rewrite R1 */
+ struct btf_struct_meta *kptr_struct_meta;
u64 map_key_state; /* constant (32 bit) key tracking for maps */
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
@@ -589,8 +610,6 @@ int check_ptr_off_reg(struct bpf_verifier_env *env,
int check_func_arg_reg_off(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno,
enum bpf_arg_type arg_type);
-int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
- u32 regno);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size);
bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env,
diff --git a/include/linux/btf.h b/include/linux/btf.h
index d80345fa566b..d5b26380a60f 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -6,6 +6,8 @@
#include <linux/types.h>
#include <linux/bpfptr.h>
+#include <linux/bsearch.h>
+#include <linux/btf_ids.h>
#include <uapi/linux/btf.h>
#include <uapi/linux/bpf.h>
@@ -78,6 +80,17 @@ struct btf_id_dtor_kfunc {
u32 kfunc_btf_id;
};
+struct btf_struct_meta {
+ u32 btf_id;
+ struct btf_record *record;
+ struct btf_field_offs *field_offs;
+};
+
+struct btf_struct_metas {
+ u32 cnt;
+ struct btf_struct_meta types[];
+};
+
typedef void (*btf_dtor_kfunc_t)(void *);
extern const struct file_operations btf_fops;
@@ -165,6 +178,7 @@ int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
int btf_find_timer(const struct btf *btf, const struct btf_type *t);
struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
u32 field_mask, u32 value_size);
+int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec);
struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
@@ -324,6 +338,16 @@ static inline bool btf_type_is_struct(const struct btf_type *t)
return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
}
+static inline bool __btf_type_is_struct(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
+}
+
+static inline bool btf_type_is_array(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
+}
+
static inline u16 btf_type_vlen(const struct btf_type *t)
{
return BTF_INFO_VLEN(t->info);
@@ -408,9 +432,27 @@ static inline struct btf_param *btf_params(const struct btf_type *t)
return (struct btf_param *)(t + 1);
}
-#ifdef CONFIG_BPF_SYSCALL
+static inline int btf_id_cmp_func(const void *a, const void *b)
+{
+ const int *pa = a, *pb = b;
+
+ return *pa - *pb;
+}
+
+static inline bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
+{
+ return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
+}
+
+static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id)
+{
+ return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func);
+}
+
struct bpf_prog;
+struct bpf_verifier_log;
+#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
@@ -423,6 +465,13 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id);
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
struct module *owner);
+struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id);
+const struct btf_member *
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type,
+ int arg);
+bool btf_types_are_same(const struct btf *btf1, u32 id1,
+ const struct btf *btf2, u32 id2);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
u32 type_id)
@@ -454,6 +503,22 @@ static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dt
{
return 0;
}
+static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id)
+{
+ return NULL;
+}
+static inline const struct btf_member *
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type,
+ int arg)
+{
+ return NULL;
+}
+static inline bool btf_types_are_same(const struct btf *btf1, u32 id1,
+ const struct btf *btf2, u32 id2)
+{
+ return false;
+}
#endif
static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t)