diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/Kbuild | 1 | ||||
-rw-r--r-- | include/asm-generic/cfi.h | 5 | ||||
-rw-r--r-- | include/linux/bpf.h | 143 | ||||
-rw-r--r-- | include/linux/bpf_verifier.h | 66 | ||||
-rw-r--r-- | include/linux/cfi.h | 12 | ||||
-rw-r--r-- | include/linux/filter.h | 4 | ||||
-rw-r--r-- | include/linux/if_vlan.h | 4 | ||||
-rw-r--r-- | include/linux/lsm_hook_defs.h | 15 | ||||
-rw-r--r-- | include/linux/mlx5/device.h | 2 | ||||
-rw-r--r-- | include/linux/security.h | 43 | ||||
-rw-r--r-- | include/linux/skbuff.h | 13 | ||||
-rw-r--r-- | include/net/xdp.h | 20 | ||||
-rw-r--r-- | include/net/xdp_sock_drv.h | 17 | ||||
-rw-r--r-- | include/net/xfrm.h | 9 | ||||
-rw-r--r-- | include/net/xsk_buff_pool.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 46 | ||||
-rw-r--r-- | include/uapi/linux/netdev.h | 3 |
17 files changed, 349 insertions, 56 deletions
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index def242528b1d..d436bee4d129 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -11,6 +11,7 @@ mandatory-y += bitops.h mandatory-y += bug.h mandatory-y += bugs.h mandatory-y += cacheflush.h +mandatory-y += cfi.h mandatory-y += checksum.h mandatory-y += compat.h mandatory-y += current.h diff --git a/include/asm-generic/cfi.h b/include/asm-generic/cfi.h new file mode 100644 index 000000000000..41fac3537bf9 --- /dev/null +++ b/include/asm-generic/cfi.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_GENERIC_CFI_H +#define __ASM_GENERIC_CFI_H + +#endif /* __ASM_GENERIC_CFI_H */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d5d8e2083610..2f54cc0436c4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -29,6 +29,7 @@ #include <linux/rcupdate_trace.h> #include <linux/static_call.h> #include <linux/memcontrol.h> +#include <linux/cfi.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -51,6 +52,10 @@ struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; +struct bpf_token; +struct user_namespace; +struct super_block; +struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -106,7 +111,11 @@ struct bpf_map_ops { /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, int fd); - void (*map_fd_put_ptr)(void *ptr); + /* If need_defer is true, the implementation should guarantee that + * the to-be-put element is still alive before the bpf program, which + * may manipulate it, exists. + */ + void (*map_fd_put_ptr)(struct bpf_map *map, void *ptr, bool need_defer); int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, @@ -272,7 +281,11 @@ struct bpf_map { */ atomic64_t refcnt ____cacheline_aligned; atomic64_t usercnt; - struct work_struct work; + /* rcu is used before freeing and work is only used during freeing */ + union { + struct work_struct work; + struct rcu_head rcu; + }; struct mutex freeze_mutex; atomic64_t writecnt; /* 'Ownership' of program-containing map is claimed by the first program @@ -288,6 +301,9 @@ struct bpf_map { } owner; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ + bool free_after_mult_rcu_gp; + bool free_after_rcu_gp; + atomic64_t sleepable_refcnt; s64 __percpu *elem_count; }; @@ -1044,6 +1060,17 @@ struct btf_func_model { */ #define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7) +/* + * Indicate the trampoline should be suitable to receive indirect calls; + * without this indirectly calling the generated code can result in #UD/#CP, + * depending on the CFI options. + * + * Used by bpf_struct_ops. + * + * Incompatible with FENTRY usage, overloads @func_addr argument. + */ +#define BPF_TRAMP_F_INDIRECT BIT(8) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ @@ -1083,10 +1110,17 @@ struct bpf_tramp_run_ctx; * fexit = a set of program to run after original function */ struct bpf_tramp_image; -int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, - void *orig_call); + void *func_addr); +void *arch_alloc_bpf_trampoline(unsigned int size); +void arch_free_bpf_trampoline(void *image, unsigned int size); +void arch_protect_bpf_trampoline(void *image, unsigned int size); +void arch_unprotect_bpf_trampoline(void *image, unsigned int size); +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr); + u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start, @@ -1119,6 +1153,7 @@ enum bpf_tramp_prog_type { struct bpf_tramp_image { void *image; + int size; struct bpf_ksym ksym; struct percpu_ref pcref; void *ip_after_call; @@ -1188,7 +1223,11 @@ struct bpf_dispatcher { #endif }; -static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( +#ifndef __bpfcall +#define __bpfcall __nocfi +#endif + +static __always_inline __bpfcall unsigned int bpf_dispatcher_nop_func( const void *ctx, const struct bpf_insn *insnsi, bpf_func_t bpf_func) @@ -1280,7 +1319,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func #define DEFINE_BPF_DISPATCHER(name) \ __BPF_DISPATCHER_SC(name); \ - noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ + noinline __bpfcall unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func) \ @@ -1303,7 +1342,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); /* Called only from JIT-enabled code, so there's no need for stubs. */ -void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); +void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); @@ -1430,6 +1469,9 @@ struct bpf_prog_aux { struct bpf_kfunc_desc_tab *kfunc_tab; struct bpf_kfunc_btf_tab *kfunc_btf_tab; u32 size_poke_tab; +#ifdef CONFIG_FINEIBT + struct bpf_ksym ksym_prefix; +#endif struct bpf_ksym ksym; const struct bpf_prog_ops *ops; struct bpf_map **used_maps; @@ -1442,10 +1484,11 @@ struct bpf_prog_aux { int cgroup_atype; /* enum cgroup_bpf_attach_type */ struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; - unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp); + u64 (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp, u64, u64); #ifdef CONFIG_SECURITY void *security; #endif + struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; @@ -1570,6 +1613,31 @@ struct bpf_link_primer { u32 id; }; +struct bpf_mount_opts { + kuid_t uid; + kgid_t gid; + umode_t mode; + + /* BPF token-related delegation options */ + u64 delegate_cmds; + u64 delegate_maps; + u64 delegate_progs; + u64 delegate_attachs; +}; + +struct bpf_token { + struct work_struct work; + atomic64_t refcnt; + struct user_namespace *userns; + u64 allowed_cmds; + u64 allowed_maps; + u64 allowed_progs; + u64 allowed_attachs; +#ifdef CONFIG_SECURITY + void *security; +#endif +}; + struct bpf_struct_ops_value; struct btf_member; @@ -1640,6 +1708,7 @@ struct bpf_struct_ops { struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; u32 type_id; u32 value_id; + void *cfi_stubs; }; #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) @@ -1653,6 +1722,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, struct bpf_tramp_link *link, const struct btf_func_model *model, + void *stub_func, void *image, void *image_end); static inline bool bpf_try_module_get(const void *data, struct module *owner) { @@ -2027,6 +2097,7 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } +extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -2161,24 +2232,26 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map) extern int sysctl_unprivileged_bpf_disabled; -static inline bool bpf_allow_ptr_leaks(void) +bool bpf_token_capable(const struct bpf_token *token, int cap); + +static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_allow_uninit_stack(void) +static inline bool bpf_allow_uninit_stack(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v1(void) +static inline bool bpf_bypass_spec_v1(const struct bpf_token *token) { - return cpu_mitigations_off() || perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v4(void) +static inline bool bpf_bypass_spec_v4(const struct bpf_token *token) { - return cpu_mitigations_off() || perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } int bpf_map_new_fd(struct bpf_map *map, int flags); @@ -2195,8 +2268,21 @@ int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); +void bpf_token_inc(struct bpf_token *token); +void bpf_token_put(struct bpf_token *token); +int bpf_token_create(union bpf_attr *attr); +struct bpf_token *bpf_token_get_from_fd(u32 ufd); + +bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); +bool bpf_token_allow_prog_type(const struct bpf_token *token, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type); + int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); +struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, + umode_t mode); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ @@ -2431,7 +2517,7 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *reg, bool is_ex_cb); + struct bpf_reg_state *reg, u32 *nargs); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, struct btf *btf, const struct btf_type *t); const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, @@ -2440,7 +2526,8 @@ const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); -const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); +const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id, + const struct bpf_prog *prog); void bpf_task_storage_free(struct task_struct *task); void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); @@ -2559,6 +2646,24 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } +static inline bool bpf_token_capable(const struct bpf_token *token, int cap) +{ + return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); +} + +static inline void bpf_token_inc(struct bpf_token *token) +{ +} + +static inline void bpf_token_put(struct bpf_token *token) +{ +} + +static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline void __dev_flush(void) { } @@ -2682,7 +2787,7 @@ static inline int btf_struct_access(struct bpf_verifier_log *log, } static inline const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id) +bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { return NULL; } diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d99a636d36a7..c2819a6579a5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -275,6 +275,11 @@ struct bpf_reference_state { int callback_ref; }; +struct bpf_retval_range { + s32 minval; + s32 maxval; +}; + /* state of the program: * type of all registers and stack info */ @@ -297,8 +302,8 @@ struct bpf_func_state { * void foo(void) { bpf_timer_set_callback(,foo); } */ u32 async_entry_cnt; + struct bpf_retval_range callback_ret_range; bool in_callback_fn; - struct tnum callback_ret_range; bool in_async_callback_fn; bool in_exception_callback_fn; /* For callback calling functions that limit number of possible @@ -316,16 +321,48 @@ struct bpf_func_state { /* The following fields should be last. See copy_func_state() */ int acquired_refs; struct bpf_reference_state *refs; - int allocated_stack; + /* The state of the stack. Each element of the array describes BPF_REG_SIZE + * (i.e. 8) bytes worth of stack memory. + * stack[0] represents bytes [*(r10-8)..*(r10-1)] + * stack[1] represents bytes [*(r10-16)..*(r10-9)] + * ... + * stack[allocated_stack/8 - 1] represents [*(r10-allocated_stack)..*(r10-allocated_stack+7)] + */ struct bpf_stack_state *stack; + /* Size of the current stack, in bytes. The stack state is tracked below, in + * `stack`. allocated_stack is always a multiple of BPF_REG_SIZE. + */ + int allocated_stack; +}; + +#define MAX_CALL_FRAMES 8 + +/* instruction history flags, used in bpf_jmp_history_entry.flags field */ +enum { + /* instruction references stack slot through PTR_TO_STACK register; + * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8) + * and accessed stack slot's index in next 6 bits (MAX_BPF_STACK is 512, + * 8 bytes per slot, so slot index (spi) is [0, 63]) + */ + INSN_F_FRAMENO_MASK = 0x7, /* 3 bits */ + + INSN_F_SPI_MASK = 0x3f, /* 6 bits */ + INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */ + + INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */ }; -struct bpf_idx_pair { - u32 prev_idx; +static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); +static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); + +struct bpf_jmp_history_entry { u32 idx; + /* insn idx can't be bigger than 1 million */ + u32 prev_idx : 22; + /* special flags, e.g., whether insn is doing register stack spill/load */ + u32 flags : 10; }; -#define MAX_CALL_FRAMES 8 /* Maximum number of register states that can exist at once */ #define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES) struct bpf_verifier_state { @@ -408,7 +445,7 @@ struct bpf_verifier_state { * For most states jmp_history_cnt is [0-3]. * For loops can go up to ~40. */ - struct bpf_idx_pair *jmp_history; + struct bpf_jmp_history_entry *jmp_history; u32 jmp_history_cnt; u32 dfs_depth; u32 callback_unroll_depth; @@ -574,12 +611,12 @@ struct bpf_subprog_info { u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ - bool has_tail_call; - bool tail_call_reachable; - bool has_ld_abs; - bool is_cb; - bool is_async_cb; - bool is_exception_cb; + bool has_tail_call: 1; + bool tail_call_reachable: 1; + bool has_ld_abs: 1; + bool is_cb: 1; + bool is_async_cb: 1; + bool is_exception_cb: 1; }; struct bpf_verifier_env; @@ -631,6 +668,10 @@ struct bpf_verifier_env { int exception_callback_subprog; bool explore_alu_limits; bool allow_ptr_leaks; + /* Allow access to uninitialized stack memory. Writes with fixed offset are + * always allowed, so this refers to reads (with fixed or variable offset), + * to writes with variable offset and to indirect (helper) accesses. + */ bool allow_uninit_stack; bool bpf_capable; bool bypass_spec_v1; @@ -651,6 +692,7 @@ struct bpf_verifier_env { int cur_stack; } cfg; struct backtrack_state bt; + struct bpf_jmp_history_entry *cur_hist_ent; u32 pass_cnt; /* number of times do_check() was called */ u32 subprog_cnt; /* number of instructions analyzed by the verifier */ diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 3552ec82b725..f0df518e11dd 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -9,6 +9,14 @@ #include <linux/bug.h> #include <linux/module.h> +#include <asm/cfi.h> + +#ifndef cfi_get_offset +static inline int cfi_get_offset(void) +{ + return 0; +} +#endif #ifdef CONFIG_CFI_CLANG enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, @@ -38,4 +46,8 @@ static inline void module_cfi_finalize(const Elf_Ehdr *hdr, #endif /* CONFIG_ARCH_USES_CFI_TRAPS */ #endif /* CONFIG_MODULES */ +#ifndef CFI_NOSEAL +#define CFI_NOSEAL(x) +#endif + #endif /* _LINUX_CFI_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index a4953fafc8cb..12d907f17d36 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1067,7 +1067,7 @@ struct bpf_binary_header * bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns); -void bpf_prog_pack_free(struct bpf_binary_header *hdr); +void bpf_prog_pack_free(void *ptr, u32 size); static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) { @@ -1139,7 +1139,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) return false; if (!bpf_jit_harden) return false; - if (bpf_jit_harden == 1 && bpf_capable()) + if (bpf_jit_harden == 1 && bpf_token_capable(prog->aux->token, CAP_BPF)) return false; return true; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 3028af87716e..c1645c86eed9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -540,7 +540,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) struct vlan_ethhdr *veth = skb_vlan_eth_hdr(skb); if (!eth_type_vlan(veth->h_vlan_proto)) - return -EINVAL; + return -ENODATA; *vlan_tci = ntohs(veth->h_vlan_TCI); return 0; @@ -561,7 +561,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, return 0; } else { *vlan_tci = 0; - return -EINVAL; + return -ENODATA; } } diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index ff217a5ce552..3fdd00b452ac 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -398,10 +398,17 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size) LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) -LSM_HOOK(int, 0, bpf_map_alloc_security, struct bpf_map *map) -LSM_HOOK(void, LSM_RET_VOID, bpf_map_free_security, struct bpf_map *map) -LSM_HOOK(int, 0, bpf_prog_alloc_security, struct bpf_prog_aux *aux) -LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) +LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token) +LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map) +LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token) +LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) +LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, + struct path *path) +LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) +LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) +LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) #endif /* CONFIG_BPF_SYSCALL */ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 820bca965fb6..01275c6e8468 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -918,7 +918,7 @@ static inline u8 get_cqe_tls_offload(struct mlx5_cqe64 *cqe) return (cqe->tls_outer_l3_tunneled >> 3) & 0x3; } -static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe) +static inline bool cqe_has_vlan(const struct mlx5_cqe64 *cqe) { return cqe->l4_l3_hdr_type & 0x1; } diff --git a/include/linux/security.h b/include/linux/security.h index 1d1df326c881..00809d2d5c38 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,6 +32,7 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/sockptr.h> +#include <linux/bpf.h> struct linux_binprm; struct cred; @@ -2020,15 +2021,22 @@ static inline void securityfs_remove(struct dentry *dentry) union bpf_attr; struct bpf_map; struct bpf_prog; -struct bpf_prog_aux; +struct bpf_token; #ifdef CONFIG_SECURITY extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); extern int security_bpf_prog(struct bpf_prog *prog); -extern int security_bpf_map_alloc(struct bpf_map *map); +extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token); extern void security_bpf_map_free(struct bpf_map *map); -extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux); -extern void security_bpf_prog_free(struct bpf_prog_aux *aux); +extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token); +extern void security_bpf_prog_free(struct bpf_prog *prog); +extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, + struct path *path); +extern void security_bpf_token_free(struct bpf_token *token); +extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +extern int security_bpf_token_capable(const struct bpf_token *token, int cap); #else static inline int security_bpf(int cmd, union bpf_attr *attr, unsigned int size) @@ -2046,7 +2054,8 @@ static inline int security_bpf_prog(struct bpf_prog *prog) return 0; } -static inline int security_bpf_map_alloc(struct bpf_map *map) +static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token) { return 0; } @@ -2054,13 +2063,33 @@ static inline int security_bpf_map_alloc(struct bpf_map *map) static inline void security_bpf_map_free(struct bpf_map *map) { } -static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux) +static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token) { return 0; } -static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) +static inline void security_bpf_prog_free(struct bpf_prog *prog) { } + +static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, + struct path *path) +{ + return 0; +} + +static inline void security_bpf_token_free(struct bpf_token *token) +{ } + +static inline int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd) +{ + return 0; +} + +static inline int security_bpf_token_capable(const struct bpf_token *token, int cap) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_BPF_SYSCALL */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7ce38874dbd1..ea5c8ab3ed00 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4247,10 +4247,13 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, { const void *a = skb_metadata_end(skb_a); const void *b = skb_metadata_end(skb_b); - /* Using more efficient variant than plain call to memcmp(). */ -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 u64 diffs = 0; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + BITS_PER_LONG != 64) + goto slow; + + /* Using more efficient variant than plain call to memcmp(). */ switch (meta_len) { #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) @@ -4270,11 +4273,11 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, fallthrough; case 4: diffs |= __it_diff(a, b, 32); break; + default: +slow: + return memcmp(a - meta_len, b - meta_len, meta_len); } return diffs; -#else - return memcmp(a - meta_len, b - meta_len, meta_len); -#endif } static inline bool skb_metadata_differs(const struct sk_buff *skb_a, diff --git a/include/net/xdp.h b/include/net/xdp.h index 349c36fb5fd8..e6770dd40c91 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -16,7 +16,7 @@ * * The XDP RX-queue info (xdp_rxq_info) is associated with the driver * level RX-ring queues. It is information that is specific to how - * the driver have configured a given RX-ring queue. + * the driver has configured a given RX-ring queue. * * Each xdp_buff frame received in the driver carries a (pointer) * reference to this xdp_rxq_info structure. This provides the XDP @@ -32,7 +32,7 @@ * The struct is not directly tied to the XDP prog. A new XDP prog * can be attached as long as it doesn't change the underlying * RX-ring. If the RX-ring does change significantly, the NIC driver - * naturally need to stop the RX-ring before purging and reallocating + * naturally needs to stop the RX-ring before purging and reallocating * memory. In that process the driver MUST call unregister (which * also applies for driver shutdown and unload). The register API is * also mandatory during RX-ring setup. @@ -369,7 +369,12 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp) static inline bool xdp_metalen_invalid(unsigned long metalen) { - return (metalen & (sizeof(__u32) - 1)) || (metalen > 32); + unsigned long meta_max; + + meta_max = type_max(typeof_member(struct skb_shared_info, meta_len)); + BUILD_BUG_ON(!__builtin_constant_p(meta_max)); + + return !IS_ALIGNED(metalen, sizeof(u32)) || metalen > meta_max; } struct xdp_attachment_info { @@ -399,6 +404,10 @@ void xdp_attachment_setup(struct xdp_attachment_info *info, NETDEV_XDP_RX_METADATA_HASH, \ bpf_xdp_metadata_rx_hash, \ xmo_rx_hash) \ + XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \ + NETDEV_XDP_RX_METADATA_VLAN_TAG, \ + bpf_xdp_metadata_rx_vlan_tag, \ + xmo_rx_vlan_tag) \ enum xdp_rx_metadata { #define XDP_METADATA_KFUNC(name, _, __, ___) name, @@ -427,6 +436,7 @@ enum xdp_rss_hash_type { XDP_RSS_L4_UDP = BIT(5), XDP_RSS_L4_SCTP = BIT(6), XDP_RSS_L4_IPSEC = BIT(7), /* L4 based hash include IPSEC SPI */ + XDP_RSS_L4_ICMP = BIT(8), /* Second part: RSS hash type combinations used for driver HW mapping */ XDP_RSS_TYPE_NONE = 0, @@ -442,11 +452,13 @@ enum xdp_rss_hash_type { XDP_RSS_TYPE_L4_IPV4_UDP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP, XDP_RSS_TYPE_L4_IPV4_SCTP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_SCTP, XDP_RSS_TYPE_L4_IPV4_IPSEC = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC, + XDP_RSS_TYPE_L4_IPV4_ICMP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_ICMP, XDP_RSS_TYPE_L4_IPV6_TCP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP, XDP_RSS_TYPE_L4_IPV6_UDP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP, XDP_RSS_TYPE_L4_IPV6_SCTP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP, XDP_RSS_TYPE_L4_IPV6_IPSEC = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC, + XDP_RSS_TYPE_L4_IPV6_ICMP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_ICMP, XDP_RSS_TYPE_L4_IPV6_TCP_EX = XDP_RSS_TYPE_L4_IPV6_TCP | XDP_RSS_L3_DYNHDR, XDP_RSS_TYPE_L4_IPV6_UDP_EX = XDP_RSS_TYPE_L4_IPV6_UDP | XDP_RSS_L3_DYNHDR, @@ -457,6 +469,8 @@ struct xdp_metadata_ops { int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash, enum xdp_rss_hash_type *rss_type); + int (*xmo_rx_vlan_tag)(const struct xdp_md *ctx, __be16 *vlan_proto, + u16 *vlan_tci); }; #ifdef CONFIG_NET diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 81e02de3f453..b62bb8525a5f 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -14,6 +14,12 @@ #ifdef CONFIG_XDP_SOCKETS +struct xsk_cb_desc { + void *src; + u8 off; + u8 bytes; +}; + void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max); @@ -47,6 +53,12 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, xp_set_rxq_info(pool, rxq); } +static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool, + struct xsk_cb_desc *desc) +{ + xp_fill_cb(pool, desc); +} + static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) { #ifdef CONFIG_NET_RX_BUSY_POLL @@ -274,6 +286,11 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, { } +static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool, + struct xsk_cb_desc *desc) +{ +} + static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) { return 0; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c9bb0f892f55..1d107241b901 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -2190,4 +2190,13 @@ static inline int register_xfrm_interface_bpf(void) #endif +#if IS_ENABLED(CONFIG_DEBUG_INFO_BTF) +int register_xfrm_state_bpf(void); +#else +static inline int register_xfrm_state_bpf(void) +{ + return 0; +} +#endif + #endif /* _NET_XFRM_H */ diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 8d48d37ab7c0..99dd7376df6a 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -12,6 +12,7 @@ struct xsk_buff_pool; struct xdp_rxq_info; +struct xsk_cb_desc; struct xsk_queue; struct xdp_desc; struct xdp_umem; @@ -135,6 +136,7 @@ static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_p /* AF_XDP ZC drivers, via xdp_sock_buff.h */ void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq); +void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc); int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs, struct page **pages, u32 nr_pages); void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b1e8c5bdfc82..42f4d3090efe 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -847,6 +847,36 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * + * BPF_TOKEN_CREATE + * Description + * Create BPF token with embedded information about what + * BPF-related functionality it allows: + * - a set of allowed bpf() syscall commands; + * - a set of allowed BPF map types to be created with + * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; + * - a set of allowed BPF program types and BPF program attach + * types to be loaded with BPF_PROG_LOAD command, if + * BPF_PROG_LOAD itself is allowed. + * + * BPF token is created (derived) from an instance of BPF FS, + * assuming it has necessary delegation mount options specified. + * This BPF token can be passed as an extra parameter to various + * bpf() syscall commands to grant BPF subsystem functionality to + * unprivileged processes. + * + * When created, BPF token is "associated" with the owning + * user namespace of BPF FS instance (super block) that it was + * derived from, and subsequent BPF operations performed with + * BPF token would be performing capabilities checks (i.e., + * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within + * that user namespace. Without BPF token, such capabilities + * have to be granted in init user namespace, making bpf() + * syscall incompatible with user namespace, for the most part. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -901,6 +931,8 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + __MAX_BPF_CMD, }; enum bpf_map_type { @@ -951,6 +983,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -995,6 +1028,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1074,9 +1108,11 @@ enum bpf_link_type { BPF_LINK_TYPE_TCX = 11, BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, - MAX_BPF_LINK_TYPE, + __MAX_BPF_LINK_TYPE, }; +#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE + enum bpf_perf_event_type { BPF_PERF_EVENT_UNSPEC = 0, BPF_PERF_EVENT_UPROBE = 1, @@ -1401,6 +1437,7 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; + __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -1470,6 +1507,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; + __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1582,6 +1620,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; + __u32 btf_token_fd; }; struct { @@ -1712,6 +1751,11 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 424c5e28f495..93cb411adf72 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -44,10 +44,13 @@ enum netdev_xdp_act { * timestamp via bpf_xdp_metadata_rx_timestamp(). * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet * hash via bpf_xdp_metadata_rx_hash(). + * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive + * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). */ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, NETDEV_XDP_RX_METADATA_HASH = 2, + NETDEV_XDP_RX_METADATA_VLAN_TAG = 4, }; /** |