Diffstat (limited to 'kernel/bpf')
-rw-r--r--   kernel/bpf/bpf_inode_storage.c          |   2
-rw-r--r--   kernel/bpf/bpf_struct_ops.c             |   2
-rw-r--r--   kernel/bpf/btf.c                        |   2
-rw-r--r--   kernel/bpf/core.c                       |  10
-rw-r--r--   kernel/bpf/disasm.c                     |   2
-rw-r--r--   kernel/bpf/inode.c                      |   4
-rw-r--r--   kernel/bpf/preload/bpf_preload_kern.c   |  19
-rw-r--r--   kernel/bpf/stackmap.c                   |  12
-rw-r--r--   kernel/bpf/syscall.c                    |   5
-rw-r--r--   kernel/bpf/trampoline.c                 | 248
-rw-r--r--   kernel/bpf/verifier.c                   | 133
11 files changed, 329 insertions, 110 deletions
| diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 6639640523c0..b58b2efb9b43 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -109,7 +109,7 @@ static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)  	fd = *(int *)key;  	f = fget_raw(fd);  	if (!f) -		return NULL; +		return ERR_PTR(-EBADF);  	sdata = inode_storage_lookup(f->f_inode, map, true);  	fput(f); diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 1a666a975416..70f6fd4fa305 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -430,7 +430,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,  		tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;  		tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; -		err = arch_prepare_bpf_trampoline(image, +		err = arch_prepare_bpf_trampoline(NULL, image,  						  st_map->image + PAGE_SIZE,  						  &st_ops->func_models[i], 0,  						  tprogs, NULL); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2efeb5f4b343..b1a76fe046cb 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -4321,8 +4321,6 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,  		 * is not supported yet.  		 * BPF_PROG_TYPE_RAW_TRACEPOINT is fine.  		 */ -		if (log->level & BPF_LOG_LEVEL) -			bpf_log(log, "arg#%d type is not a struct\n", arg);  		return NULL;  	}  	tname = btf_name_by_offset(btf, t->name_off); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 0ae015ad1e05..75244ecb2389 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -827,7 +827,7 @@ static int __init bpf_jit_charge_init(void)  }  pure_initcall(bpf_jit_charge_init); -static int bpf_jit_charge_modmem(u32 pages) +int bpf_jit_charge_modmem(u32 pages)  {  	if (atomic_long_add_return(pages, &bpf_jit_current) >  	    (bpf_jit_limit >> PAGE_SHIFT)) { @@ -840,7 +840,7 @@ static int bpf_jit_charge_modmem(u32 pages)  	return 0;  } -static void bpf_jit_uncharge_modmem(u32 pages) +void bpf_jit_uncharge_modmem(u32 pages)  {  	atomic_long_sub(pages, &bpf_jit_current);  } @@ -1118,6 +1118,8 @@ static void bpf_prog_clone_free(struct bpf_prog *fp)  	 * clone is guaranteed to not be locked.  	 */  	fp->aux = NULL; +	fp->stats = NULL; +	fp->active = NULL;  	__bpf_prog_free(fp);  } @@ -2342,6 +2344,10 @@ bool __weak bpf_helper_changes_pkt_data(void *func)  /* Return TRUE if the JIT backend wants verifier to enable sub-register usage   * analysis code and wants explicit zero extension inserted by verifier.   * Otherwise, return FALSE. + * + * The verifier inserts an explicit zero extension after BPF_CMPXCHGs even if + * you don't override this. JITs that don't want these extra insns can detect + * them using insn_is_zext.   */  bool __weak bpf_jit_needs_zext(void)  { diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index 3acc7e0b6916..faa54d58972c 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -84,7 +84,7 @@ static const char *const bpf_atomic_alu_string[16] = {  	[BPF_ADD >> 4]  = "add",  	[BPF_AND >> 4]  = "and",  	[BPF_OR >> 4]  = "or", -	[BPF_XOR >> 4]  = "or", +	[BPF_XOR >> 4]  = "xor",  };  static const char *const bpf_ldst_string[] = { diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 1576ff331ee4..d2de2abec35b 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -543,11 +543,11 @@ int bpf_obj_get_user(const char __user *pathname, int flags)  		return PTR_ERR(raw);  	if (type == BPF_TYPE_PROG) -		ret = bpf_prog_new_fd(raw); +		ret = (f_flags != O_RDWR) ? 
-EINVAL : bpf_prog_new_fd(raw);  	else if (type == BPF_TYPE_MAP)  		ret = bpf_map_new_fd(raw, f_flags);  	else if (type == BPF_TYPE_LINK) -		ret = bpf_link_new_fd(raw); +		ret = (f_flags != O_RDWR) ? -EINVAL : bpf_link_new_fd(raw);  	else  		return -ENOENT; diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c index 79c5772465f1..53736e52c1df 100644 --- a/kernel/bpf/preload/bpf_preload_kern.c +++ b/kernel/bpf/preload/bpf_preload_kern.c @@ -60,9 +60,12 @@ static int finish(void)  			 &magic, sizeof(magic), &pos);  	if (n != sizeof(magic))  		return -EPIPE; +  	tgid = umd_ops.info.tgid; -	wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); -	umd_ops.info.tgid = NULL; +	if (tgid) { +		wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); +		umd_cleanup_helper(&umd_ops.info); +	}  	return 0;  } @@ -80,10 +83,18 @@ static int __init load_umd(void)  static void __exit fini_umd(void)  { +	struct pid *tgid; +  	bpf_preload_ops = NULL; +  	/* kill UMD in case it's still there due to earlier error */ -	kill_pid(umd_ops.info.tgid, SIGKILL, 1); -	umd_ops.info.tgid = NULL; +	tgid = umd_ops.info.tgid; +	if (tgid) { +		kill_pid(tgid, SIGKILL, 1); + +		wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); +		umd_cleanup_helper(&umd_ops.info); +	}  	umd_unload_blob(&umd_ops.info);  }  late_initcall(load_umd); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index be35bfb7fb13..6fbc2abe9c91 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -517,9 +517,17 @@ const struct bpf_func_proto bpf_get_stack_proto = {  BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,  	   u32, size, u64, flags)  { -	struct pt_regs *regs = task_pt_regs(task); +	struct pt_regs *regs; +	long res; -	return __bpf_get_stack(regs, task, NULL, buf, size, flags); +	if (!try_get_task_stack(task)) +		return -EFAULT; + +	regs = task_pt_regs(task); +	res = __bpf_get_stack(regs, task, NULL, buf, size, flags); +	put_task_stack(task); + +	return res;  }  BTF_ID_LIST_SINGLE(bpf_get_task_stack_btf_ids, struct, task_struct) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c859bc46d06c..250503482cda 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -854,6 +854,11 @@ static int map_create(union bpf_attr *attr)  			err = PTR_ERR(btf);  			goto free_map;  		} +		if (btf_is_kernel(btf)) { +			btf_put(btf); +			err = -EACCES; +			goto free_map; +		}  		map->btf = btf;  		if (attr->btf_value_type_id) { diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 7bc3b3209224..4aa8b52adf25 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -9,6 +9,7 @@  #include <linux/btf.h>  #include <linux/rcupdate_trace.h>  #include <linux/rcupdate_wait.h> +#include <linux/module.h>  /* dummy _ops. The verifier will operate on target program's ops. 
*/  const struct bpf_verifier_ops bpf_extension_verifier_ops = { @@ -57,19 +58,10 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym)  			   PAGE_SIZE, true, ksym->name);  } -static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr) -{ -	struct bpf_ksym *ksym = &tr->ksym; - -	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key); -	bpf_image_ksym_add(tr->image, ksym); -} -  static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)  {  	struct bpf_trampoline *tr;  	struct hlist_head *head; -	void *image;  	int i;  	mutex_lock(&trampoline_mutex); @@ -84,14 +76,6 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)  	if (!tr)  		goto out; -	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */ -	image = bpf_jit_alloc_exec_page(); -	if (!image) { -		kfree(tr); -		tr = NULL; -		goto out; -	} -  	tr->key = key;  	INIT_HLIST_NODE(&tr->hlist);  	hlist_add_head(&tr->hlist, head); @@ -99,14 +83,31 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)  	mutex_init(&tr->mutex);  	for (i = 0; i < BPF_TRAMP_MAX; i++)  		INIT_HLIST_HEAD(&tr->progs_hlist[i]); -	tr->image = image; -	INIT_LIST_HEAD_RCU(&tr->ksym.lnode); -	bpf_trampoline_ksym_add(tr);  out:  	mutex_unlock(&trampoline_mutex);  	return tr;  } +static int bpf_trampoline_module_get(struct bpf_trampoline *tr) +{ +	struct module *mod; +	int err = 0; + +	preempt_disable(); +	mod = __module_text_address((unsigned long) tr->func.addr); +	if (mod && !try_module_get(mod)) +		err = -ENOENT; +	preempt_enable(); +	tr->mod = mod; +	return err; +} + +static void bpf_trampoline_module_put(struct bpf_trampoline *tr) +{ +	module_put(tr->mod); +	tr->mod = NULL; +} +  static int is_ftrace_location(void *ip)  {  	long addr; @@ -128,6 +129,9 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)  		ret = unregister_ftrace_direct((long)ip, (long)old_addr);  	else  		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL); + +	if (!ret) +		bpf_trampoline_module_put(tr);  	return ret;  } @@ -154,10 +158,16 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)  		return ret;  	tr->func.ftrace_managed = ret; +	if (bpf_trampoline_module_get(tr)) +		return -ENOENT; +  	if (tr->func.ftrace_managed)  		ret = register_ftrace_direct((long)ip, (long)new_addr);  	else  		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr); + +	if (ret) +		bpf_trampoline_module_put(tr);  	return ret;  } @@ -185,10 +195,142 @@ bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)  	return tprogs;  } +static void __bpf_tramp_image_put_deferred(struct work_struct *work) +{ +	struct bpf_tramp_image *im; + +	im = container_of(work, struct bpf_tramp_image, work); +	bpf_image_ksym_del(&im->ksym); +	bpf_jit_free_exec(im->image); +	bpf_jit_uncharge_modmem(1); +	percpu_ref_exit(&im->pcref); +	kfree_rcu(im, rcu); +} + +/* callback, fexit step 3 or fentry step 2 */ +static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu) +{ +	struct bpf_tramp_image *im; + +	im = container_of(rcu, struct bpf_tramp_image, rcu); +	INIT_WORK(&im->work, __bpf_tramp_image_put_deferred); +	schedule_work(&im->work); +} + +/* callback, fexit step 2. Called after percpu_ref_kill confirms. 
*/ +static void __bpf_tramp_image_release(struct percpu_ref *pcref) +{ +	struct bpf_tramp_image *im; + +	im = container_of(pcref, struct bpf_tramp_image, pcref); +	call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu); +} + +/* callback, fexit or fentry step 1 */ +static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu) +{ +	struct bpf_tramp_image *im; + +	im = container_of(rcu, struct bpf_tramp_image, rcu); +	if (im->ip_after_call) +		/* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */ +		percpu_ref_kill(&im->pcref); +	else +		/* the case of fentry trampoline */ +		call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu); +} + +static void bpf_tramp_image_put(struct bpf_tramp_image *im) +{ +	/* The trampoline image that calls original function is using: +	 * rcu_read_lock_trace to protect sleepable bpf progs +	 * rcu_read_lock to protect normal bpf progs +	 * percpu_ref to protect trampoline itself +	 * rcu tasks to protect trampoline asm not covered by percpu_ref +	 * (which are few asm insns before __bpf_tramp_enter and +	 *  after __bpf_tramp_exit) +	 * +	 * The trampoline is unreachable before bpf_tramp_image_put(). +	 * +	 * First, patch the trampoline to avoid calling into fexit progs. +	 * The progs will be freed even if the original function is still +	 * executing or sleeping. +	 * In case of CONFIG_PREEMPT=y use call_rcu_tasks() to wait on +	 * first few asm instructions to execute and call into +	 * __bpf_tramp_enter->percpu_ref_get. +	 * Then use percpu_ref_kill to wait for the trampoline and the original +	 * function to finish. +	 * Then use call_rcu_tasks() to make sure few asm insns in +	 * the trampoline epilogue are done as well. +	 * +	 * In !PREEMPT case the task that got interrupted in the first asm +	 * insns won't go through an RCU quiescent state which the +	 * percpu_ref_kill will be waiting for. Hence the first +	 * call_rcu_tasks() is not necessary. +	 */ +	if (im->ip_after_call) { +		int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP, +					     NULL, im->ip_epilogue); +		WARN_ON(err); +		if (IS_ENABLED(CONFIG_PREEMPTION)) +			call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks); +		else +			percpu_ref_kill(&im->pcref); +		return; +	} + +	/* The trampoline without fexit and fmod_ret progs doesn't call original +	 * function and doesn't use percpu_ref. +	 * Use call_rcu_tasks_trace() to wait for sleepable progs to finish. +	 * Then use call_rcu_tasks() to wait for the rest of trampoline asm +	 * and normal progs. 
+	 */ +	call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks); +} + +static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx) +{ +	struct bpf_tramp_image *im; +	struct bpf_ksym *ksym; +	void *image; +	int err = -ENOMEM; + +	im = kzalloc(sizeof(*im), GFP_KERNEL); +	if (!im) +		goto out; + +	err = bpf_jit_charge_modmem(1); +	if (err) +		goto out_free_im; + +	err = -ENOMEM; +	im->image = image = bpf_jit_alloc_exec_page(); +	if (!image) +		goto out_uncharge; + +	err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL); +	if (err) +		goto out_free_image; + +	ksym = &im->ksym; +	INIT_LIST_HEAD_RCU(&ksym->lnode); +	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu_%u", key, idx); +	bpf_image_ksym_add(image, ksym); +	return im; + +out_free_image: +	bpf_jit_free_exec(im->image); +out_uncharge: +	bpf_jit_uncharge_modmem(1); +out_free_im: +	kfree(im); +out: +	return ERR_PTR(err); +} +  static int bpf_trampoline_update(struct bpf_trampoline *tr)  { -	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2; -	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2; +	struct bpf_tramp_image *im;  	struct bpf_tramp_progs *tprogs;  	u32 flags = BPF_TRAMP_F_RESTORE_REGS;  	int err, total; @@ -198,41 +340,42 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)  		return PTR_ERR(tprogs);  	if (total == 0) { -		err = unregister_fentry(tr, old_image); +		err = unregister_fentry(tr, tr->cur_image->image); +		bpf_tramp_image_put(tr->cur_image); +		tr->cur_image = NULL;  		tr->selector = 0;  		goto out;  	} +	im = bpf_tramp_image_alloc(tr->key, tr->selector); +	if (IS_ERR(im)) { +		err = PTR_ERR(im); +		goto out; +	} +  	if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||  	    tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)  		flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; -	/* Though the second half of trampoline page is unused a task could be -	 * preempted in the middle of the first half of trampoline and two -	 * updates to trampoline would change the code from underneath the -	 * preempted task. Hence wait for tasks to voluntarily schedule or go -	 * to userspace. -	 * The same trampoline can hold both sleepable and non-sleepable progs. -	 * synchronize_rcu_tasks_trace() is needed to make sure all sleepable -	 * programs finish executing. -	 * Wait for these two grace periods together. 
-	 */ -	synchronize_rcu_mult(call_rcu_tasks, call_rcu_tasks_trace); - -	err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2, +	err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,  					  &tr->func.model, flags, tprogs,  					  tr->func.addr);  	if (err < 0)  		goto out; -	if (tr->selector) +	WARN_ON(tr->cur_image && tr->selector == 0); +	WARN_ON(!tr->cur_image && tr->selector); +	if (tr->cur_image)  		/* progs already running at this address */ -		err = modify_fentry(tr, old_image, new_image); +		err = modify_fentry(tr, tr->cur_image->image, im->image);  	else  		/* first time registering */ -		err = register_fentry(tr, new_image); +		err = register_fentry(tr, im->image);  	if (err)  		goto out; +	if (tr->cur_image) +		bpf_tramp_image_put(tr->cur_image); +	tr->cur_image = im;  	tr->selector++;  out:  	kfree(tprogs); @@ -364,17 +507,12 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)  		goto out;  	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))  		goto out; -	bpf_image_ksym_del(&tr->ksym); -	/* This code will be executed when all bpf progs (both sleepable and -	 * non-sleepable) went through -	 * bpf_prog_put()->call_rcu[_tasks_trace]()->bpf_prog_free_deferred(). -	 * Hence no need for another synchronize_rcu_tasks_trace() here, -	 * but synchronize_rcu_tasks() is still needed, since trampoline -	 * may not have had any sleepable programs and we need to wait -	 * for tasks to get out of trampoline code before freeing it. +	/* This code will be executed even when the last bpf_tramp_image +	 * is alive. All progs are detached from the trampoline and the +	 * trampoline image is patched with jmp into epilogue to skip +	 * fexit progs. The fentry-only trampoline will be freed via +	 * multiple rcu callbacks.  	 
*/ -	synchronize_rcu_tasks(); -	bpf_jit_free_exec(tr->image);  	hlist_del(&tr->hlist);  	kfree(tr);  out: @@ -478,8 +616,18 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)  	rcu_read_unlock_trace();  } +void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr) +{ +	percpu_ref_get(&tr->pcref); +} + +void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr) +{ +	percpu_ref_put(&tr->pcref); +} +  int __weak -arch_prepare_bpf_trampoline(void *image, void *image_end, +arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,  			    const struct btf_func_model *m, u32 flags,  			    struct bpf_tramp_progs *tprogs,  			    void *orig_call) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1dda9d81f12c..3a738724a380 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -504,6 +504,13 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id)  		func_id == BPF_FUNC_skc_to_tcp_request_sock;  } +static bool is_cmpxchg_insn(const struct bpf_insn *insn) +{ +	return BPF_CLASS(insn->code) == BPF_STX && +	       BPF_MODE(insn->code) == BPF_ATOMIC && +	       insn->imm == BPF_CMPXCHG; +} +  /* string representation of 'enum bpf_reg_type' */  static const char * const reg_type_str[] = {  	[NOT_INIT]		= "?", @@ -1120,7 +1127,7 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)  		reg->type = PTR_TO_RDWR_BUF;  		break;  	default: -		WARN_ON("unknown nullable register type"); +		WARN_ONCE(1, "unknown nullable register type");  	}  } @@ -1703,7 +1710,11 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,  	}  	if (class == BPF_STX) { -		if (reg->type != SCALAR_VALUE) +		/* BPF_STX (including atomic variants) has multiple source +		 * operands, one of which is a ptr. Check whether the caller is +		 * asking about it. +		 */ +		if (t == SRC_OP && reg->type != SCALAR_VALUE)  			return true;  		return BPF_SIZE(code) == BPF_DW;  	} @@ -1735,22 +1746,38 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,  	return true;  } -/* Return TRUE if INSN doesn't have explicit value define. */ -static bool insn_no_def(struct bpf_insn *insn) +/* Return the regno defined by the insn, or -1. */ +static int insn_def_regno(const struct bpf_insn *insn)  { -	u8 class = BPF_CLASS(insn->code); - -	return (class == BPF_JMP || class == BPF_JMP32 || -		class == BPF_STX || class == BPF_ST); +	switch (BPF_CLASS(insn->code)) { +	case BPF_JMP: +	case BPF_JMP32: +	case BPF_ST: +		return -1; +	case BPF_STX: +		if (BPF_MODE(insn->code) == BPF_ATOMIC && +		    (insn->imm & BPF_FETCH)) { +			if (insn->imm == BPF_CMPXCHG) +				return BPF_REG_0; +			else +				return insn->src_reg; +		} else { +			return -1; +		} +	default: +		return insn->dst_reg; +	}  }  /* Return TRUE if INSN has defined any 32-bit value explicitly. 
*/  static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)  { -	if (insn_no_def(insn)) +	int dst_reg = insn_def_regno(insn); + +	if (dst_reg == -1)  		return false; -	return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP); +	return !is_reg64(env, insn, dst_reg, NULL, DST_OP);  }  static void mark_insn_zext(struct bpf_verifier_env *env, @@ -5834,10 +5861,14 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,  {  	bool mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||  			    (opcode == BPF_SUB && !off_is_neg); -	u32 off; +	u32 off, max;  	switch (ptr_reg->type) {  	case PTR_TO_STACK: +		/* Offset 0 is out-of-bounds, but acceptable start for the +		 * left direction, see BPF_REG_FP. +		 */ +		max = MAX_BPF_STACK + mask_to_left;  		/* Indirect variable offset stack access is prohibited in  		 * unprivileged mode so it's not handled here.  		 */ @@ -5845,16 +5876,17 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,  		if (mask_to_left)  			*ptr_limit = MAX_BPF_STACK + off;  		else -			*ptr_limit = -off; -		return 0; +			*ptr_limit = -off - 1; +		return *ptr_limit >= max ? -ERANGE : 0;  	case PTR_TO_MAP_VALUE: +		max = ptr_reg->map_ptr->value_size;  		if (mask_to_left) {  			*ptr_limit = ptr_reg->umax_value + ptr_reg->off;  		} else {  			off = ptr_reg->smin_value + ptr_reg->off; -			*ptr_limit = ptr_reg->map_ptr->value_size - off; +			*ptr_limit = ptr_reg->map_ptr->value_size - off - 1;  		} -		return 0; +		return *ptr_limit >= max ? -ERANGE : 0;  	default:  		return -EINVAL;  	} @@ -5907,6 +5939,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,  	u32 alu_state, alu_limit;  	struct bpf_reg_state tmp;  	bool ret; +	int err;  	if (can_skip_alu_sanitation(env, insn))  		return 0; @@ -5922,10 +5955,13 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,  	alu_state |= ptr_is_dst_reg ?  		     
BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; -	if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) -		return 0; -	if (update_alu_sanitation_state(aux, alu_state, alu_limit)) -		return -EACCES; +	err = retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg); +	if (err < 0) +		return err; + +	err = update_alu_sanitation_state(aux, alu_state, alu_limit); +	if (err < 0) +		return err;  do_sim:  	/* Simulate and find potential out-of-bounds access under  	 * speculative execution from truncation as a result of @@ -6076,7 +6112,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,  	case BPF_ADD:  		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);  		if (ret < 0) { -			verbose(env, "R%d tried to add from different maps or paths\n", dst); +			verbose(env, "R%d tried to add from different maps, paths, or prohibited types\n", dst);  			return ret;  		}  		/* We can take a fixed offset as long as it doesn't overflow @@ -6131,7 +6167,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,  	case BPF_SUB:  		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);  		if (ret < 0) { -			verbose(env, "R%d tried to sub from different maps or paths\n", dst); +			verbose(env, "R%d tried to sub from different maps, paths, or prohibited types\n", dst);  			return ret;  		}  		if (dst_reg == off_reg) { @@ -9029,6 +9065,10 @@ static int check_btf_info(struct bpf_verifier_env *env,  	btf = btf_get_by_fd(attr->prog_btf_fd);  	if (IS_ERR(btf))  		return PTR_ERR(btf); +	if (btf_is_kernel(btf)) { +		btf_put(btf); +		return -EACCES; +	}  	env->prog->aux->btf = btf;  	err = check_btf_func(env, attr, uattr); @@ -11006,9 +11046,10 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,  	for (i = 0; i < len; i++) {  		int adj_idx = i + delta;  		struct bpf_insn insn; -		u8 load_reg; +		int load_reg;  		insn = insns[adj_idx]; +		load_reg = insn_def_regno(&insn);  		if (!aux[adj_idx].zext_dst) {  			u8 code, class;  			u32 imm_rnd; @@ -11018,14 +11059,14 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,  			code = insn.code;  			class = BPF_CLASS(code); -			if (insn_no_def(&insn)) +			if (load_reg == -1)  				continue;  			/* NOTE: arg "reg" (the fourth one) is only used for -			 *       BPF_STX which has been ruled out in above -			 *       check, it is safe to pass NULL here. +			 *       BPF_STX + SRC_OP, so it is safe to pass NULL +			 *       here.  			 */ -			if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) { +			if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {  				if (class == BPF_LD &&  				    BPF_MODE(code) == BPF_IMM)  					i++; @@ -11040,31 +11081,28 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,  			imm_rnd = get_random_int();  			rnd_hi32_patch[0] = insn;  			rnd_hi32_patch[1].imm = imm_rnd; -			rnd_hi32_patch[3].dst_reg = insn.dst_reg; +			rnd_hi32_patch[3].dst_reg = load_reg;  			patch = rnd_hi32_patch;  			patch_len = 4;  			goto apply_patch_buffer;  		} -		if (!bpf_jit_needs_zext()) +		/* Add in an zero-extend instruction if a) the JIT has requested +		 * it or b) it's a CMPXCHG. +		 * +		 * The latter is because: BPF_CMPXCHG always loads a value into +		 * R0, therefore always zero-extends. However some archs' +		 * equivalent instruction only does this load when the +		 * comparison is successful. This detail of CMPXCHG is +		 * orthogonal to the general zero-extension behaviour of the +		 * CPU, so it's treated independently of bpf_jit_needs_zext. 
+		 */ +		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))  			continue; -		/* zext_dst means that we want to zero-extend whatever register -		 * the insn defines, which is dst_reg most of the time, with -		 * the notable exception of BPF_STX + BPF_ATOMIC + BPF_FETCH. -		 */ -		if (BPF_CLASS(insn.code) == BPF_STX && -		    BPF_MODE(insn.code) == BPF_ATOMIC) { -			/* BPF_STX + BPF_ATOMIC insns without BPF_FETCH do not -			 * define any registers, therefore zext_dst cannot be -			 * set. -			 */ -			if (WARN_ON(!(insn.imm & BPF_FETCH))) -				return -EINVAL; -			load_reg = insn.imm == BPF_CMPXCHG ? BPF_REG_0 -							   : insn.src_reg; -		} else { -			load_reg = insn.dst_reg; +		if (WARN_ON(load_reg == -1)) { +			verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n"); +			return -EFAULT;  		}  		zext_patch[0] = insn; @@ -11635,7 +11673,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)  			off_reg = issrc ? insn->src_reg : insn->dst_reg;  			if (isneg)  				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); -			*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); +			*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);  			*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);  			*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);  			*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); @@ -12120,6 +12158,11 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)  	u32 btf_id, member_idx;  	const char *mname; +	if (!prog->gpl_compatible) { +		verbose(env, "struct ops programs must have a GPL compatible license\n"); +		return -EINVAL; +	} +  	btf_id = prog->aux->attach_btf_id;  	st_ops = bpf_struct_ops_find(btf_id);  	if (!st_ops) { |
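A note on the disasm.c hunk: it fixes the atomic ALU string table so a fetched atomic XOR is no longer printed as "or". A minimal userspace sketch (not part of the patch) of how that table is indexed, using only the opcode values from the uapi <linux/bpf_common.h> header:

#include <stdio.h>
#include <linux/bpf_common.h>	/* BPF_ADD 0x00, BPF_OR 0x40, BPF_AND 0x50, BPF_XOR 0xa0 */

static const char *const atomic_alu_string[16] = {
	[BPF_ADD >> 4] = "add",		/* slot 0  */
	[BPF_AND >> 4] = "and",		/* slot 5  */
	[BPF_OR  >> 4] = "or",		/* slot 4  */
	[BPF_XOR >> 4] = "xor",		/* slot 10; the old table wrongly said "or" here */
};

int main(void)
{
	printf("opcode 0x%02x disassembles as \"%s\"\n",
	       BPF_XOR, atomic_alu_string[BPF_XOR >> 4]);
	return 0;
}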
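The inode.c change makes BPF_OBJ_GET on a pinned program or link return -EINVAL for anything other than read-write open flags instead of silently ignoring file_flags. A hedged userspace sketch of the new behaviour as seen from the syscall; the pin path below is made up for the example:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

int main(void)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.pathname = (unsigned long)"/sys/fs/bpf/my_prog";	/* hypothetical pin path */
	attr.file_flags = BPF_F_RDONLY;				/* anything but 0 (O_RDWR) */

	if (syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr)) < 0)
		/* with this patch, programs and links should fail here with EINVAL;
		 * maps still honour BPF_F_RDONLY/BPF_F_WRONLY
		 */
		printf("BPF_OBJ_GET: %s\n", strerror(errno));
	return 0;
}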
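The verifier.c rework replaces insn_no_def() with insn_def_regno() so the zero-extension pass knows which register an instruction actually defines: BPF_CMPXCHG always writes R0, while other BPF_FETCH atomics write src_reg. A small userspace sketch of that rule, assuming a uapi <linux/bpf.h> recent enough to carry BPF_ATOMIC, BPF_FETCH and BPF_CMPXCHG:

#include <stdio.h>
#include <linux/bpf.h>

/* Mirrors the insn_def_regno() logic added above: -1 means "defines nothing". */
static int def_regno(const struct bpf_insn *insn)
{
	switch (BPF_CLASS(insn->code)) {
	case BPF_JMP:
	case BPF_JMP32:
	case BPF_ST:
		return -1;
	case BPF_STX:
		if (BPF_MODE(insn->code) == BPF_ATOMIC && (insn->imm & BPF_FETCH))
			return insn->imm == BPF_CMPXCHG ? BPF_REG_0 : insn->src_reg;
		return -1;
	default:
		return insn->dst_reg;
	}
}

int main(void)
{
	/* r0 = atomic_cmpxchg((u32 *)(r2 + 0), r0, r3) */
	struct bpf_insn cmpxchg = {
		.code    = BPF_STX | BPF_W | BPF_ATOMIC,
		.dst_reg = BPF_REG_2,
		.src_reg = BPF_REG_3,
		.imm     = BPF_CMPXCHG,
	};

	printf("defined register: r%d\n", def_regno(&cmpxchg));	/* prints r0 */
	return 0;
}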
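The retrieve_ptr_limit()/fixup_bpf_calls() changes turn alu_limit into an inclusive bound on the offset register (the "- 1" moves out of the emitted BPF_MOV32_IMM and into retrieve_ptr_limit()) and reject unbounded cases with -ERANGE instead of skipping sanitation. For intuition, a plain-C sketch of the masking arithmetic the verifier patches in around such a pointer ALU op; the trailing arithmetic-shift and AND steps are not visible in the hunk above and are reproduced from the surrounding kernel code, so treat the exact sequence as an assumption:

#include <stdio.h>

/* Emulates: AX = alu_limit; AX -= off; AX |= off; AX = -AX; AX s>>= 63; AX &= off.
 * When 0 <= off <= alu_limit the mask is all ones and off passes through;
 * otherwise the mask is 0 and the (possibly speculative) offset collapses to 0.
 */
static long long sanitize_off(unsigned int alu_limit, long long off)
{
	long long ax = alu_limit;

	ax -= off;
	ax |= off;
	ax = -ax;
	ax >>= 63;	/* arithmetic shift on gcc/clang, matching BPF_ARSH */
	return ax & off;
}

int main(void)
{
	/* assumed example: alu_limit of 63, i.e. the last valid byte of a 64-byte region */
	printf("%lld\n", sanitize_off(63, 16));		/* 16 - in range, unchanged */
	printf("%lld\n", sanitize_off(63, 200));	/* 0  - too large, masked   */
	printf("%lld\n", sanitize_off(63, -8));		/* 0  - negative, masked    */
	return 0;
}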