Diffstat (limited to 'kernel/bpf')
-rw-r--r--  kernel/bpf/bpf_lsm.c    |  3
-rw-r--r--  kernel/bpf/btf.c        |  4
-rw-r--r--  kernel/bpf/hashtab.c    |  4
-rw-r--r--  kernel/bpf/memalloc.c   |  2
-rw-r--r--  kernel/bpf/offload.c    |  3
-rw-r--r--  kernel/bpf/syscall.c    | 24
-rw-r--r--  kernel/bpf/task_iter.c  | 39
-rw-r--r--  kernel/bpf/verifier.c   | 56
9 files changed, 87 insertions, 52 deletions
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 9ea42a45da47..e14c822f8911 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -51,7 +51,6 @@ BTF_SET_END(bpf_lsm_current_hooks)
  */
 BTF_SET_START(bpf_lsm_locked_sockopt_hooks)
 #ifdef CONFIG_SECURITY_NETWORK
-BTF_ID(func, bpf_lsm_socket_sock_rcv_skb)
 BTF_ID(func, bpf_lsm_sock_graft)
 BTF_ID(func, bpf_lsm_inet_csk_clone)
 BTF_ID(func, bpf_lsm_inet_conn_established)
@@ -351,8 +350,10 @@ BTF_ID(func, bpf_lsm_bpf_prog_alloc_security)
 BTF_ID(func, bpf_lsm_bpf_prog_free_security)
 BTF_ID(func, bpf_lsm_file_alloc_security)
 BTF_ID(func, bpf_lsm_file_free_security)
+#ifdef CONFIG_SECURITY_NETWORK
 BTF_ID(func, bpf_lsm_sk_alloc_security)
 BTF_ID(func, bpf_lsm_sk_free_security)
+#endif /* CONFIG_SECURITY_NETWORK */
 BTF_ID(func, bpf_lsm_task_free)
 BTF_SET_END(untrusted_lsm_hooks)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index f7dd8af06413..b7017cae6fd1 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -7782,9 +7782,9 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c
 	sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL);
-	return 0;
 end:
-	btf_free_dtor_kfunc_tab(btf);
+	if (ret)
+		btf_free_dtor_kfunc_tab(btf);
 	btf_put(btf);
 	return ret;
 }
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 5aa2b5525f79..66bded144377 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -152,7 +152,7 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab,
 {
 	unsigned long flags;
-	hash = hash & HASHTAB_MAP_LOCK_MASK;
+	hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1);
 	preempt_disable();
 	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
@@ -171,7 +171,7 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab,
 				      struct bucket *b, u32 hash,
 				      unsigned long flags)
 {
-	hash = hash & HASHTAB_MAP_LOCK_MASK;
+	hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1);
 	raw_spin_unlock_irqrestore(&b->raw_lock, flags);
 	__this_cpu_dec(*(htab->map_locked[hash]));
 	preempt_enable();
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index ebcc3dd0fa19..1db156405b68 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -71,7 +71,7 @@ static int bpf_mem_cache_idx(size_t size)
 	if (size <= 192)
 		return size_index[(size - 1) / 8] - 1;
-	return fls(size - 1) - 1;
+	return fls(size - 1) - 2;
 }
 #define NUM_CACHES 11
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 13e4efc971e6..190d9f9dc987 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -216,9 +216,6 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 	if (offload->dev_state)
 		offload->offdev->ops->destroy(prog);
-	/* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */
-	bpf_prog_free_id(prog, true);
-
 	list_del_init(&offload->offloads);
 	kfree(offload);
 	prog->aux->offload = NULL;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 64131f88c553..ecca9366c7a6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1972,7 +1972,7 @@ static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
 		return;
 	if (audit_enabled == AUDIT_OFF)
 		return;
-	if (op == BPF_AUDIT_LOAD)
+	if (!in_irq() && !irqs_disabled())
 		ctx = audit_context();
 	ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
 	if (unlikely(!ab))
 		return;
@@ -2001,7 +2001,7 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog)
 	return id > 0 ? 0 : id;
 }
-void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
+void bpf_prog_free_id(struct bpf_prog *prog)
 {
 	unsigned long flags;
@@ -2013,18 +2013,10 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
 	if (!prog->aux->id)
 		return;
-	if (do_idr_lock)
-		spin_lock_irqsave(&prog_idr_lock, flags);
-	else
-		__acquire(&prog_idr_lock);
-
+	spin_lock_irqsave(&prog_idr_lock, flags);
 	idr_remove(&prog_idr, prog->aux->id);
 	prog->aux->id = 0;
-
-	if (do_idr_lock)
-		spin_unlock_irqrestore(&prog_idr_lock, flags);
-	else
-		__release(&prog_idr_lock);
+	spin_unlock_irqrestore(&prog_idr_lock, flags);
 }
 static void __bpf_prog_put_rcu(struct rcu_head *rcu)
@@ -2067,17 +2059,15 @@ static void bpf_prog_put_deferred(struct work_struct *work)
 	prog = aux->prog;
 	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
 	bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
+	bpf_prog_free_id(prog);
 	__bpf_prog_put_noref(prog, true);
 }
-static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
+static void __bpf_prog_put(struct bpf_prog *prog)
 {
 	struct bpf_prog_aux *aux = prog->aux;
 	if (atomic64_dec_and_test(&aux->refcnt)) {
-		/* bpf_prog_free_id() must be called first */
-		bpf_prog_free_id(prog, do_idr_lock);
-
 		if (in_irq() || irqs_disabled()) {
 			INIT_WORK(&aux->work, bpf_prog_put_deferred);
 			schedule_work(&aux->work);
@@ -2089,7 +2079,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 void bpf_prog_put(struct bpf_prog *prog)
 {
-	__bpf_prog_put(prog, true);
+	__bpf_prog_put(prog);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_put);
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index c2a2182ce570..c4ab9d6cdbe9 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -438,6 +438,7 @@ struct bpf_iter_seq_task_vma_info {
 	 */
 	struct bpf_iter_seq_task_common common;
 	struct task_struct *task;
+	struct mm_struct *mm;
 	struct vm_area_struct *vma;
 	u32 tid;
 	unsigned long prev_vm_start;
@@ -456,16 +457,19 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
 	enum bpf_task_vma_iter_find_op op;
 	struct vm_area_struct *curr_vma;
 	struct task_struct *curr_task;
+	struct mm_struct *curr_mm;
 	u32 saved_tid = info->tid;
 	/* If this function returns a non-NULL vma, it holds a reference to
-	 * the task_struct, and holds read lock on vma->mm->mmap_lock.
+	 * the task_struct, holds a refcount on mm->mm_users, and holds
+	 * read lock on vma->mm->mmap_lock.
 	 * If this function returns NULL, it does not hold any reference or
 	 * lock.
 	 */
 	if (info->task) {
 		curr_task = info->task;
 		curr_vma = info->vma;
+		curr_mm = info->mm;
 		/* In case of lock contention, drop mmap_lock to unblock
 		 * the writer.
 		 *
@@ -504,13 +508,15 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
 		 *    4.2) VMA2 and VMA2' covers different ranges, process
 		 *         VMA2'.
 		 */
-		if (mmap_lock_is_contended(curr_task->mm)) {
+		if (mmap_lock_is_contended(curr_mm)) {
 			info->prev_vm_start = curr_vma->vm_start;
 			info->prev_vm_end = curr_vma->vm_end;
 			op = task_vma_iter_find_vma;
-			mmap_read_unlock(curr_task->mm);
-			if (mmap_read_lock_killable(curr_task->mm))
+			mmap_read_unlock(curr_mm);
+			if (mmap_read_lock_killable(curr_mm)) {
+				mmput(curr_mm);
 				goto finish;
+			}
 		} else {
 			op = task_vma_iter_next_vma;
 		}
@@ -535,42 +541,47 @@ again:
 			op = task_vma_iter_find_vma;
 		}
-		if (!curr_task->mm)
+		curr_mm = get_task_mm(curr_task);
+		if (!curr_mm)
 			goto next_task;
-		if (mmap_read_lock_killable(curr_task->mm))
+		if (mmap_read_lock_killable(curr_mm)) {
+			mmput(curr_mm);
 			goto finish;
+		}
 	}
 	switch (op) {
 	case task_vma_iter_first_vma:
-		curr_vma = find_vma(curr_task->mm, 0);
+		curr_vma = find_vma(curr_mm, 0);
 		break;
 	case task_vma_iter_next_vma:
-		curr_vma = find_vma(curr_task->mm, curr_vma->vm_end);
+		curr_vma = find_vma(curr_mm, curr_vma->vm_end);
 		break;
 	case task_vma_iter_find_vma:
 		/* We dropped mmap_lock so it is necessary to use find_vma
 		 * to find the next vma. This is similar to the mechanism
 		 * in show_smaps_rollup().
 		 */
-		curr_vma = find_vma(curr_task->mm, info->prev_vm_end - 1);
+		curr_vma = find_vma(curr_mm, info->prev_vm_end - 1);
 		/* case 1) and 4.2) above just use curr_vma */
 		/* check for case 2) or case 4.1) above */
 		if (curr_vma &&
 		    curr_vma->vm_start == info->prev_vm_start &&
 		    curr_vma->vm_end == info->prev_vm_end)
-			curr_vma = find_vma(curr_task->mm, curr_vma->vm_end);
+			curr_vma = find_vma(curr_mm, curr_vma->vm_end);
 		break;
 	}
 	if (!curr_vma) {
 		/* case 3) above, or case 2) 4.1) with vma->next == NULL */
-		mmap_read_unlock(curr_task->mm);
+		mmap_read_unlock(curr_mm);
+		mmput(curr_mm);
 		goto next_task;
 	}
 	info->task = curr_task;
 	info->vma = curr_vma;
+	info->mm = curr_mm;
 	return curr_vma;
 next_task:
@@ -579,6 +590,7 @@ next_task:
 	put_task_struct(curr_task);
 	info->task = NULL;
+	info->mm = NULL;
 	info->tid++;
 	goto again;
@@ -587,6 +599,7 @@ finish:
 		put_task_struct(curr_task);
 	info->task = NULL;
 	info->vma = NULL;
+	info->mm = NULL;
 	return NULL;
 }
@@ -658,7 +671,9 @@ static void task_vma_seq_stop(struct seq_file *seq, void *v)
 		 */
 		info->prev_vm_start = ~0UL;
 		info->prev_vm_end = info->vma->vm_end;
-		mmap_read_unlock(info->task->mm);
+		mmap_read_unlock(info->mm);
+		mmput(info->mm);
+		info->mm = NULL;
 		put_task_struct(info->task);
 		info->task = NULL;
 	}
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 11f5ec0b8016..d0ed7d6f5eec 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -488,6 +488,10 @@ again:
 		/* reset fops->func and fops->trampoline for re-register */
 		tr->fops->func = NULL;
 		tr->fops->trampoline = 0;
+
+		/* reset im->image memory attr for arch_prepare_bpf_trampoline */
+		set_memory_nx((long)im->image, 1);
+		set_memory_rw((long)im->image, 1);
 		goto again;
 	}
 #endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a5255a0dcbb6..7ee218827259 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1054,6 +1054,8 @@ static void print_insn_state(struct bpf_verifier_env *env,
  */
 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
 {
+	size_t alloc_bytes;
+	void *orig = dst;
 	size_t bytes;
 	if (ZERO_OR_NULL_PTR(src))
@@ -1062,11 +1064,11 @@ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t
 	if (unlikely(check_mul_overflow(n, size, &bytes)))
 		return NULL;
-	if (ksize(dst) < ksize(src)) {
-		kfree(dst);
-		dst = kmalloc_track_caller(kmalloc_size_roundup(bytes), flags);
-		if (!dst)
-			return NULL;
+	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
+	dst = krealloc(orig, alloc_bytes, flags);
+	if (!dst) {
+		kfree(orig);
+		return NULL;
 	}
 	memcpy(dst, src, bytes);
@@ -2746,6 +2748,12 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
 			 */
 			if (insn->src_reg == 0 && is_callback_calling_function(insn->imm))
 				return -ENOTSUPP;
+			/* kfunc with imm==0 is invalid and fixup_kfunc_call will
+			 * catch this error later. Make backtracking conservative
+			 * with ENOTSUPP.
+			 */
+			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
+				return -ENOTSUPP;
 			/* regular helper call sets R0 */
 			*reg_mask &= ~1;
 			if (*reg_mask & 0x3f) {
@@ -3235,13 +3243,24 @@ static bool __is_pointer_value(bool allow_ptr_leaks,
 	return reg->type != SCALAR_VALUE;
 }
+/* Copy src state preserving dst->parent and dst->live fields */
+static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
+{
+	struct bpf_reg_state *parent = dst->parent;
+	enum bpf_reg_liveness live = dst->live;
+
+	*dst = *src;
+	dst->parent = parent;
+	dst->live = live;
+}
+
 static void save_register_state(struct bpf_func_state *state,
 				int spi, struct bpf_reg_state *reg,
 				int size)
 {
 	int i;
-	state->stack[spi].spilled_ptr = *reg;
+	copy_register_state(&state->stack[spi].spilled_ptr, reg);
 	if (size == BPF_REG_SIZE)
 		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
@@ -3287,7 +3306,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 		bool sanitize = reg && is_spillable_regtype(reg->type);
 		for (i = 0; i < size; i++) {
-			if (state->stack[spi].slot_type[i] == STACK_INVALID) {
+			u8 type = state->stack[spi].slot_type[i];
+
+			if (type != STACK_MISC && type != STACK_ZERO) {
 				sanitize = true;
 				break;
 			}
@@ -3567,7 +3588,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 				 */
 				s32 subreg_def = state->regs[dst_regno].subreg_def;
-				state->regs[dst_regno] = *reg;
+				copy_register_state(&state->regs[dst_regno], reg);
 				state->regs[dst_regno].subreg_def = subreg_def;
 			} else {
 				for (i = 0; i < size; i++) {
@@ -3588,7 +3609,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 		if (dst_regno >= 0) {
 			/* restore register state from stack */
-			state->regs[dst_regno] = *reg;
+			copy_register_state(&state->regs[dst_regno], reg);
 			/* mark reg as written since spilled pointer state likely
 			 * has its liveness marks cleared by is_state_visited()
 			 * which resets stack/reg liveness for state transitions
@@ -9582,7 +9603,7 @@ do_sim:
 	 */
 	if (!ptr_is_dst_reg) {
 		tmp = *dst_reg;
-		*dst_reg = *ptr_reg;
+		copy_register_state(dst_reg, ptr_reg);
 	}
 	ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
 					env->insn_idx);
@@ -10835,7 +10856,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 					 * to propagate min/max range.
 					 */
 					src_reg->id = ++env->id_gen;
-				*dst_reg = *src_reg;
+				copy_register_state(dst_reg, src_reg);
 				dst_reg->live |= REG_LIVE_WRITTEN;
 				dst_reg->subreg_def = DEF_NOT_SUBREG;
 			} else {
@@ -10846,7 +10867,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 						insn->src_reg);
 					return -EACCES;
 				} else if (src_reg->type == SCALAR_VALUE) {
-					*dst_reg = *src_reg;
+					copy_register_state(dst_reg, src_reg);
 					/* Make sure ID is cleared otherwise
 					 * dst_reg min/max could be incorrectly
 					 * propagated into src_reg by find_equal_scalars()
@@ -11645,7 +11666,7 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate,
 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
 		if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
-			*reg = *known_reg;
+			copy_register_state(reg, known_reg);
 	}));
 }
@@ -11822,10 +11843,17 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	 *      register B - not null
 	 * for JNE A, B, ... - A is not null in the false branch;
 	 * for JEQ A, B, ... - A is not null in the true branch.
+	 *
+	 * Since PTR_TO_BTF_ID points to a kernel struct that does
+	 * not need to be null checked by the BPF program, i.e.,
+	 * could be null even without PTR_MAYBE_NULL marking, so
+	 * only propagate nullness when neither reg is that type.
 	 */
 	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
 	    __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
-	    type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type)) {
+	    type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
+	    base_type(src_reg->type) != PTR_TO_BTF_ID &&
+	    base_type(dst_reg->type) != PTR_TO_BTF_ID) {
 		eq_branch_regs = NULL;
 		switch (opcode) {
 		case BPF_JEQ: