diff options
Diffstat (limited to 'kernel/bpf/syscall.c')
| -rw-r--r-- | kernel/bpf/syscall.c | 138 | 
1 files changed, 95 insertions, 43 deletions
| diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6c772adabad2..cb17e1cd1d43 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -48,6 +48,47 @@ static const struct bpf_map_ops * const bpf_map_types[] = {  #undef BPF_MAP_TYPE  }; +/* + * If we're handed a bigger struct than we know of, ensure all the unknown bits + * are 0 - i.e. new user-space does not rely on any kernel feature extensions + * we don't know about yet. + * + * There is a ToCToU between this function call and the following + * copy_from_user() call. However, this is not a concern since this function is + * meant to be a future-proofing of bits. + */ +static int check_uarg_tail_zero(void __user *uaddr, +				size_t expected_size, +				size_t actual_size) +{ +	unsigned char __user *addr; +	unsigned char __user *end; +	unsigned char val; +	int err; + +	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */ +		return -E2BIG; + +	if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size))) +		return -EFAULT; + +	if (actual_size <= expected_size) +		return 0; + +	addr = uaddr + expected_size; +	end  = uaddr + actual_size; + +	for (; addr < end; addr++) { +		err = get_user(val, addr); +		if (err) +			return err; +		if (val) +			return -E2BIG; +	} + +	return 0; +} +  static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)  {  	struct bpf_map *map; @@ -64,7 +105,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)  	return map;  } -void *bpf_map_area_alloc(size_t size) +void *bpf_map_area_alloc(size_t size, int numa_node)  {  	/* We definitely need __GFP_NORETRY, so OOM killer doesn't  	 * trigger under memory pressure as we really just want to @@ -74,12 +115,13 @@ void *bpf_map_area_alloc(size_t size)  	void *area;  	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { -		area = kmalloc(size, GFP_USER | flags); +		area = kmalloc_node(size, GFP_USER | flags, numa_node);  		if (area != NULL)  			return area;  	} -	return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL); +	return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags, +					   __builtin_return_address(0));  }  void bpf_map_area_free(void *area) @@ -268,10 +310,11 @@ int bpf_map_new_fd(struct bpf_map *map)  		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \  		   sizeof(attr->CMD##_LAST_FIELD)) != NULL -#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd +#define BPF_MAP_CREATE_LAST_FIELD numa_node  /* called via syscall */  static int map_create(union bpf_attr *attr)  { +	int numa_node = bpf_map_attr_numa_node(attr);  	struct bpf_map *map;  	int err; @@ -279,6 +322,11 @@ static int map_create(union bpf_attr *attr)  	if (err)  		return -EINVAL; +	if (numa_node != NUMA_NO_NODE && +	    ((unsigned int)numa_node >= nr_node_ids || +	     !node_online(numa_node))) +		return -EINVAL; +  	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */  	map = find_and_alloc_map(attr);  	if (IS_ERR(map)) @@ -870,7 +918,7 @@ struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)  EXPORT_SYMBOL_GPL(bpf_prog_inc);  /* prog_idr_lock should have been held */ -static struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) +struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)  {  	int refold; @@ -886,6 +934,7 @@ static struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)  	return prog;  } +EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);  static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)  { @@ -1047,6 +1096,40 @@ static int bpf_obj_get(const union bpf_attr *attr)  #define BPF_PROG_ATTACH_LAST_FIELD attach_flags +static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach) +{ +	struct bpf_prog *prog = NULL; +	int ufd = attr->target_fd; +	struct bpf_map *map; +	struct fd f; +	int err; + +	f = fdget(ufd); +	map = __bpf_map_get(f); +	if (IS_ERR(map)) +		return PTR_ERR(map); + +	if (attach) { +		prog = bpf_prog_get_type(attr->attach_bpf_fd, +					 BPF_PROG_TYPE_SK_SKB); +		if (IS_ERR(prog)) { +			fdput(f); +			return PTR_ERR(prog); +		} +	} + +	err = sock_map_prog(map, prog, attr->attach_type); +	if (err) { +		fdput(f); +		if (prog) +			bpf_prog_put(prog); +		return err; +	} + +	fdput(f); +	return 0; +} +  static int bpf_prog_attach(const union bpf_attr *attr)  {  	enum bpf_prog_type ptype; @@ -1074,6 +1157,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)  	case BPF_CGROUP_SOCK_OPS:  		ptype = BPF_PROG_TYPE_SOCK_OPS;  		break; +	case BPF_SK_SKB_STREAM_PARSER: +	case BPF_SK_SKB_STREAM_VERDICT: +		return sockmap_get_from_fd(attr, true);  	default:  		return -EINVAL;  	} @@ -1122,7 +1208,10 @@ static int bpf_prog_detach(const union bpf_attr *attr)  		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);  		cgroup_put(cgrp);  		break; - +	case BPF_SK_SKB_STREAM_PARSER: +	case BPF_SK_SKB_STREAM_VERDICT: +		ret = sockmap_get_from_fd(attr, false); +		break;  	default:  		return -EINVAL;  	} @@ -1246,32 +1335,6 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)  	return fd;  } -static int check_uarg_tail_zero(void __user *uaddr, -				size_t expected_size, -				size_t actual_size) -{ -	unsigned char __user *addr; -	unsigned char __user *end; -	unsigned char val; -	int err; - -	if (actual_size <= expected_size) -		return 0; - -	addr = uaddr + expected_size; -	end  = uaddr + actual_size; - -	for (; addr < end; addr++) { -		err = get_user(val, addr); -		if (err) -			return err; -		if (val) -			return -E2BIG; -	} - -	return 0; -} -  static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,  				   const union bpf_attr *attr,  				   union bpf_attr __user *uattr) @@ -1393,17 +1456,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz  	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)  		return -EPERM; -	if (!access_ok(VERIFY_READ, uattr, 1)) -		return -EFAULT; - -	if (size > PAGE_SIZE)	/* silly large */ -		return -E2BIG; - -	/* If we're handed a bigger struct than we know of, -	 * ensure all the unknown bits are 0 - i.e. new -	 * user-space does not rely on any kernel feature -	 * extensions we dont know about yet. -	 */  	err = check_uarg_tail_zero(uattr, sizeof(attr), size);  	if (err)  		return err; |