Diffstat (limited to 'kernel')
-rw-r--r--   kernel/bpf/hashtab.c   129
-rw-r--r--   kernel/bpf/syscall.c     8
2 files changed, 129 insertions, 8 deletions
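
The hashtab.c hunks below extend the existing hash and per-cpu hash code to a new map type, BPF_MAP_TYPE_LRU_PERCPU_HASH: each key holds one value copy per CPU, lookups return the current CPU's copy and set the LRU reference bit, and allocating a new element may evict the least-recently-used entry once the preallocated pool is exhausted. As a purely hypothetical sketch (not part of this commit), an eBPF program could use the new type as a self-evicting per-cpu counter; this assumes the samples/bpf build conventions of the era (struct bpf_map_def, "bpf_helpers.h"), and the kprobe attach point is illustrative:

#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>
#include "bpf_helpers.h"

/* LRU per-cpu hash: one value copy per CPU behind each key; cold keys
 * get evicted once the max_entries preallocated elements are in use. */
struct bpf_map_def SEC("maps") cnt_map = {
	.type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
	.key_size = sizeof(__u32),
	.value_size = sizeof(__u64),	/* per-cpu size, not the total */
	.max_entries = 1024,
};

SEC("kprobe/sys_write")
int count_writes(struct pt_regs *ctx)
{
	__u32 key = bpf_get_current_pid_tgid();	/* low 32 bits: tid */
	__u64 init = 1, *val;

	val = bpf_map_lookup_elem(&cnt_map, &key);
	if (val)
		(*val)++;	/* this CPU's copy; no atomics needed */
	else
		/* may evict the least-recently-used key when full */
		bpf_map_update_elem(&cnt_map, &key, &init, BPF_ANY);
	return 0;
}

char _license[] SEC("license") = "GPL";

From the program's point of view, bpf_map_lookup_elem() resolves to htab_lru_percpu_map_lookup_elem() below, which returns this_cpu_ptr() of the value, so the increment only ever touches the local CPU's copy.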
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4a9e71a7c41f..34debc1a9641 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -62,7 +62,14 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
 
 static bool htab_is_lru(const struct bpf_htab *htab)
 {
-	return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH;
+	return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH ||
+		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
+static bool htab_is_percpu(const struct bpf_htab *htab)
+{
+	return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
 }
 
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
@@ -85,7 +92,7 @@ static void htab_free_elems(struct bpf_htab *htab)
 {
 	int i;
 
-	if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
+	if (!htab_is_percpu(htab))
 		goto free_elems;
 
 	for (i = 0; i < htab->map.max_entries; i++) {
@@ -122,7 +129,7 @@ static int prealloc_init(struct bpf_htab *htab)
 	if (!htab->elems)
 		return -ENOMEM;
 
-	if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
+	if (!htab_is_percpu(htab))
 		goto skip_percpu_elems;
 
 	for (i = 0; i < htab->map.max_entries; i++) {
@@ -195,8 +202,10 @@ static int alloc_extra_elems(struct bpf_htab *htab)
 /* Called from syscall */
 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 {
-	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_HASH;
-	bool lru = attr->map_type == BPF_MAP_TYPE_LRU_HASH;
+	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
+	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
+		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
 	/* percpu_lru means each cpu has its own LRU list.
 	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
 	 * the map's value itself is percpu.  percpu_lru has
@@ -823,12 +832,84 @@ err:
 	return ret;
 }
 
+static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
+					     void *value, u64 map_flags,
+					     bool onallcpus)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct htab_elem *l_new = NULL, *l_old;
+	struct hlist_head *head;
+	unsigned long flags;
+	struct bucket *b;
+	u32 key_size, hash;
+	int ret;
+
+	if (unlikely(map_flags > BPF_EXIST))
+		/* unknown flags */
+		return -EINVAL;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	key_size = map->key_size;
+
+	hash = htab_map_hash(key, key_size);
+
+	b = __select_bucket(htab, hash);
+	head = &b->head;
+
+	/* For LRU, we need to alloc before taking bucket's
+	 * spinlock because LRU's elem alloc may need
+	 * to remove older elem from htab and this removal
+	 * operation will need a bucket lock.
+	 */
+	if (map_flags != BPF_EXIST) {
+		l_new = prealloc_lru_pop(htab, key, hash);
+		if (!l_new)
+			return -ENOMEM;
+	}
+
+	/* bpf_map_update_elem() can be called in_irq() */
+	raw_spin_lock_irqsave(&b->lock, flags);
+
+	l_old = lookup_elem_raw(head, hash, key, key_size);
+
+	ret = check_flags(htab, l_old, map_flags);
+	if (ret)
+		goto err;
+
+	if (l_old) {
+		bpf_lru_node_set_ref(&l_old->lru_node);
+
+		/* per-cpu hash map can update value in-place */
+		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
+				value, onallcpus);
+	} else {
+		pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
+				value, onallcpus);
+		hlist_add_head_rcu(&l_new->hash_node, head);
+		l_new = NULL;
+	}
+	ret = 0;
+err:
+	raw_spin_unlock_irqrestore(&b->lock, flags);
+	if (l_new)
+		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
+	return ret;
+}
+
 static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 				       void *value, u64 map_flags)
 {
 	return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
 }
 
+static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
+					   void *value, u64 map_flags)
+{
+	return __htab_lru_percpu_map_update_elem(map, key, value, map_flags,
+						 false);
+}
+
 /* Called from syscall or from eBPF program */
 static int htab_map_delete_elem(struct bpf_map *map, void *key)
 {
@@ -976,8 +1057,21 @@ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
 		return NULL;
 }
 
+static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct htab_elem *l = __htab_map_lookup_elem(map, key);
+
+	if (l) {
+		bpf_lru_node_set_ref(&l->lru_node);
+		return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
+	}
+
+	return NULL;
+}
+
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
 {
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct htab_elem *l;
 	void __percpu *pptr;
 	int ret = -ENOENT;
@@ -993,6 +1087,8 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
 	l = __htab_map_lookup_elem(map, key);
 	if (!l)
 		goto out;
+	if (htab_is_lru(htab))
+		bpf_lru_node_set_ref(&l->lru_node);
 	pptr = htab_elem_get_ptr(l, map->key_size);
 	for_each_possible_cpu(cpu) {
 		bpf_long_memcpy(value + off,
@@ -1008,10 +1104,16 @@ out:
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
 			   u64 map_flags)
 {
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	int ret;
 
 	rcu_read_lock();
-	ret = __htab_percpu_map_update_elem(map, key, value, map_flags, true);
+	if (htab_is_lru(htab))
+		ret = __htab_lru_percpu_map_update_elem(map, key, value,
+							map_flags, true);
+	else
+		ret = __htab_percpu_map_update_elem(map, key, value, map_flags,
+						    true);
 	rcu_read_unlock();
 
 	return ret;
@@ -1031,11 +1133,26 @@ static struct bpf_map_type_list htab_percpu_type __read_mostly = {
 	.type = BPF_MAP_TYPE_PERCPU_HASH,
 };
 
+static const struct bpf_map_ops htab_lru_percpu_ops = {
+	.map_alloc = htab_map_alloc,
+	.map_free = htab_map_free,
+	.map_get_next_key = htab_map_get_next_key,
+	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
+	.map_update_elem = htab_lru_percpu_map_update_elem,
+	.map_delete_elem = htab_lru_map_delete_elem,
+};
+
+static struct bpf_map_type_list htab_lru_percpu_type __read_mostly = {
+	.ops = &htab_lru_percpu_ops,
+	.type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
+};
+
 static int __init register_htab_map(void)
 {
 	bpf_register_map_type(&htab_type);
 	bpf_register_map_type(&htab_percpu_type);
 	bpf_register_map_type(&htab_lru_type);
+	bpf_register_map_type(&htab_lru_percpu_type);
 	return 0;
 }
 late_initcall(register_htab_map);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 233e3ac836a6..ce1b7de7d72c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -292,6 +292,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 		goto free_key;
 
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
 		value_size = round_up(map->value_size, 8) * num_possible_cpus();
 	else
@@ -302,7 +303,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (!value)
 		goto free_key;
 
-	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 		err = bpf_percpu_hash_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		err = bpf_percpu_array_copy(map, key, value);
@@ -366,6 +368,7 @@ static int map_update_elem(union bpf_attr *attr)
 		goto free_key;
 
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
 		value_size = round_up(map->value_size, 8) * num_possible_cpus();
 	else
@@ -385,7 +388,8 @@ static int map_update_elem(union bpf_attr *attr)
 	 */
 	preempt_disable();
 	__this_cpu_inc(bpf_prog_active);
-	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 		err = bpf_percpu_hash_update(map, key, value, attr->flags);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		err = bpf_percpu_array_update(map, key, value, attr->flags);
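
As the syscall.c hunks above show, userspace exchanges per-cpu values as one flat buffer of round_up(value_size, 8) bytes per possible CPU, for both update and lookup. A minimal, hypothetical userspace sketch (again not part of this commit, and assuming headers that define the new map type constant): sys_bpf() is an illustrative wrapper, and sysconf() stands in for parsing /sys/devices/system/cpu/possible, which is what the kernel's sizing actually follows:

#include <linux/bpf.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Illustrative wrapper: glibc provides no bpf(2) stub. */
static int sys_bpf(int cmd, union bpf_attr *attr)
{
	return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

int main(void)
{
	/* Approximation: the kernel sizes by num_possible_cpus(), so
	 * robust code parses /sys/devices/system/cpu/possible instead. */
	long i, ncpu = sysconf(_SC_NPROCESSORS_CONF);
	__u64 sum = 0, *values = calloc(ncpu, sizeof(__u64));
	union bpf_attr attr;
	__u32 key = 1;
	int fd;

	if (!values)
		return 1;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_LRU_PERCPU_HASH;
	attr.key_size = sizeof(key);
	attr.value_size = sizeof(__u64);	/* per-cpu size, not total */
	attr.max_entries = 128;
	fd = sys_bpf(BPF_MAP_CREATE, &attr);
	if (fd < 0) {
		perror("BPF_MAP_CREATE");
		return 1;
	}

	for (i = 0; i < ncpu; i++)
		values[i] = i;		/* slot i is written to cpu i */

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = (__u64)(unsigned long)&key;
	attr.value = (__u64)(unsigned long)values;
	attr.flags = BPF_ANY;		/* == 0, so lookup can reuse attr */
	if (sys_bpf(BPF_MAP_UPDATE_ELEM, &attr) ||
	    sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr)) {
		perror("update/lookup");
		return 1;
	}

	for (i = 0; i < ncpu; i++)	/* one slot back per possible cpu */
		sum += values[i];
	printf("key %u: sum across %ld cpus = %llu\n",
	       key, ncpu, (unsigned long long)sum);
	free(values);
	return 0;
}

Note the 8-byte rounding: a 12-byte per-cpu value occupies 16 bytes per CPU in the user buffer, so sizing it as value_size * ncpu would let the kernel's copy overrun the allocation.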