Diffstat (limited to 'kernel/bpf/memalloc.c')
-rw-r--r--	kernel/bpf/memalloc.c	94
1 file changed, 90 insertions, 4 deletions
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 9c49ae53deaf..cf1941516643 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -459,8 +459,7 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c)
  * Typical case will be between 11K and 116K closer to 11K.
  * bpf progs can and should share bpf_mem_cache when possible.
  */
-
-static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
+static void init_refill_work(struct bpf_mem_cache *c)
 {
 	init_irq_work(&c->refill_work, bpf_mem_refill);
 	if (c->unit_size <= 256) {
@@ -476,7 +475,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
 		c->high_watermark = max(96 * 256 / c->unit_size, 3);
 	}
 	c->batch = max((c->high_watermark - c->low_watermark) / 4 * 3, 1);
+}
 
+static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
+{
 	/* To avoid consuming memory assume that 1st run of bpf
 	 * prog won't be doing more than 4 map_update_elem from
 	 * irq disabled region
@@ -484,6 +486,31 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
 	alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
 }
 
+static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
+{
+	struct llist_node *first;
+	unsigned int obj_size;
+
+	/* For per-cpu allocator, the size of free objects in free list doesn't
+	 * match with unit_size and now there is no way to get the size of
+	 * per-cpu pointer saved in free object, so just skip the checking.
+	 */
+	if (c->percpu_size)
+		return 0;
+
+	first = c->free_llist.first;
+	if (!first)
+		return 0;
+
+	obj_size = ksize(first);
+	if (obj_size != c->unit_size) {
+		WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
+			  idx, obj_size, c->unit_size);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /* When size != 0 bpf_mem_cache for each cpu.
  * This is typical bpf hash map use case when all elements have equal size.
  *
@@ -494,10 +521,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
 int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 {
 	static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
+	int cpu, i, err, unit_size, percpu_size = 0;
 	struct bpf_mem_caches *cc, __percpu *pcc;
 	struct bpf_mem_cache *c, __percpu *pc;
 	struct obj_cgroup *objcg = NULL;
-	int cpu, i, unit_size, percpu_size = 0;
 
 	if (size) {
 		pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
@@ -521,6 +548,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 			c->objcg = objcg;
 			c->percpu_size = percpu_size;
 			c->tgt = c;
+			init_refill_work(c);
 			prefill_mem_cache(c, cpu);
 		}
 		ma->cache = pc;
@@ -534,6 +562,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 	pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
 	if (!pcc)
 		return -ENOMEM;
+	err = 0;
 #ifdef CONFIG_MEMCG_KMEM
 	objcg = get_obj_cgroup_from_current();
 #endif
@@ -544,11 +573,30 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 			c->unit_size = sizes[i];
 			c->objcg = objcg;
 			c->tgt = c;
+
+			init_refill_work(c);
+			/* Another bpf_mem_cache will be used when allocating
+			 * c->unit_size in bpf_mem_alloc(), so doesn't prefill
+			 * for the bpf_mem_cache because these free objects will
+			 * never be used.
+			 */
+			if (i != bpf_mem_cache_idx(c->unit_size))
+				continue;
 			prefill_mem_cache(c, cpu);
+			err = check_obj_size(c, i);
+			if (err)
+				goto out;
 		}
 	}
+
+out:
 	ma->caches = pcc;
-	return 0;
+	/* refill_work is either zeroed or initialized, so it is safe to
+	 * call irq_work_sync().
+	 */
+	if (err)
+		bpf_mem_alloc_destroy(ma);
+	return err;
 }
 
 static void drain_mem_cache(struct bpf_mem_cache *c)
@@ -916,3 +964,41 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
 
 	return !ret ? NULL : ret + LLIST_NODE_SZ;
 }
+
+/* Most of the logic is taken from setup_kmalloc_cache_index_table() */
+static __init int bpf_mem_cache_adjust_size(void)
+{
+	unsigned int size, index;
+
+	/* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be
+	 * up-to 256-bytes.
+	 */
+	size = KMALLOC_MIN_SIZE;
+	if (size <= 192)
+		index = size_index[(size - 1) / 8];
+	else
+		index = fls(size - 1) - 1;
+	for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8)
+		size_index[(size - 1) / 8] = index;
+
+	/* The minimal alignment is 64-bytes, so disable 96-bytes cache and
+	 * use 128-bytes cache instead.
+	 */
+	if (KMALLOC_MIN_SIZE >= 64) {
+		index = size_index[(128 - 1) / 8];
+		for (size = 64 + 8; size <= 96; size += 8)
+			size_index[(size - 1) / 8] = index;
+	}
+
+	/* The minimal alignment is 128-bytes, so disable 192-bytes cache and
+	 * use 256-bytes cache instead.
+	 */
+	if (KMALLOC_MIN_SIZE >= 128) {
+		index = fls(256 - 1) - 1;
+		for (size = 128 + 8; size <= 192; size += 8)
+			size_index[(size - 1) / 8] = index;
+	}
+
+	return 0;
+}
+subsys_initcall(bpf_mem_cache_adjust_size);
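For context on what the new check_obj_size() and bpf_mem_cache_adjust_size() helpers guard against: on configurations where KMALLOC_MIN_SIZE is large (for example 64 bytes due to DMA alignment), kmalloc() rounds small requests up to the next available size class, so a bpf_mem_cache with unit_size 96 would actually be handed 128-byte slab objects. The userspace sketch below only illustrates that mismatch under the assumption KMALLOC_MIN_SIZE == 64; kmalloc_classes[] and kmalloc_object_size() are made-up stand-ins for the slab behaviour, not kernel APIs.

/*
 * Userspace sketch, not kernel code: it illustrates the unit_size vs. slab
 * object size mismatch that check_obj_size() warns about.  The available
 * size classes assume KMALLOC_MIN_SIZE == 64, where the 8/16/32-byte caches
 * collapse into 64 and the 96-byte cache is disabled in favour of 128.
 */
#include <stdio.h>

/* unit sizes of the bpf_mem_alloc caches, copied from bpf_mem_alloc_init() */
static const unsigned int sizes[] = {96, 192, 16, 32, 64, 128, 256, 512,
				     1024, 2048, 4096};

/* assumed kmalloc size classes when KMALLOC_MIN_SIZE == 64 */
static const unsigned int kmalloc_classes[] = {64, 128, 192, 256, 512,
					       1024, 2048, 4096};

/* stand-in for "what object size would kmalloc(size) really hand back?" */
static unsigned int kmalloc_object_size(unsigned int size)
{
	for (size_t i = 0; i < sizeof(kmalloc_classes) / sizeof(kmalloc_classes[0]); i++)
		if (size <= kmalloc_classes[i])
			return kmalloc_classes[i];
	return 0;
}

int main(void)
{
	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned int unit_size = sizes[i];
		unsigned int obj_size = kmalloc_object_size(unit_size);

		if (obj_size != unit_size)
			printf("bpf_mem_cache[%zu]: unit_size %u, slab object %u\n",
			       i, unit_size, obj_size);
	}
	return 0;
}

Under that assumption the sketch reports mismatches for the 16-, 32- and 96-byte caches, which is why the patch has bpf_mem_cache_adjust_size() remap the allocator's size_index[] so bpf_mem_cache_idx() never selects those caches, and skips prefilling them via the i != bpf_mem_cache_idx(c->unit_size) check above.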