author     Alexei Starovoitov <[email protected]>   2022-10-21 19:17:38 -0700
committer  Alexei Starovoitov <[email protected]>   2022-10-21 19:17:38 -0700
commit     bed54aeb6ac1ced7e0ea27a82ee52af856610ff0 (patch)
tree       880478f51c4b63a582c3b1dba2a5730d31fe9394
parent     dbe69b29988465b011f198f2797b1c2b6980b50e (diff)
parent     fa4447cb73b2bfe7175f1b7ffdc70580fcfcb991 (diff)
Merge branch 'Wait for busy refill_work when destroying bpf memory allocator'
Hou Tao says:
====================
From: Hou Tao <[email protected]>
Hi,
The patchset fixes a problem with bpf memory allocator destruction on
PREEMPT_RT kernels, or on kernels where arch_irq_work_has_interrupt()
is false (e.g. a 1-CPU arm32 host or MIPS). The root cause is that
refill_work may still be busy while the allocator is being destroyed,
which can lead to an oops or other problems, as shown in patch #1.
Patch #1 fixes the problem by waiting for the irq work to complete
during destruction, and patch #2 is a clean-up based on patch #1.
Please see the individual patches for more details.
Comments are always welcome.
Change Log:
v2:
* patch 1: fix typos and add notes about the overhead of irq_work_sync()
* patch 1 & 2: add Acked-by tags from [email protected]
v1: https://lore.kernel.org/bpf/[email protected]/T/#t
====================
Signed-off-by: Alexei Starovoitov <[email protected]>
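
For readers less familiar with the irq_work interface, here is a minimal sketch of the lifecycle the fix relies on. It is illustrative only: struct pcpu_cache, cache_refill(), cache_init(), cache_alloc_slowpath() and cache_destroy() are hypothetical stand-ins for the bpf_mem_cache code, while init_llist_head(), init_irq_work(), irq_work_queue() and irq_work_sync() are the real <linux/llist.h> and <linux/irq_work.h> interfaces.

#include <linux/container_of.h>
#include <linux/irq_work.h>
#include <linux/llist.h>

/* Hypothetical stand-in for struct bpf_mem_cache. */
struct pcpu_cache {
        struct llist_head free_llist;
        struct irq_work refill_work;
};

/* The callback normally runs from a self-interrupt shortly after being
 * queued, but on PREEMPT_RT it runs in a per-CPU RT kthread and, when
 * arch_irq_work_has_interrupt() is false, from the timer interrupt, so
 * it may still be running long after irq_work_queue() returned.
 */
static void cache_refill(struct irq_work *work)
{
        struct pcpu_cache *c = container_of(work, struct pcpu_cache, refill_work);

        /* ... refill c->free_llist ... */
}

static void cache_init(struct pcpu_cache *c)
{
        init_llist_head(&c->free_llist);
        init_irq_work(&c->refill_work, cache_refill);
}

static void cache_alloc_slowpath(struct pcpu_cache *c)
{
        /* The allocation path only queues the work and returns. */
        irq_work_queue(&c->refill_work);
}

static void cache_destroy(struct pcpu_cache *c)
{
        /* Wait for a pending or still-running callback before tearing the
         * lists down; this wait is the irq_work_sync() overhead mentioned
         * in the v2 changelog. Afterwards teardown is the only user of
         * free_llist.
         */
        irq_work_sync(&c->refill_work);

        /* ... drain c->free_llist ... */
}

The diff below adds the same ordering per CPU, for both ma->cache and ma->caches.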
-rw-r--r--  kernel/bpf/memalloc.c | 18
1 file changed, 16 insertions, 2 deletions
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 5f83be1d2018..4901fa1048cd 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -418,14 +418,17 @@ static void drain_mem_cache(struct bpf_mem_cache *c)
         /* No progs are using this bpf_mem_cache, but htab_map_free() called
          * bpf_mem_cache_free() for all remaining elements and they can be in
          * free_by_rcu or in waiting_for_gp lists, so drain those lists now.
+         *
+         * Except for waiting_for_gp list, there are no concurrent operations
+         * on these lists, so it is safe to use __llist_del_all().
          */
         llist_for_each_safe(llnode, t, __llist_del_all(&c->free_by_rcu))
                 free_one(c, llnode);
         llist_for_each_safe(llnode, t, llist_del_all(&c->waiting_for_gp))
                 free_one(c, llnode);
-        llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist))
+        llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist))
                 free_one(c, llnode);
-        llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist_extra))
+        llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist_extra))
                 free_one(c, llnode);
 }
 
@@ -493,6 +496,16 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
         rcu_in_progress = 0;
         for_each_possible_cpu(cpu) {
                 c = per_cpu_ptr(ma->cache, cpu);
+                /*
+                 * refill_work may be unfinished for PREEMPT_RT kernel
+                 * in which irq work is invoked in a per-CPU RT thread.
+                 * It is also possible for kernel with
+                 * arch_irq_work_has_interrupt() being false and irq
+                 * work is invoked in timer interrupt. So waiting for
+                 * the completion of irq work to ease the handling of
+                 * concurrency.
+                 */
+                irq_work_sync(&c->refill_work);
                 drain_mem_cache(c);
                 rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
         }
@@ -507,6 +520,7 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
                 cc = per_cpu_ptr(ma->caches, cpu);
                 for (i = 0; i < NUM_CACHES; i++) {
                         c = &cc->cache[i];
+                        irq_work_sync(&c->refill_work);
                         drain_mem_cache(c);
                         rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
                 }
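
As a side note on the drain_mem_cache() hunk: the switch to __llist_del_all() is only valid because of the irq_work_sync() added further down. The rough sketch below contrasts the two drain flavours; demo_drain() and its parameters are made-up names, while llist_del_all(), __llist_del_all(), llist_for_each_safe() and kfree() are the real kernel helpers (the actual code frees elements through free_one()).

#include <linux/llist.h>
#include <linux/slab.h>

static void demo_drain(struct llist_head *shared, struct llist_head *exclusive)
{
        struct llist_node *llnode, *t;

        /* llist_del_all() detaches the whole list with an atomic xchg(),
         * so it tolerates another context still operating on the list;
         * the patch keeps this variant for waiting_for_gp.
         */
        llist_for_each_safe(llnode, t, llist_del_all(shared))
                kfree(llnode); /* assumes the llist_node starts the kmalloc()ed object */

        /* __llist_del_all() is a plain load and store of the head pointer.
         * It is only correct once irq_work_sync() has guaranteed that
         * refill_work can no longer touch the list.
         */
        llist_for_each_safe(llnode, t, __llist_del_all(exclusive))
                kfree(llnode);
}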