aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSuren Baghdasaryan <[email protected]>2021-10-28 14:36:14 -0700
committerLinus Torvalds <[email protected]>2021-10-28 17:18:55 -0700
commit337546e83fc7e50917f44846beee936abb9c9f1f (patch)
treef80fd37b3424bfc7a9f7fb5f5460290b3956d148
parenteac96c3efdb593df1a57bb5b95dbe037bfa9a522 (diff)
mm/oom_kill.c: prevent a race between process_mrelease and exit_mmap
Race between process_mrelease and exit_mmap, where free_pgtables is called while __oom_reap_task_mm is in progress, leads to kernel crash during pte_offset_map_lock call. oom-reaper avoids this race by setting MMF_OOM_VICTIM flag and causing exit_mmap to take and release mmap_write_lock, blocking it until oom-reaper releases mmap_read_lock. Reusing MMF_OOM_VICTIM for process_mrelease would be the simplest way to fix this race, however that would be considered a hack. Fix this race by elevating mm->mm_users and preventing exit_mmap from executing until process_mrelease is finished. Patch slightly refactors the code to adapt for a possible mmget_not_zero failure. This fix has considerable negative impact on process_mrelease performance and will likely need later optimization. Link: https://lkml.kernel.org/r/[email protected] Fixes: 884a7e5964e0 ("mm: introduce process_mrelease system call") Signed-off-by: Suren Baghdasaryan <[email protected]> Acked-by: Michal Hocko <[email protected]> Cc: David Rientjes <[email protected]> Cc: Matthew Wilcox (Oracle) <[email protected]> Cc: Johannes Weiner <[email protected]> Cc: Roman Gushchin <[email protected]> Cc: Rik van Riel <[email protected]> Cc: Minchan Kim <[email protected]> Cc: Christian Brauner <[email protected]> Cc: Christoph Hellwig <[email protected]> Cc: Oleg Nesterov <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Jann Horn <[email protected]> Cc: Shakeel Butt <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Christian Brauner <[email protected]> Cc: Florian Weimer <[email protected]> Cc: Jan Engelhardt <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
-rw-r--r--mm/oom_kill.c23
1 files changed, 12 insertions, 11 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 831340e7ad8b..989f35a2bbb1 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -1150,7 +1150,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
struct task_struct *task;
struct task_struct *p;
unsigned int f_flags;
- bool reap = true;
+ bool reap = false;
struct pid *pid;
long ret = 0;
@@ -1177,15 +1177,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
goto put_task;
}
- mm = p->mm;
- mmgrab(mm);
-
- /* If the work has been done already, just exit with success */
- if (test_bit(MMF_OOM_SKIP, &mm->flags))
- reap = false;
- else if (!task_will_free_mem(p)) {
- reap = false;
- ret = -EINVAL;
+ if (mmget_not_zero(p->mm)) {
+ mm = p->mm;
+ if (task_will_free_mem(p))
+ reap = true;
+ else {
+ /* Error only if the work has not been done already */
+ if (!test_bit(MMF_OOM_SKIP, &mm->flags))
+ ret = -EINVAL;
+ }
}
task_unlock(p);
@@ -1201,7 +1201,8 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
mmap_read_unlock(mm);
drop_mm:
- mmdrop(mm);
+ if (mm)
+ mmput(mm);
put_task:
put_task_struct(task);
put_pid: