From f91e6b41dd11daffb138e3afdb4804aefc3d4e1b Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Thu, 15 Feb 2024 10:27:53 -0800 Subject: userfaultfd: move userfaultfd_ctx struct to header file Patch series "per-vma locks in userfaultfd", v7. Performing userfaultfd operations (like copy/move etc.) in critical section of mmap_lock (read-mode) causes significant contention on the lock when operations requiring the lock in write-mode are taking place concurrently. We can use per-vma locks instead to significantly reduce the contention issue. Android runtime's Garbage Collector uses userfaultfd for concurrent compaction. mmap-lock contention during compaction potentially causes jittery experience for the user. During one such reproducible scenario, we observed the following improvements with this patch-set: - Wall clock time of compaction phase came down from ~3s to <500ms - Uninterruptible sleep time (across all threads in the process) was ~10ms (none in mmap_lock) during compaction, instead of >20s This patch (of 4): Move the struct to userfaultfd_k.h to be accessible from mm/userfaultfd.c. There are no other changes in the struct. This is required to prepare for using per-vma locks in userfaultfd operations. Link: https://lkml.kernel.org/r/20240215182756.3448972-1-lokeshgidra@google.com Link: https://lkml.kernel.org/r/20240215182756.3448972-2-lokeshgidra@google.com Signed-off-by: Lokesh Gidra Reviewed-by: Mike Rapoport (IBM) Reviewed-by: Liam R. Howlett Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: Brian Geffon Cc: David Hildenbrand Cc: Jann Horn Cc: Kalesh Singh Cc: Matthew Wilcox (Oracle) Cc: Nicolas Geoffray Cc: Peter Xu Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Tim Murray Signed-off-by: Andrew Morton --- include/linux/userfaultfd_k.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index e4056547fbe6..691d928ee864 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -36,6 +36,45 @@ #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) +/* + * Start with fault_pending_wqh and fault_wqh so they're more likely + * to be in the same cacheline. + * + * Locking order: + * fd_wqh.lock + * fault_pending_wqh.lock + * fault_wqh.lock + * event_wqh.lock + * + * To avoid deadlocks, IRQs must be disabled when taking any of the above locks, + * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's + * also taken in IRQ context. + */ +struct userfaultfd_ctx { + /* waitqueue head for the pending (i.e. not read) userfaults */ + wait_queue_head_t fault_pending_wqh; + /* waitqueue head for the userfaults */ + wait_queue_head_t fault_wqh; + /* waitqueue head for the pseudo fd to wakeup poll/read */ + wait_queue_head_t fd_wqh; + /* waitqueue head for events */ + wait_queue_head_t event_wqh; + /* a refile sequence protected by fault_pending_wqh lock */ + seqcount_spinlock_t refile_seq; + /* pseudo fd refcounting */ + refcount_t refcount; + /* userfaultfd syscall flags */ + unsigned int flags; + /* features requested from the userspace */ + unsigned int features; + /* released */ + bool released; + /* memory mappings are changing because of non-cooperative event */ + atomic_t mmap_changing; + /* mm with one ore more vmas attached to this userfaultfd_ctx */ + struct mm_struct *mm; +}; + extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); /* A combined operation mode + behavior flags. */ -- cgit