userfaultfd: move userfaultfd_ctx struct to header file

Patch series "per-vma locks in userfaultfd", v7. Performing userfaultfd operations (like copy/move etc.) in critical section of mmap_lock (read-mode) causes significant contention on the lock when operations requiring the lock in write-mode are taking place concurrently. We can use per-vma locks instead to significantly reduce the contention issue. Android runtime's Garbage Collector uses userfaultfd for concurrent compaction. mmap-lock contention during compaction potentially causes jittery experience for the user. During one such reproducible scenario, we observed the following improvements with this patch-set: - Wall clock time of compaction phase came down from ~3s to <500ms - Uninterruptible sleep time (across all threads in the process) was ~10ms (none in mmap_lock) during compaction, instead of >20s This patch (of 4): Move the struct to userfaultfd_k.h to be accessible from mm/userfaultfd.c. There are no other changes in the struct. This is required to prepare for using per-vma locks in userfaultfd operations. Link: https://lkml.kernel.org/r/[email protected] Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Lokesh Gidra <[email protected]> Reviewed-by: Mike Rapoport (IBM) <[email protected]> Reviewed-by: Liam R. Howlett <[email protected]> Cc: Andrea Arcangeli <[email protected]> Cc: Axel Rasmussen <[email protected]> Cc: Brian Geffon <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Jann Horn <[email protected]> Cc: Kalesh Singh <[email protected]> Cc: Matthew Wilcox (Oracle) <[email protected]> Cc: Nicolas Geoffray <[email protected]> Cc: Peter Xu <[email protected]> Cc: Ryan Roberts <[email protected]> Cc: Suren Baghdasaryan <[email protected]> Cc: Tim Murray <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
author: Lokesh Gidra <[email protected]> 2024-02-15 10:27:53 -0800
committer: Andrew Morton <[email protected]> 2024-02-22 15:27:20 -0800
commit: f91e6b41dd11daffb138e3afdb4804aefc3d4e1b (patch)
tree: 9f01d44fc430c776e29fd516a9a4724ea47f1362 /include
parent: 952237b5a9b79acf1212b40ad1e993557e3cb0ca (diff)
1 files changed, 39 insertions, 0 deletions
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index e4056547fbe6..691d928ee864 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -36,6 +36,45 @@
 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
 #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
 
+/*
+ * Start with fault_pending_wqh and fault_wqh so they're more likely
+ * to be in the same cacheline.
+ *
+ * Locking order:
+ *	fd_wqh.lock
+ *		fault_pending_wqh.lock
+ *			fault_wqh.lock
+ *		event_wqh.lock
+ *
+ * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
+ * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
+ * also taken in IRQ context.
+ */
+struct userfaultfd_ctx {
+	/* waitqueue head for the pending (i.e. not read) userfaults */
+	wait_queue_head_t fault_pending_wqh;
+	/* waitqueue head for the userfaults */
+	wait_queue_head_t fault_wqh;
+	/* waitqueue head for the pseudo fd to wakeup poll/read */
+	wait_queue_head_t fd_wqh;
+	/* waitqueue head for events */
+	wait_queue_head_t event_wqh;
+	/* a refile sequence protected by fault_pending_wqh lock */
+	seqcount_spinlock_t refile_seq;
+	/* pseudo fd refcounting */
+	refcount_t refcount;
+	/* userfaultfd syscall flags */
+	unsigned int flags;
+	/* features requested from the userspace */
+	unsigned int features;
+	/* released */
+	bool released;
+	/* memory mappings are changing because of non-cooperative event */
+	atomic_t mmap_changing;
+	/* mm with one ore more vmas attached to this userfaultfd_ctx */
+	struct mm_struct *mm;
+};
+
 extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
 
 /* A combined operation mode + behavior flags. */
author	Lokesh Gidra <[email protected]>	2024-02-15 10:27:53 -0800
committer	Andrew Morton <[email protected]>	2024-02-22 15:27:20 -0800
commit	f91e6b41dd11daffb138e3afdb4804aefc3d4e1b (patch)
tree	9f01d44fc430c776e29fd516a9a4724ea47f1362 /include
parent	952237b5a9b79acf1212b40ad1e993557e3cb0ca (diff)