aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteven Price <[email protected]>2020-02-03 17:35:54 -0800
committerLinus Torvalds <[email protected]>2020-02-04 03:05:25 +0000
commitfbf56346b855872db45af7c1274180f9d91f46cd (patch)
tree30a520c91c81e5a33ee997cae81316ab00776bd7
parent488ae6a2b933cb538b5d91b1c0a3420188d28771 (diff)
mm: pagewalk: don't lock PTEs for walk_page_range_novma()
walk_page_range_novma() can be used to walk page tables of the kernel or for firmware. These page tables may contain entries that are not backed by a struct page and so it isn't (in general) possible to take the PTE lock for the pte_entry() callback. So update walk_pte_range() to only take the lock when no_vma==false by splitting out the inner loop to a separate function and add a comment explaining the difference to walk_page_range_novma(). Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Steven Price <[email protected]> Cc: Albert Ou <[email protected]> Cc: Alexandre Ghiti <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Ard Biesheuvel <[email protected]> Cc: Arnd Bergmann <[email protected]> Cc: Benjamin Herrenschmidt <[email protected]> Cc: Borislav Petkov <[email protected]> Cc: Catalin Marinas <[email protected]> Cc: Christian Borntraeger <[email protected]> Cc: Dave Hansen <[email protected]> Cc: David S. Miller <[email protected]> Cc: Heiko Carstens <[email protected]> Cc: "H. Peter Anvin" <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: James Hogan <[email protected]> Cc: James Morse <[email protected]> Cc: Jerome Glisse <[email protected]> Cc: "Liang, Kan" <[email protected]> Cc: Mark Rutland <[email protected]> Cc: Michael Ellerman <[email protected]> Cc: Paul Burton <[email protected]> Cc: Paul Mackerras <[email protected]> Cc: Paul Walmsley <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Ralf Baechle <[email protected]> Cc: Russell King <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Vasily Gorbik <[email protected]> Cc: Vineet Gupta <[email protected]> Cc: Will Deacon <[email protected]> Cc: Zong Li <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
-rw-r--r--mm/pagewalk.c35
1 files changed, 28 insertions, 7 deletions
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index d5773465f6da..4b5ee92ba079 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -4,15 +4,12 @@
#include <linux/sched.h>
#include <linux/hugetlb.h>
-static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
{
- pte_t *pte;
- int err = 0;
const struct mm_walk_ops *ops = walk->ops;
- spinlock_t *ptl;
+ int err = 0;
- pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
for (;;) {
err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
if (err)
@@ -22,8 +19,26 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
break;
pte++;
}
+ return err;
+}
+
+static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ pte_t *pte;
+ int err = 0;
+ spinlock_t *ptl;
+
+ if (walk->no_vma) {
+ pte = pte_offset_map(pmd, addr);
+ err = walk_pte_range_inner(pte, addr, end, walk);
+ pte_unmap(pte);
+ } else {
+ pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ err = walk_pte_range_inner(pte, addr, end, walk);
+ pte_unmap_unlock(pte, ptl);
+ }
- pte_unmap_unlock(pte, ptl);
return err;
}
@@ -394,6 +409,12 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
return err;
}
+/*
+ * Similar to walk_page_range() but can walk any page tables even if they are
+ * not backed by VMAs. Because 'unusual' entries may be walked this function
+ * will also not lock the PTEs for the pte_entry() callback. This is useful for
+ * walking the kernel page tables or page tables for firmware.
+ */
int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
void *private)