diff options
author | Steven Price <[email protected]> | 2020-02-03 17:35:54 -0800 |
---|---|---|
committer | Linus Torvalds <[email protected]> | 2020-02-04 03:05:25 +0000 |
commit | fbf56346b855872db45af7c1274180f9d91f46cd (patch) | |
tree | 30a520c91c81e5a33ee997cae81316ab00776bd7 | |
parent | 488ae6a2b933cb538b5d91b1c0a3420188d28771 (diff) |
mm: pagewalk: don't lock PTEs for walk_page_range_novma()
walk_page_range_novma() can be used to walk page tables of the kernel or
for firmware. These page tables may contain entries that are not backed
by a struct page and so it isn't (in general) possible to take the PTE
lock for the pte_entry() callback. So update walk_pte_range() to only
take the lock when no_vma==false by splitting out the inner loop to a
separate function and add a comment explaining the difference to
walk_page_range_novma().
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Steven Price <[email protected]>
Cc: Albert Ou <[email protected]>
Cc: Alexandre Ghiti <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Ard Biesheuvel <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: David S. Miller <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: James Hogan <[email protected]>
Cc: James Morse <[email protected]>
Cc: Jerome Glisse <[email protected]>
Cc: "Liang, Kan" <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Paul Burton <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Paul Walmsley <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ralf Baechle <[email protected]>
Cc: Russell King <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vasily Gorbik <[email protected]>
Cc: Vineet Gupta <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Zong Li <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
-rw-r--r-- | mm/pagewalk.c | 35 |
1 files changed, 28 insertions, 7 deletions
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index d5773465f6da..4b5ee92ba079 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -4,15 +4,12 @@
 #include <linux/sched.h>
 #include <linux/hugetlb.h>
 
-static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-			  struct mm_walk *walk)
+static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
+				unsigned long end, struct mm_walk *walk)
 {
-	pte_t *pte;
-	int err = 0;
 	const struct mm_walk_ops *ops = walk->ops;
-	spinlock_t *ptl;
+	int err = 0;
 
-	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
 	for (;;) {
 		err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
 		if (err)
@@ -22,8 +19,26 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			break;
 		pte++;
 	}
+	return err;
+}
+
+static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+			  struct mm_walk *walk)
+{
+	pte_t *pte;
+	int err = 0;
+	spinlock_t *ptl;
+
+	if (walk->no_vma) {
+		pte = pte_offset_map(pmd, addr);
+		err = walk_pte_range_inner(pte, addr, end, walk);
+		pte_unmap(pte);
+	} else {
+		pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+		err = walk_pte_range_inner(pte, addr, end, walk);
+		pte_unmap_unlock(pte, ptl);
+	}
 
-	pte_unmap_unlock(pte, ptl);
 	return err;
 }
 
@@ -394,6 +409,12 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
 	return err;
 }
 
+/*
+ * Similar to walk_page_range() but can walk any page tables even if they are
+ * not backed by VMAs. Because 'unusual' entries may be walked this function
+ * will also not lock the PTEs for the pte_entry() callback. This is useful for
+ * walking the kernel pages tables or page tables for firmware.
+ */
 int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
 			  unsigned long end, const struct mm_walk_ops *ops,
 			  void *private)