author	Minchan Kim <[email protected]>	2016-01-15 16:55:06 -0800
committer	Linus Torvalds <[email protected]>	2016-01-15 17:56:32 -0800
commit	64b42bc1cfdf6e2c3ab7315f2ff56c31cd257370 (patch)
tree	ef734cba71908ab191f20ae3ed48a9d9ec0af9c5
parent	21f55b018ba57897f4d3590ecbe11516bdc540af (diff)
mm/madvise.c: free swp_entry in madvise_free
When I test the below piece of code with 12 processes (i.e., 512M * 12 = 6G
consumed) on my machine (3G RAM + 12 CPUs + 8G swap), madvise_free is
significantly (about 2x) slower than madvise_dontneed.

    loop = 5;
    mmap(512M);
    while (loop--) {
            memset(512M);
            madvise(MADV_FREE or MADV_DONTNEED);
    }

The reason is lots of swapin:

1) dontneed: 1,612 swapin
2) madvfree: 879,585 swapin

If we find that hinted pages were already swapped out when the syscall is
called, it is pointless to keep the swapped-out pages in the pte. Instead,
let's free the cold pages, because swapin is more expensive than
(page allocation + zeroing).

With this patch, swapin drops from 879,585 to 1,878, so the elapsed time
improves as well:

1) dontneed:              6.10user 233.50system 0:50.44elapsed
2) madvfree:              6.03user 401.17system 1:30.67elapsed
3) madvfree + below patch: 6.70user 339.14system 1:04.45elapsed

Signed-off-by: Minchan Kim <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Acked-by: Hugh Dickins <[email protected]>
Cc: "James E.J. Bottomley" <[email protected]>
Cc: "Kirill A. Shutemov" <[email protected]>
Cc: Shaohua Li <[email protected]>
Cc: <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Chen Gang <[email protected]>
Cc: Chris Zankel <[email protected]>
Cc: Daniel Micay <[email protected]>
Cc: Darrick J. Wong <[email protected]>
Cc: David S. Miller <[email protected]>
Cc: Helge Deller <[email protected]>
Cc: Ivan Kokshaysky <[email protected]>
Cc: Jason Evans <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: KOSAKI Motohiro <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Matt Turner <[email protected]>
Cc: Max Filippov <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Michael Kerrisk <[email protected]>
Cc: Mika Penttilä <[email protected]>
Cc: Ralf Baechle <[email protected]>
Cc: Richard Henderson <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Roland Dreier <[email protected]>
Cc: Russell King <[email protected]>
Cc: Shaohua Li <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Wu Fengguang <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
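For reference, a self-contained userspace version of the test loop above might look like the following sketch. It is illustrative only: the MADV_FREE fallback define, the 512M size, the loop count and the memset value are taken from (or assumed to be consistent with) the description in the log, and several instances must run in parallel under memory pressure for the swapin difference to show up.

/* repro.c: hypothetical reproduction of the test loop described above. */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MADV_FREE
#define MADV_FREE 8			/* asm-generic value, kernels >= 4.5 */
#endif

#define SIZE (512UL << 20)		/* 512M per process, as in the log */

int main(int argc, char **argv)
{
	/* "dontneed" as argv[1] selects MADV_DONTNEED, default is MADV_FREE */
	int advice = (argc > 1 && !strcmp(argv[1], "dontneed")) ?
			MADV_DONTNEED : MADV_FREE;
	char *buf = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	int loop = 5;

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	while (loop--) {
		memset(buf, 1, SIZE);		/* dirty every page */
		if (madvise(buf, SIZE, advice)) {
			perror("madvise");
			return 1;
		}
	}
	return 0;
}

Running 12 such processes concurrently under /usr/bin/time, as the log describes, is the setup that produces swapin counts of the kind quoted above.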
 mm/madvise.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/mm/madvise.c b/mm/madvise.c
index ed137fde4459..98e28e777ccb 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -270,6 +270,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 	spinlock_t *ptl;
 	pte_t *orig_pte, *pte, ptent;
 	struct page *page;
+	int nr_swap = 0;
 
 	split_huge_pmd(vma, pmd, addr);
 	if (pmd_trans_unstable(pmd))
@@ -280,8 +281,24 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		ptent = *pte;
 
-		if (!pte_present(ptent))
+		if (pte_none(ptent))
 			continue;
+		/*
+		 * If the pte holds a swp_entry, just clear the page table
+		 * entry to prevent swap-in, which is more expensive than
+		 * (page allocation + zeroing).
+		 */
+		if (!pte_present(ptent)) {
+			swp_entry_t entry;
+
+			entry = pte_to_swp_entry(ptent);
+			if (non_swap_entry(entry))
+				continue;
+			nr_swap--;
+			free_swap_and_cache(entry);
+			pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+			continue;
+		}
 
 		page = vm_normal_page(vma, addr, ptent);
 		if (!page)
@@ -355,6 +372,12 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 		}
 	}
 out:
+	if (nr_swap) {
+		if (current->mm == mm)
+			sync_mm_rss(mm);
+
+		add_mm_counter(mm, MM_SWAPENTS, nr_swap);
+	}
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(orig_pte, ptl);
 	cond_resched();
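A note on the last hunk: nr_swap is decremented once per freed swap entry, so the final add_mm_counter() call applies a negative delta to MM_SWAPENTS, the per-mm counter that /proc/<pid>/status reports as VmSwap. A rough way to watch the effect from userspace is to compare VmSwap around the madvise() call; the snippet below is only a sketch and assumes part of the region has already been pushed out to swap by external memory pressure, otherwise VmSwap stays at 0 and nothing visible happens.

/* vmswap.c: illustrative check of VmSwap before/after MADV_FREE. */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MADV_FREE
#define MADV_FREE 8
#endif

static void print_vmswap(const char *tag)
{
	char line[128];
	FILE *f = fopen("/proc/self/status", "r");

	if (!f)
		return;
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "VmSwap:", 7))
			printf("%s %s", tag, line);
	fclose(f);
}

int main(void)
{
	size_t len = 256UL << 20;
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED)
		return 1;
	memset(buf, 1, len);
	/* ... arrange for memory pressure so some of buf gets swapped out ... */
	print_vmswap("before:");
	madvise(buf, len, MADV_FREE);	/* with this patch, frees the swap entries */
	print_vmswap("after: ");
	return 0;
}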