author      Mark Brown <broonie@kernel.org>    2016-11-04 12:16:38 -0600
committer   Mark Brown <broonie@kernel.org>    2016-11-04 12:16:38 -0600
commit      cc9b94029e9ef51787af908e9856b1eed314bc00 (patch)
tree        9675310b89d0f6fb1f7bd9423f0638c4ee5226fd /mm/hugetlb.c
parent      13bed58ce8748d430a26e353a09b89f9d613a71f (diff)
parent      1b5b42216469b05ef4b5916cb40b127dfab1da88 (diff)
Merge branch 'topic/error' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator into regulator-fixed
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--  mm/hugetlb.c  177
1 file changed, 120 insertions(+), 57 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d26162e81fea..ec49d9ef1eef 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -567,13 +567,13 @@ retry:
* appear as a "reserved" entry instead of simply dangling with incorrect
* counts.
*/
-void hugetlb_fix_reserve_counts(struct inode *inode, bool restore_reserve)
+void hugetlb_fix_reserve_counts(struct inode *inode)
{
struct hugepage_subpool *spool = subpool_inode(inode);
long rsv_adjust;
rsv_adjust = hugepage_subpool_get_pages(spool, 1);
- if (restore_reserve && rsv_adjust) {
+ if (rsv_adjust) {
struct hstate *h = hstate_inode(inode);
hugetlb_acct_memory(h, 1);
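
With the restore_reserve argument gone, the fix-up is applied whenever the
subpool adjustment succeeds. A minimal caller sketch (an assumption about how
hugetlbfs would use the helper, not part of this diff):

	/* Hypothetical caller: if dropping the reserve map entry fails,
	 * re-establish the subpool and hstate counts so they do not dangle. */
	static void example_remove_huge_page(struct inode *inode, pgoff_t index)
	{
		if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
			hugetlb_fix_reserve_counts(inode);
	}
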
@@ -832,8 +832,27 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
* Only the process that called mmap() has reserves for
* private mappings.
*/
- if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
- return true;
+ if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
+ /*
+ * Like the shared case above, a hole punch or truncate
+ * could have been performed on the private mapping.
+ * Examine the value of chg to determine if reserves
+ * actually exist or were previously consumed.
+ * Very Subtle - The value of chg comes from a previous
+ * call to vma_needs_reserves(). The reserve map for
+ * private mappings has different (opposite) semantics
+ * than that of shared mappings. vma_needs_reserves()
+ * has already taken this difference in semantics into
+ * account. Therefore, the meaning of chg is the same
+ * as in the shared case above. Code could easily be
+ * combined, but keeping it separate draws attention to
+ * subtle differences.
+ */
+ if (chg)
+ return false;
+ else
+ return true;
+ }
return false;
}
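
The private-owner branch can be read as a tiny predicate (illustrative only,
not part of the patch): reserves are usable only when the earlier
vma_needs_reservation() call reported that no new reserve map entry is needed.

	/* Illustrative summary of the private-owner case above: a non-zero
	 * chg means the reserve entry was already consumed or removed by a
	 * hole punch or truncate, so no reserve may be used for this fault. */
	static inline bool private_owner_has_reserves(long chg)
	{
		return chg == 0;
	}
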
@@ -1003,7 +1022,9 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
((node = hstate_next_node_to_free(hs, mask)) || 1); \
nr_nodes--)
-#if defined(CONFIG_X86_64) && ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA))
+#if defined(CONFIG_ARCH_HAS_GIGANTIC_PAGE) && \
+ ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || \
+ defined(CONFIG_CMA))
static void destroy_compound_gigantic_page(struct page *page,
unsigned int order)
{
@@ -1011,6 +1032,7 @@ static void destroy_compound_gigantic_page(struct page *page,
int nr_pages = 1 << order;
struct page *p = page + 1;
+ atomic_set(compound_mapcount_ptr(page), 0);
for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
clear_compound_head(p);
set_page_refcounted(p);
@@ -1415,37 +1437,61 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
/*
* Dissolve a given free hugepage into free buddy pages. This function does
- * nothing for in-use (including surplus) hugepages.
+ * nothing for in-use (including surplus) hugepages. Returns -EBUSY if the
+ * number of free hugepages would be reduced below the number of reserved
+ * hugepages.
*/
-static void dissolve_free_huge_page(struct page *page)
+static int dissolve_free_huge_page(struct page *page)
{
+ int rc = 0;
+
spin_lock(&hugetlb_lock);
if (PageHuge(page) && !page_count(page)) {
- struct hstate *h = page_hstate(page);
- int nid = page_to_nid(page);
- list_del(&page->lru);
+ struct page *head = compound_head(page);
+ struct hstate *h = page_hstate(head);
+ int nid = page_to_nid(head);
+ if (h->free_huge_pages - h->resv_huge_pages == 0) {
+ rc = -EBUSY;
+ goto out;
+ }
+ list_del(&head->lru);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
- update_and_free_page(h, page);
+ h->max_huge_pages--;
+ update_and_free_page(h, head);
}
+out:
spin_unlock(&hugetlb_lock);
+ return rc;
}
/*
* Dissolve free hugepages in a given pfn range. Used by memory hotplug to
* make specified memory blocks removable from the system.
- * Note that start_pfn should aligned with (minimum) hugepage size.
+ * Note that this will dissolve a free gigantic hugepage completely, if any
+ * part of it lies within the given range.
+ * Also note that if dissolve_free_huge_page() returns with an error, all
+ * free hugepages that were dissolved before that error are lost.
*/
-void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
+int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
{
unsigned long pfn;
+ struct page *page;
+ int rc = 0;
if (!hugepages_supported())
- return;
+ return rc;
- VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << minimum_order));
- for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order)
- dissolve_free_huge_page(pfn_to_page(pfn));
+ for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) {
+ page = pfn_to_page(pfn);
+ if (PageHuge(page) && !page_count(page)) {
+ rc = dissolve_free_huge_page(page);
+ if (rc)
+ break;
+ }
+ }
+
+ return rc;
}
/*
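
Because dissolve_free_huge_pages() now reports failure, a memory-hotplug
style caller can abort the offline attempt instead of silently eating into
the reserved pool; a minimal sketch (the caller name is hypothetical):

	/* Hypothetical caller: stop offlining this range if a free hugepage
	 * cannot be dissolved without going below the reserve count. */
	static int example_offline_huge_range(unsigned long start_pfn,
					      unsigned long end_pfn)
	{
		int rc = dissolve_free_huge_pages(start_pfn, end_pfn);

		if (rc)		/* typically -EBUSY: reserved pages blocked us */
			pr_warn("failed to dissolve free hugepages: %d\n", rc);
		return rc;
	}
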
@@ -1816,6 +1862,25 @@ static long __vma_reservation_common(struct hstate *h,
if (vma->vm_flags & VM_MAYSHARE)
return ret;
+ else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) && ret >= 0) {
+ /*
+ * In most cases, reserves always exist for private mappings.
+ * However, a file associated with mapping could have been
+ * hole punched or truncated after reserves were consumed.
+ * As subsequent fault on such a range will not use reserves.
+ * Subtle - The reserve map for private mappings has the
+ * opposite meaning than that of shared mappings. If NO
+ * entry is in the reserve map, it means a reservation exists.
+ * If an entry exists in the reserve map, it means the
+ * reservation has already been consumed. As a result, the
+ * return value of this routine is the opposite of the
+ * value returned from reserve map manipulation routines above.
+ */
+ if (ret)
+ return 0;
+ else
+ return 1;
+ }
else
return ret < 0 ? ret : 0;
}
@@ -2175,6 +2240,10 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
* and reducing the surplus.
*/
spin_unlock(&hugetlb_lock);
+
+ /* yield cpu to avoid soft lockup */
+ cond_resched();
+
if (hstate_is_gigantic(h))
ret = alloc_fresh_gigantic_page(h, nodes_allowed);
else
@@ -3138,7 +3207,6 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct page *ref_page)
{
- int force_flush = 0;
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
pte_t *ptep;
@@ -3157,19 +3225,22 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
tlb_start_vma(tlb, vma);
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
address = start;
-again:
for (; address < end; address += sz) {
ptep = huge_pte_offset(mm, address);
if (!ptep)
continue;
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, &address, ptep))
- goto unlock;
+ if (huge_pmd_unshare(mm, &address, ptep)) {
+ spin_unlock(ptl);
+ continue;
+ }
pte = huge_ptep_get(ptep);
- if (huge_pte_none(pte))
- goto unlock;
+ if (huge_pte_none(pte)) {
+ spin_unlock(ptl);
+ continue;
+ }
/*
* Migrating hugepage or HWPoisoned hugepage is already
@@ -3177,7 +3248,8 @@ again:
*/
if (unlikely(!pte_present(pte))) {
huge_pte_clear(mm, address, ptep);
- goto unlock;
+ spin_unlock(ptl);
+ continue;
}
page = pte_page(pte);
@@ -3187,9 +3259,10 @@ again:
* are about to unmap is the actual page of interest.
*/
if (ref_page) {
- if (page != ref_page)
- goto unlock;
-
+ if (page != ref_page) {
+ spin_unlock(ptl);
+ continue;
+ }
/*
* Mark the VMA as having unmapped its page so that
* future faults in this VMA will fail rather than
@@ -3205,30 +3278,14 @@ again:
hugetlb_count_sub(pages_per_huge_page(h), mm);
page_remove_rmap(page, true);
- force_flush = !__tlb_remove_page(tlb, page);
- if (force_flush) {
- address += sz;
- spin_unlock(ptl);
- break;
- }
- /* Bail out after unmapping reference page if supplied */
- if (ref_page) {
- spin_unlock(ptl);
- break;
- }
-unlock:
+
spin_unlock(ptl);
- }
- /*
- * mmu_gather ran out of room to batch pages, we break out of
- * the PTE lock to avoid doing the potential expensive TLB invalidate
- * and page-free while holding it.
- */
- if (force_flush) {
- force_flush = 0;
- tlb_flush_mmu(tlb);
- if (address < end && !ref_page)
- goto again;
+ tlb_remove_page_size(tlb, page, huge_page_size(h));
+ /*
+ * Bail out after unmapping reference page if supplied
+ */
+ if (ref_page)
+ break;
}
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
tlb_end_vma(tlb, vma);
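
The open-coded force_flush/"goto again" logic is replaced by
tlb_remove_page_size(), which queues the huge page in the mmu_gather batch
and performs the flush itself when the batch fills. A rough sketch of that
idea (conceptual only, not the actual mm/memory.c implementation):

	/* Conceptual sketch: try to queue the page; if the batch is full,
	 * flush the pending TLB invalidations and free the batch, then
	 * queue the page again. */
	static void sketch_tlb_remove_page_size(struct mmu_gather *tlb,
						struct page *page, int page_size)
	{
		if (__tlb_remove_page_size(tlb, page, page_size)) {
			tlb_flush_mmu(tlb);
			__tlb_remove_page_size(tlb, page, page_size);
		}
	}
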
@@ -3287,7 +3344,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
address = address & huge_page_mask(h);
pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
- mapping = file_inode(vma->vm_file)->i_mapping;
+ mapping = vma->vm_file->f_mapping;
/*
* Take the mapping lock for the duration of the table walk. As
@@ -3344,7 +3401,7 @@ retry_avoidcopy:
/* If no-one else is actually using this page, avoid the copy
* and just make the page writable */
if (page_mapcount(old_page) == 1 && PageAnon(old_page)) {
- page_move_anon_rmap(old_page, vma, address);
+ page_move_anon_rmap(old_page, vma);
set_huge_ptep_writable(vma, address, ptep);
return 0;
}
@@ -3909,6 +3966,14 @@ same_page:
return i ? i : -EFAULT;
}
+#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
+/*
+ * ARCHes with special requirements for evicting HUGETLB backing TLB entries can
+ * implement this.
+ */
+#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
+#endif
+
unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end, pgprot_t newprot)
{
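
Architectures whose hugetlb mappings need more than the generic
flush_tlb_range() can define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE and supply
their own helper in asm/hugetlb.h; a hedged sketch of such an override (the
body is illustrative, not any real architecture's code):

	/* Illustrative arch override in asm/hugetlb.h. */
	#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
	static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
						   unsigned long start,
						   unsigned long end)
	{
		/* an arch would issue its huge-page-aware invalidation here */
		flush_tlb_range(vma, start, end);
	}
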
@@ -3969,7 +4034,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* once we release i_mmap_rwsem, another task can do the final put_page
* and that page table be reused and filled with junk.
*/
- flush_tlb_range(vma, start, end);
+ flush_hugetlb_tlb_range(vma, start, end);
mmu_notifier_invalidate_range(mm, start, end);
i_mmap_unlock_write(vma->vm_file->f_mapping);
mmu_notifier_invalidate_range_end(mm, start, end);
@@ -4190,7 +4255,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
if (saddr) {
spte = huge_pte_offset(svma->vm_mm, saddr);
if (spte) {
- mm_inc_nr_pmds(mm);
get_page(virt_to_page(spte));
break;
}
@@ -4205,9 +4269,9 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
if (pud_none(*pud)) {
pud_populate(mm, pud,
(pmd_t *)((unsigned long)spte & PAGE_MASK));
+ mm_inc_nr_pmds(mm);
} else {
put_page(virt_to_page(spte));
- mm_inc_nr_pmds(mm);
}
spin_unlock(ptl);
out:
@@ -4278,7 +4342,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
pte = (pte_t *)pmd_alloc(mm, pud, addr);
}
}
- BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
+ BUG_ON(pte && pte_present(*pte) && !pte_huge(*pte));
return pte;
}
@@ -4363,7 +4427,6 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
/*
* This function is called from memory failure code.
- * Assume the caller holds page lock of the head page.
*/
int dequeue_hwpoisoned_huge_page(struct page *hpage)
{