diff options
author | Mark Brown <broonie@kernel.org> | 2016-11-04 12:16:38 -0600 |
---|---|---|
committer | Mark Brown <broonie@kernel.org> | 2016-11-04 12:16:38 -0600 |
commit | cc9b94029e9ef51787af908e9856b1eed314bc00 (patch) | |
tree | 9675310b89d0f6fb1f7bd9423f0638c4ee5226fd /mm/hugetlb.c | |
parent | 13bed58ce8748d430a26e353a09b89f9d613a71f (diff) | |
parent | 1b5b42216469b05ef4b5916cb40b127dfab1da88 (diff) |
Merge branch 'topic/error' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator into regulator-fixed
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 177 |
1 files changed, 120 insertions, 57 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d26162e81fea..ec49d9ef1eef 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -567,13 +567,13 @@ retry: * appear as a "reserved" entry instead of simply dangling with incorrect * counts. */ -void hugetlb_fix_reserve_counts(struct inode *inode, bool restore_reserve) +void hugetlb_fix_reserve_counts(struct inode *inode) { struct hugepage_subpool *spool = subpool_inode(inode); long rsv_adjust; rsv_adjust = hugepage_subpool_get_pages(spool, 1); - if (restore_reserve && rsv_adjust) { + if (rsv_adjust) { struct hstate *h = hstate_inode(inode); hugetlb_acct_memory(h, 1); @@ -832,8 +832,27 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg) * Only the process that called mmap() has reserves for * private mappings. */ - if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) - return true; + if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { + /* + * Like the shared case above, a hole punch or truncate + * could have been performed on the private mapping. + * Examine the value of chg to determine if reserves + * actually exist or were previously consumed. + * Very Subtle - The value of chg comes from a previous + * call to vma_needs_reserves(). The reserve map for + * private mappings has different (opposite) semantics + * than that of shared mappings. vma_needs_reserves() + * has already taken this difference in semantics into + * account. Therefore, the meaning of chg is the same + * as in the shared case above. Code could easily be + * combined, but keeping it separate draws attention to + * subtle differences. + */ + if (chg) + return false; + else + return true; + } return false; } @@ -1003,7 +1022,9 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) ((node = hstate_next_node_to_free(hs, mask)) || 1); \ nr_nodes--) -#if defined(CONFIG_X86_64) && ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)) +#if defined(CONFIG_ARCH_HAS_GIGANTIC_PAGE) && \ + ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || \ + defined(CONFIG_CMA)) static void destroy_compound_gigantic_page(struct page *page, unsigned int order) { @@ -1011,6 +1032,7 @@ static void destroy_compound_gigantic_page(struct page *page, int nr_pages = 1 << order; struct page *p = page + 1; + atomic_set(compound_mapcount_ptr(page), 0); for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { clear_compound_head(p); set_page_refcounted(p); @@ -1415,37 +1437,61 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, /* * Dissolve a given free hugepage into free buddy pages. This function does - * nothing for in-use (including surplus) hugepages. + * nothing for in-use (including surplus) hugepages. Returns -EBUSY if the + * number of free hugepages would be reduced below the number of reserved + * hugepages. */ -static void dissolve_free_huge_page(struct page *page) +static int dissolve_free_huge_page(struct page *page) { + int rc = 0; + spin_lock(&hugetlb_lock); if (PageHuge(page) && !page_count(page)) { - struct hstate *h = page_hstate(page); - int nid = page_to_nid(page); - list_del(&page->lru); + struct page *head = compound_head(page); + struct hstate *h = page_hstate(head); + int nid = page_to_nid(head); + if (h->free_huge_pages - h->resv_huge_pages == 0) { + rc = -EBUSY; + goto out; + } + list_del(&head->lru); h->free_huge_pages--; h->free_huge_pages_node[nid]--; - update_and_free_page(h, page); + h->max_huge_pages--; + update_and_free_page(h, head); } +out: spin_unlock(&hugetlb_lock); + return rc; } /* * Dissolve free hugepages in a given pfn range. Used by memory hotplug to * make specified memory blocks removable from the system. - * Note that start_pfn should aligned with (minimum) hugepage size. + * Note that this will dissolve a free gigantic hugepage completely, if any + * part of it lies within the given range. + * Also note that if dissolve_free_huge_page() returns with an error, all + * free hugepages that were dissolved before that error are lost. */ -void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) +int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) { unsigned long pfn; + struct page *page; + int rc = 0; if (!hugepages_supported()) - return; + return rc; - VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << minimum_order)); - for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) - dissolve_free_huge_page(pfn_to_page(pfn)); + for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) { + page = pfn_to_page(pfn); + if (PageHuge(page) && !page_count(page)) { + rc = dissolve_free_huge_page(page); + if (rc) + break; + } + } + + return rc; } /* @@ -1816,6 +1862,25 @@ static long __vma_reservation_common(struct hstate *h, if (vma->vm_flags & VM_MAYSHARE) return ret; + else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) && ret >= 0) { + /* + * In most cases, reserves always exist for private mappings. + * However, a file associated with mapping could have been + * hole punched or truncated after reserves were consumed. + * As subsequent fault on such a range will not use reserves. + * Subtle - The reserve map for private mappings has the + * opposite meaning than that of shared mappings. If NO + * entry is in the reserve map, it means a reservation exists. + * If an entry exists in the reserve map, it means the + * reservation has already been consumed. As a result, the + * return value of this routine is the opposite of the + * value returned from reserve map manipulation routines above. + */ + if (ret) + return 0; + else + return 1; + } else return ret < 0 ? ret : 0; } @@ -2175,6 +2240,10 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count, * and reducing the surplus. */ spin_unlock(&hugetlb_lock); + + /* yield cpu to avoid soft lockup */ + cond_resched(); + if (hstate_is_gigantic(h)) ret = alloc_fresh_gigantic_page(h, nodes_allowed); else @@ -3138,7 +3207,6 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page) { - int force_flush = 0; struct mm_struct *mm = vma->vm_mm; unsigned long address; pte_t *ptep; @@ -3157,19 +3225,22 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, tlb_start_vma(tlb, vma); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); address = start; -again: for (; address < end; address += sz) { ptep = huge_pte_offset(mm, address); if (!ptep) continue; ptl = huge_pte_lock(h, mm, ptep); - if (huge_pmd_unshare(mm, &address, ptep)) - goto unlock; + if (huge_pmd_unshare(mm, &address, ptep)) { + spin_unlock(ptl); + continue; + } pte = huge_ptep_get(ptep); - if (huge_pte_none(pte)) - goto unlock; + if (huge_pte_none(pte)) { + spin_unlock(ptl); + continue; + } /* * Migrating hugepage or HWPoisoned hugepage is already @@ -3177,7 +3248,8 @@ again: */ if (unlikely(!pte_present(pte))) { huge_pte_clear(mm, address, ptep); - goto unlock; + spin_unlock(ptl); + continue; } page = pte_page(pte); @@ -3187,9 +3259,10 @@ again: * are about to unmap is the actual page of interest. */ if (ref_page) { - if (page != ref_page) - goto unlock; - + if (page != ref_page) { + spin_unlock(ptl); + continue; + } /* * Mark the VMA as having unmapped its page so that * future faults in this VMA will fail rather than @@ -3205,30 +3278,14 @@ again: hugetlb_count_sub(pages_per_huge_page(h), mm); page_remove_rmap(page, true); - force_flush = !__tlb_remove_page(tlb, page); - if (force_flush) { - address += sz; - spin_unlock(ptl); - break; - } - /* Bail out after unmapping reference page if supplied */ - if (ref_page) { - spin_unlock(ptl); - break; - } -unlock: + spin_unlock(ptl); - } - /* - * mmu_gather ran out of room to batch pages, we break out of - * the PTE lock to avoid doing the potential expensive TLB invalidate - * and page-free while holding it. - */ - if (force_flush) { - force_flush = 0; - tlb_flush_mmu(tlb); - if (address < end && !ref_page) - goto again; + tlb_remove_page_size(tlb, page, huge_page_size(h)); + /* + * Bail out after unmapping reference page if supplied + */ + if (ref_page) + break; } mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); tlb_end_vma(tlb, vma); @@ -3287,7 +3344,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, address = address & huge_page_mask(h); pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - mapping = file_inode(vma->vm_file)->i_mapping; + mapping = vma->vm_file->f_mapping; /* * Take the mapping lock for the duration of the table walk. As @@ -3344,7 +3401,7 @@ retry_avoidcopy: /* If no-one else is actually using this page, avoid the copy * and just make the page writable */ if (page_mapcount(old_page) == 1 && PageAnon(old_page)) { - page_move_anon_rmap(old_page, vma, address); + page_move_anon_rmap(old_page, vma); set_huge_ptep_writable(vma, address, ptep); return 0; } @@ -3909,6 +3966,14 @@ same_page: return i ? i : -EFAULT; } +#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE +/* + * ARCHes with special requirements for evicting HUGETLB backing TLB entries can + * implement this. + */ +#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) +#endif + unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot) { @@ -3969,7 +4034,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, * once we release i_mmap_rwsem, another task can do the final put_page * and that page table be reused and filled with junk. */ - flush_tlb_range(vma, start, end); + flush_hugetlb_tlb_range(vma, start, end); mmu_notifier_invalidate_range(mm, start, end); i_mmap_unlock_write(vma->vm_file->f_mapping); mmu_notifier_invalidate_range_end(mm, start, end); @@ -4190,7 +4255,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) if (saddr) { spte = huge_pte_offset(svma->vm_mm, saddr); if (spte) { - mm_inc_nr_pmds(mm); get_page(virt_to_page(spte)); break; } @@ -4205,9 +4269,9 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) if (pud_none(*pud)) { pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); + mm_inc_nr_pmds(mm); } else { put_page(virt_to_page(spte)); - mm_inc_nr_pmds(mm); } spin_unlock(ptl); out: @@ -4278,7 +4342,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte = (pte_t *)pmd_alloc(mm, pud, addr); } } - BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); + BUG_ON(pte && pte_present(*pte) && !pte_huge(*pte)); return pte; } @@ -4363,7 +4427,6 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address, /* * This function is called from memory failure code. - * Assume the caller holds page lock of the head page. */ int dequeue_hwpoisoned_huge_page(struct page *hpage) { |