diff options
author | David S. Miller <davem@davemloft.net> | 2017-08-09 22:12:35 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-08-09 22:12:35 -0700 |
commit | 99274b818a5dc7c3226866970a0cec0d40ed5f0f (patch) | |
tree | 5dba5ae98cf31a08c1b4c8fc6f3803cf4f0d0c50 /arch/sparc/mm/hugetlbpage.c | |
parent | cb8c65ccff7f77d0285f1b126c72d37b2572c865 (diff) | |
parent | 763797847460a9659a6dfa79b354f3b58e68d523 (diff) |
Merge branch 'sparc64-Add-16GB-hugepage-support'
Nitin Gupta says:
====================
sparc64: Add 16GB hugepage support
SPARC architecture supports 16G hugepages but the kernel did not
support these. This patch series adds support for it and also cleanes
up some page walk/alloc functions.
Patch 1/3: Core changes needed to add 16G hugepage support: To map a
single 16G hugepage, two PUD entries are used. Each PUD entry maps
8G portion of a 16G page. This page table encoding scheme is same as
that used for hugepages at PMD level (8M, 256M and 2G pages) where
each PMD entry points successively to 8M regions within a page. No
page table entries below the PUD level are allocated for 16G
hugepage since those are not required.
TSB entries for a 16G page are created at every 4M boundary since
the HUGE_TSB is used for these pages which is configured with page
size of 4M. When walking page tables (on a TSB miss), bits [32:22]
are transferred from vaddr to PUD to resolve addresses at 4M
boundary. The resolved address mapping is then stored in HUGE_TSB.
Patch 2/3: get_user_pages() etc. are used for direct IO. These
functions were not aware of hugepages at the PUD level and would try
to continue walking page tables beyond the PUD level. Since 16G
hugepages have page tables allocated till PUD level only, these
accesses would result in invalid access. This patch adds the case
for PUD huge pages to these functions.
Patch 3/3: Patch 1 added the case of PUD entry being huge in page
table walk and alloc functions. This new case further increased
nesting in these functions and made them harder to follow. This
patch flattens these functions for better readability.
Cc: sparclinux@vger.kernel.org
Changelog v5 vs v4:
- Checking at PUD level for hugepage entry during page table walk is
patched out if 16GB hugepages are not being used.
Changelog v4 vs v3:
- Added cover letter (patch 0/4) for patch series.
Changelog v3 vs v2:
- Fixed email headers so the subject shows up correctly.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/mm/hugetlbpage.c')
-rw-r--r-- | arch/sparc/mm/hugetlbpage.c | 102 |
1 files changed, 60 insertions, 42 deletions
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 28ee8d8ffa07..bcd8cdbc377f 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -143,6 +143,10 @@ static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift) pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V; switch (shift) { + case HPAGE_16GB_SHIFT: + hugepage_size = _PAGE_SZ16GB_4V; + pte_val(entry) |= _PAGE_PUD_HUGE; + break; case HPAGE_2GB_SHIFT: hugepage_size = _PAGE_SZ2GB_4V; pte_val(entry) |= _PAGE_PMD_HUGE; @@ -187,6 +191,9 @@ static unsigned int sun4v_huge_tte_to_shift(pte_t entry) unsigned int shift; switch (tte_szbits) { + case _PAGE_SZ16GB_4V: + shift = HPAGE_16GB_SHIFT; + break; case _PAGE_SZ2GB_4V: shift = HPAGE_2GB_SHIFT; break; @@ -259,22 +266,19 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte = NULL; pgd = pgd_offset(mm, addr); pud = pud_alloc(mm, pgd, addr); - if (pud) { - pmd = pmd_alloc(mm, pud, addr); - if (!pmd) - return NULL; - - if (sz >= PMD_SIZE) - pte = (pte_t *)pmd; - else - pte = pte_alloc_map(mm, pmd, addr); - } - - return pte; + if (!pud) + return NULL; + if (sz >= PUD_SIZE) + return (pte_t *)pud; + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return NULL; + if (sz >= PMD_SIZE) + return (pte_t *)pmd; + return pte_alloc_map(mm, pmd, addr); } pte_t *huge_pte_offset(struct mm_struct *mm, @@ -283,34 +287,40 @@ pte_t *huge_pte_offset(struct mm_struct *mm, pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte = NULL; pgd = pgd_offset(mm, addr); - if (!pgd_none(*pgd)) { - pud = pud_offset(pgd, addr); - if (!pud_none(*pud)) { - pmd = pmd_offset(pud, addr); - if (!pmd_none(*pmd)) { - if (is_hugetlb_pmd(*pmd)) - pte = (pte_t *)pmd; - else - pte = pte_offset_map(pmd, addr); - } - } - } - - return pte; + if (pgd_none(*pgd)) + return NULL; + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return NULL; + if (is_hugetlb_pud(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return NULL; + if (is_hugetlb_pmd(*pmd)) + return (pte_t *)pmd; + return pte_offset_map(pmd, addr); } void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { - unsigned int i, nptes, orig_shift, shift; - unsigned long size; + unsigned int nptes, orig_shift, shift; + unsigned long i, size; pte_t orig; size = huge_tte_to_size(entry); - shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT; + + shift = PAGE_SHIFT; + if (size >= PUD_SIZE) + shift = PUD_SHIFT; + else if (size >= PMD_SIZE) + shift = PMD_SHIFT; + else + shift = PAGE_SHIFT; + nptes = size >> shift; if (!pte_present(*ptep) && pte_present(entry)) @@ -333,19 +343,23 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned int i, nptes, hugepage_shift; + unsigned int i, nptes, orig_shift, shift; unsigned long size; pte_t entry; entry = *ptep; size = huge_tte_to_size(entry); - if (size >= HPAGE_SIZE) - nptes = size >> PMD_SHIFT; + + shift = PAGE_SHIFT; + if (size >= PUD_SIZE) + shift = PUD_SHIFT; + else if (size >= PMD_SIZE) + shift = PMD_SHIFT; else - nptes = size >> PAGE_SHIFT; + shift = PAGE_SHIFT; - hugepage_shift = pte_none(entry) ? PAGE_SHIFT : - huge_tte_to_shift(entry); + nptes = size >> shift; + orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry); if (pte_present(entry)) mm->context.hugetlb_pte_count -= nptes; @@ -354,11 +368,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, for (i = 0; i < nptes; i++) ptep[i] = __pte(0UL); - maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift); + maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift); /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */ if (size == HPAGE_SIZE) maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0, - hugepage_shift); + orig_shift); return entry; } @@ -371,7 +385,8 @@ int pmd_huge(pmd_t pmd) int pud_huge(pud_t pud) { - return 0; + return !pud_none(pud) && + (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID; } static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, @@ -435,8 +450,11 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; - hugetlb_free_pmd_range(tlb, pud, addr, next, floor, - ceiling); + if (is_hugetlb_pud(*pud)) + pud_clear(pud); + else + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, + ceiling); } while (pud++, addr = next, addr != end); start &= PGDIR_MASK; |