aboutsummaryrefslogtreecommitdiff
path: root/arch/sparc/mm/hugetlbpage.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-08-09 22:12:35 -0700
committerDavid S. Miller <davem@davemloft.net>2017-08-09 22:12:35 -0700
commit99274b818a5dc7c3226866970a0cec0d40ed5f0f (patch)
tree5dba5ae98cf31a08c1b4c8fc6f3803cf4f0d0c50 /arch/sparc/mm/hugetlbpage.c
parentcb8c65ccff7f77d0285f1b126c72d37b2572c865 (diff)
parent763797847460a9659a6dfa79b354f3b58e68d523 (diff)
Merge branch 'sparc64-Add-16GB-hugepage-support'
Nitin Gupta says: ==================== sparc64: Add 16GB hugepage support SPARC architecture supports 16G hugepages but the kernel did not support these. This patch series adds support for it and also cleanes up some page walk/alloc functions. Patch 1/3: Core changes needed to add 16G hugepage support: To map a single 16G hugepage, two PUD entries are used. Each PUD entry maps 8G portion of a 16G page. This page table encoding scheme is same as that used for hugepages at PMD level (8M, 256M and 2G pages) where each PMD entry points successively to 8M regions within a page. No page table entries below the PUD level are allocated for 16G hugepage since those are not required. TSB entries for a 16G page are created at every 4M boundary since the HUGE_TSB is used for these pages which is configured with page size of 4M. When walking page tables (on a TSB miss), bits [32:22] are transferred from vaddr to PUD to resolve addresses at 4M boundary. The resolved address mapping is then stored in HUGE_TSB. Patch 2/3: get_user_pages() etc. are used for direct IO. These functions were not aware of hugepages at the PUD level and would try to continue walking page tables beyond the PUD level. Since 16G hugepages have page tables allocated till PUD level only, these accesses would result in invalid access. This patch adds the case for PUD huge pages to these functions. Patch 3/3: Patch 1 added the case of PUD entry being huge in page table walk and alloc functions. This new case further increased nesting in these functions and made them harder to follow. This patch flattens these functions for better readability. Cc: sparclinux@vger.kernel.org Changelog v5 vs v4: - Checking at PUD level for hugepage entry during page table walk is patched out if 16GB hugepages are not being used. Changelog v4 vs v3: - Added cover letter (patch 0/4) for patch series. Changelog v3 vs v2: - Fixed email headers so the subject shows up correctly. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/mm/hugetlbpage.c')
-rw-r--r--arch/sparc/mm/hugetlbpage.c102
1 files changed, 60 insertions, 42 deletions
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 28ee8d8ffa07..bcd8cdbc377f 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -143,6 +143,10 @@ static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
switch (shift) {
+ case HPAGE_16GB_SHIFT:
+ hugepage_size = _PAGE_SZ16GB_4V;
+ pte_val(entry) |= _PAGE_PUD_HUGE;
+ break;
case HPAGE_2GB_SHIFT:
hugepage_size = _PAGE_SZ2GB_4V;
pte_val(entry) |= _PAGE_PMD_HUGE;
@@ -187,6 +191,9 @@ static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
unsigned int shift;
switch (tte_szbits) {
+ case _PAGE_SZ16GB_4V:
+ shift = HPAGE_16GB_SHIFT;
+ break;
case _PAGE_SZ2GB_4V:
shift = HPAGE_2GB_SHIFT;
break;
@@ -259,22 +266,19 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
- pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
- if (pud) {
- pmd = pmd_alloc(mm, pud, addr);
- if (!pmd)
- return NULL;
-
- if (sz >= PMD_SIZE)
- pte = (pte_t *)pmd;
- else
- pte = pte_alloc_map(mm, pmd, addr);
- }
-
- return pte;
+ if (!pud)
+ return NULL;
+ if (sz >= PUD_SIZE)
+ return (pte_t *)pud;
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return NULL;
+ if (sz >= PMD_SIZE)
+ return (pte_t *)pmd;
+ return pte_alloc_map(mm, pmd, addr);
}
pte_t *huge_pte_offset(struct mm_struct *mm,
@@ -283,34 +287,40 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
- pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
- if (!pgd_none(*pgd)) {
- pud = pud_offset(pgd, addr);
- if (!pud_none(*pud)) {
- pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd)) {
- if (is_hugetlb_pmd(*pmd))
- pte = (pte_t *)pmd;
- else
- pte = pte_offset_map(pmd, addr);
- }
- }
- }
-
- return pte;
+ if (pgd_none(*pgd))
+ return NULL;
+ pud = pud_offset(pgd, addr);
+ if (pud_none(*pud))
+ return NULL;
+ if (is_hugetlb_pud(*pud))
+ return (pte_t *)pud;
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ return NULL;
+ if (is_hugetlb_pmd(*pmd))
+ return (pte_t *)pmd;
+ return pte_offset_map(pmd, addr);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry)
{
- unsigned int i, nptes, orig_shift, shift;
- unsigned long size;
+ unsigned int nptes, orig_shift, shift;
+ unsigned long i, size;
pte_t orig;
size = huge_tte_to_size(entry);
- shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT;
+
+ shift = PAGE_SHIFT;
+ if (size >= PUD_SIZE)
+ shift = PUD_SHIFT;
+ else if (size >= PMD_SIZE)
+ shift = PMD_SHIFT;
+ else
+ shift = PAGE_SHIFT;
+
nptes = size >> shift;
if (!pte_present(*ptep) && pte_present(entry))
@@ -333,19 +343,23 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- unsigned int i, nptes, hugepage_shift;
+ unsigned int i, nptes, orig_shift, shift;
unsigned long size;
pte_t entry;
entry = *ptep;
size = huge_tte_to_size(entry);
- if (size >= HPAGE_SIZE)
- nptes = size >> PMD_SHIFT;
+
+ shift = PAGE_SHIFT;
+ if (size >= PUD_SIZE)
+ shift = PUD_SHIFT;
+ else if (size >= PMD_SIZE)
+ shift = PMD_SHIFT;
else
- nptes = size >> PAGE_SHIFT;
+ shift = PAGE_SHIFT;
- hugepage_shift = pte_none(entry) ? PAGE_SHIFT :
- huge_tte_to_shift(entry);
+ nptes = size >> shift;
+ orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);
if (pte_present(entry))
mm->context.hugetlb_pte_count -= nptes;
@@ -354,11 +368,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
for (i = 0; i < nptes; i++)
ptep[i] = __pte(0UL);
- maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift);
+ maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
if (size == HPAGE_SIZE)
maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
- hugepage_shift);
+ orig_shift);
return entry;
}
@@ -371,7 +385,8 @@ int pmd_huge(pmd_t pmd)
int pud_huge(pud_t pud)
{
- return 0;
+ return !pud_none(pud) &&
+ (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
}
static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
@@ -435,8 +450,11 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
continue;
- hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
- ceiling);
+ if (is_hugetlb_pud(*pud))
+ pud_clear(pud);
+ else
+ hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
+ ceiling);
} while (pud++, addr = next, addr != end);
start &= PGDIR_MASK;