Diffstat (limited to 'arch')
-rw-r--r--  arch/arc/Kconfig | 1
-rw-r--r--  arch/arc/include/asm/io.h | 7
-rw-r--r--  arch/arc/mm/ioremap.c | 49
-rw-r--r--  arch/arm/include/asm/hugetlb.h | 1
-rw-r--r--  arch/arm/include/asm/tlb.h | 12
-rw-r--r--  arch/arm/mm/fault-armv.c | 3
-rw-r--r--  arch/arm/mm/mmu.c | 7
-rw-r--r--  arch/arm64/Kconfig | 5
-rw-r--r--  arch/arm64/include/asm/hugetlb.h | 16
-rw-r--r--  arch/arm64/include/asm/io.h | 3
-rw-r--r--  arch/arm64/include/asm/mte.h | 4
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 26
-rw-r--r--  arch/arm64/include/asm/tlb.h | 14
-rw-r--r--  arch/arm64/include/asm/tlbbatch.h | 12
-rw-r--r--  arch/arm64/include/asm/tlbflush.h | 64
-rw-r--r--  arch/arm64/kernel/mte.c | 37
-rw-r--r--  arch/arm64/mm/fault.c | 2
-rw-r--r--  arch/arm64/mm/ioremap.c | 10
-rw-r--r--  arch/arm64/mm/mmu.c | 7
-rw-r--r--  arch/csky/include/asm/pgalloc.h | 4
-rw-r--r--  arch/hexagon/Kconfig | 1
-rw-r--r--  arch/hexagon/include/asm/io.h | 11
-rw-r--r--  arch/hexagon/include/asm/pgalloc.h | 8
-rw-r--r--  arch/hexagon/kernel/hexagon_ksyms.c | 2
-rw-r--r--  arch/hexagon/mm/Makefile | 2
-rw-r--r--  arch/hexagon/mm/ioremap.c | 44
-rw-r--r--  arch/ia64/Kconfig | 1
-rw-r--r--  arch/ia64/include/asm/io.h | 13
-rw-r--r--  arch/ia64/mm/ioremap.c | 41
-rw-r--r--  arch/loongarch/Kconfig | 2
-rw-r--r--  arch/loongarch/include/asm/io.h | 2
-rw-r--r--  arch/loongarch/include/asm/pgalloc.h | 27
-rw-r--r--  arch/loongarch/mm/pgtable.c | 7
-rw-r--r--  arch/m68k/include/asm/io_mm.h | 2
-rw-r--r--  arch/m68k/include/asm/kmap.h | 2
-rw-r--r--  arch/m68k/include/asm/mcf_pgalloc.h | 47
-rw-r--r--  arch/m68k/include/asm/sun3_pgalloc.h | 8
-rw-r--r--  arch/m68k/mm/motorola.c | 4
-rw-r--r--  arch/mips/include/asm/io.h | 5
-rw-r--r--  arch/mips/include/asm/pgalloc.h | 32
-rw-r--r--  arch/mips/mm/pgtable.c | 8
-rw-r--r--  arch/nios2/include/asm/pgalloc.h | 8
-rw-r--r--  arch/openrisc/Kconfig | 1
-rw-r--r--  arch/openrisc/include/asm/io.h | 11
-rw-r--r--  arch/openrisc/include/asm/pgalloc.h | 8
-rw-r--r--  arch/openrisc/mm/ioremap.c | 82
-rw-r--r--  arch/parisc/Kconfig | 1
-rw-r--r--  arch/parisc/include/asm/io.h | 15
-rw-r--r--  arch/parisc/mm/ioremap.c | 62
-rw-r--r--  arch/powerpc/Kconfig | 3
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash.h | 9
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 155
-rw-r--r--  arch/powerpc/include/asm/book3s/64/radix.h | 49
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush-radix.h | 2
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush.h | 9
-rw-r--r--  arch/powerpc/include/asm/io.h | 17
-rw-r--r--  arch/powerpc/include/asm/pgalloc.h | 4
-rw-r--r--  arch/powerpc/include/asm/pgtable.h | 27
-rw-r--r--  arch/powerpc/kvm/book3s_hv_uvmem.c | 1
-rw-r--r--  arch/powerpc/mm/book3s64/hash_pgtable.c | 2
-rw-r--r--  arch/powerpc/mm/book3s64/mmu_context.c | 10
-rw-r--r--  arch/powerpc/mm/book3s64/pgtable.c | 110
-rw-r--r--  arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 1
-rw-r--r--  arch/powerpc/mm/book3s64/radix_pgtable.c | 574
-rw-r--r--  arch/powerpc/mm/book3s64/radix_tlb.c | 11
-rw-r--r--  arch/powerpc/mm/fault.c | 4
-rw-r--r--  arch/powerpc/mm/init_64.c | 37
-rw-r--r--  arch/powerpc/mm/ioremap.c | 26
-rw-r--r--  arch/powerpc/mm/ioremap_32.c | 19
-rw-r--r--  arch/powerpc/mm/ioremap_64.c | 12
-rw-r--r--  arch/powerpc/mm/pgtable-frag.c | 73
-rw-r--r--  arch/powerpc/mm/pgtable.c | 8
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 1
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-memory.c | 2
-rw-r--r--  arch/powerpc/xmon/xmon.c | 2
-rw-r--r--  arch/riscv/Kconfig | 2
-rw-r--r--  arch/riscv/include/asm/hugetlb.h | 1
-rw-r--r--  arch/riscv/include/asm/pgalloc.h | 8
-rw-r--r--  arch/riscv/include/asm/pgtable.h | 12
-rw-r--r--  arch/riscv/mm/fault.c | 4
-rw-r--r--  arch/riscv/mm/init.c | 16
-rw-r--r--  arch/s390/Kconfig | 3
-rw-r--r--  arch/s390/include/asm/io.h | 21
-rw-r--r--  arch/s390/include/asm/pgalloc.h | 8
-rw-r--r--  arch/s390/include/asm/tlb.h | 4
-rw-r--r--  arch/s390/mm/fault.c | 2
-rw-r--r--  arch/s390/mm/pgalloc.c | 176
-rw-r--r--  arch/s390/pci/pci.c | 57
-rw-r--r--  arch/sh/Kconfig | 1
-rw-r--r--  arch/sh/include/asm/io.h | 89
-rw-r--r--  arch/sh/include/asm/io_noioport.h | 7
-rw-r--r--  arch/sh/include/asm/pgalloc.h | 9
-rw-r--r--  arch/sh/mm/ioremap.c | 65
-rw-r--r--  arch/sparc/include/asm/pgalloc_64.h | 4
-rw-r--r--  arch/sparc/kernel/setup_32.c | 2
-rw-r--r--  arch/sparc/mm/init_64.c | 33
-rw-r--r--  arch/sparc/mm/srmmu.c | 5
-rw-r--r--  arch/um/include/asm/pgalloc.h | 18
-rw-r--r--  arch/x86/Kconfig | 7
-rw-r--r--  arch/x86/include/asm/io.h | 5
-rw-r--r--  arch/x86/include/asm/pgtable.h | 16
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 24
-rw-r--r--  arch/x86/mm/fault.c | 4
-rw-r--r--  arch/x86/mm/pgtable.c | 47
-rw-r--r--  arch/x86/mm/tlb.c | 2
-rw-r--r--  arch/x86/xen/mmu_pv.c | 2
-rw-r--r--  arch/xtensa/Kconfig | 1
-rw-r--r--  arch/xtensa/include/asm/io.h | 32
-rw-r--r--  arch/xtensa/mm/ioremap.c | 58
109 files changed, 1647 insertions, 1007 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 96cf8720bb93..6f4995ad9873 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -26,6 +26,7 @@ config ARC
select GENERIC_PENDING_IRQ if SMP
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
+ select GENERIC_IOREMAP
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARC_MMU_V4
diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index 80347382a380..4fdb7350636c 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -21,8 +21,9 @@
#endif
extern void __iomem *ioremap(phys_addr_t paddr, unsigned long size);
-extern void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
- unsigned long flags);
+#define ioremap ioremap
+#define ioremap_prot ioremap_prot
+#define iounmap iounmap
static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
return (void __iomem *)port;
@@ -32,8 +33,6 @@ static inline void ioport_unmap(void __iomem *addr)
{
}
-extern void iounmap(const volatile void __iomem *addr);
-
/*
* io{read,write}{16,32}be() macros
*/
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
index 712c2311daef..b07004d53267 100644
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -8,7 +8,6 @@
#include <linux/module.h>
#include <linux/io.h>
#include <linux/mm.h>
-#include <linux/slab.h>
#include <linux/cache.h>
static inline bool arc_uncached_addr_space(phys_addr_t paddr)
@@ -25,13 +24,6 @@ static inline bool arc_uncached_addr_space(phys_addr_t paddr)
void __iomem *ioremap(phys_addr_t paddr, unsigned long size)
{
- phys_addr_t end;
-
- /* Don't allow wraparound or zero size */
- end = paddr + size - 1;
- if (!size || (end < paddr))
- return NULL;
-
/*
* If the region is h/w uncached, MMU mapping can be elided as optim
* The cast to u32 is fine as this region can only be inside 4GB
@@ -51,55 +43,22 @@ EXPORT_SYMBOL(ioremap);
* ARC hardware uncached region, this one still goes thru the MMU as caller
* might need finer access control (R/W/X)
*/
-void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
+void __iomem *ioremap_prot(phys_addr_t paddr, size_t size,
unsigned long flags)
{
- unsigned int off;
- unsigned long vaddr;
- struct vm_struct *area;
- phys_addr_t end;
pgprot_t prot = __pgprot(flags);
- /* Don't allow wraparound, zero size */
- end = paddr + size - 1;
- if ((!size) || (end < paddr))
- return NULL;
-
- /* An early platform driver might end up here */
- if (!slab_is_available())
- return NULL;
-
/* force uncached */
- prot = pgprot_noncached(prot);
-
- /* Mappings have to be page-aligned */
- off = paddr & ~PAGE_MASK;
- paddr &= PAGE_MASK_PHYS;
- size = PAGE_ALIGN(end + 1) - paddr;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area(size, VM_IOREMAP);
- if (!area)
- return NULL;
- area->phys_addr = paddr;
- vaddr = (unsigned long)area->addr;
- if (ioremap_page_range(vaddr, vaddr + size, paddr, prot)) {
- vunmap((void __force *)vaddr);
- return NULL;
- }
- return (void __iomem *)(off + (char __iomem *)vaddr);
+ return generic_ioremap_prot(paddr, size, pgprot_noncached(prot));
}
EXPORT_SYMBOL(ioremap_prot);
-
-void iounmap(const volatile void __iomem *addr)
+void iounmap(volatile void __iomem *addr)
{
/* weird double cast to handle phys_addr_t > 32 bits */
if (arc_uncached_addr_space((phys_addr_t)(u32)addr))
return;
- vfree((void *)(PAGE_MASK & (unsigned long __force)addr));
+ generic_iounmap(addr);
}
EXPORT_SYMBOL(iounmap);
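The arc conversion above is the template this series repeats for every architecture: the open-coded wraparound check, page alignment, get_vm_area() and ioremap_page_range() sequence is deleted in favour of the common GENERIC_IOREMAP helpers, and the arch keeps only its own quirk (here, eliding the MMU mapping for the hardware-uncached region). A rough sketch of what the shared helper does — simplified from the logic being removed here, not copied from mm/ioremap.c:

/*
 * Illustrative sketch only (the real code is generic_ioremap_prot() in
 * mm/ioremap.c): reject wraparound/zero size, page-align the request,
 * grab a VM_IOREMAP area and map it with the caller-supplied pgprot.
 */
static void __iomem *ioremap_prot_sketch(phys_addr_t phys_addr, size_t size,
					 pgprot_t prot)
{
	unsigned long offset, vaddr;
	phys_addr_t last_addr;
	struct vm_struct *area;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	/* Mappings have to be page-aligned */
	offset = offset_in_page(phys_addr);
	phys_addr -= offset;
	size = PAGE_ALIGN(size + offset);

	area = get_vm_area(size, VM_IOREMAP);
	if (!area)
		return NULL;
	area->phys_addr = phys_addr;
	vaddr = (unsigned long)area->addr;

	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
		free_vm_area(area);
		return NULL;
	}

	return (void __iomem *)(vaddr + offset);
}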
diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h
index d02d6ca88e92..a3a82b7158d4 100644
--- a/arch/arm/include/asm/hugetlb.h
+++ b/arch/arm/include/asm/hugetlb.h
@@ -10,6 +10,7 @@
#ifndef _ASM_ARM_HUGETLB_H
#define _ASM_ARM_HUGETLB_H
+#include <asm/cacheflush.h>
#include <asm/page.h>
#include <asm/hugetlb-3level.h>
#include <asm-generic/hugetlb.h>
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index b8cbe03ad260..f40d06ad5d2a 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -39,7 +39,9 @@ static inline void __tlb_remove_table(void *_table)
static inline void
__pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr)
{
- pgtable_pte_page_dtor(pte);
+ struct ptdesc *ptdesc = page_ptdesc(pte);
+
+ pagetable_pte_dtor(ptdesc);
#ifndef CONFIG_ARM_LPAE
/*
@@ -50,17 +52,17 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr)
__tlb_adjust_range(tlb, addr - PAGE_SIZE, 2 * PAGE_SIZE);
#endif
- tlb_remove_table(tlb, pte);
+ tlb_remove_ptdesc(tlb, ptdesc);
}
static inline void
__pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
{
#ifdef CONFIG_ARM_LPAE
- struct page *page = virt_to_page(pmdp);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pmdp);
- pgtable_pmd_page_dtor(page);
- tlb_remove_table(tlb, page);
+ pagetable_pmd_dtor(ptdesc);
+ tlb_remove_ptdesc(tlb, ptdesc);
#endif
}
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index ca5302b0b7ee..7cb125497976 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -117,11 +117,10 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
* must use the nested version. This also means we need to
* open-code the spin-locking.
*/
- pte = pte_offset_map(pmd, address);
+ pte = pte_offset_map_nolock(vma->vm_mm, pmd, address, &ptl);
if (!pte)
return 0;
- ptl = pte_lockptr(vma->vm_mm, pmd);
do_pte_lock(ptl);
ret = do_adjust_pte(vma, address, pfn, pte);
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 13fc4bb5f792..fdeaee30d167 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -737,11 +737,12 @@ static void __init *early_alloc(unsigned long sz)
static void *__init late_alloc(unsigned long sz)
{
- void *ptr = (void *)__get_free_pages(GFP_PGTABLE_KERNEL, get_order(sz));
+ void *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_HIGHMEM,
+ get_order(sz));
- if (!ptr || !pgtable_pte_page_ctor(virt_to_page(ptr)))
+ if (!ptdesc || !pagetable_pte_ctor(ptdesc))
BUG();
- return ptr;
+ return ptdesc_to_virt(ptdesc);
}
static pte_t * __init arm_pte_alloc(pmd_t *pmd, unsigned long addr,
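The late_alloc() hunk above shows the other recurring conversion in this series: page-table pages are handled through struct ptdesc instead of struct page, with pagetable_alloc()/pagetable_free() replacing the raw page allocator calls and pagetable_pte_ctor()/pagetable_pte_dtor() replacing the pgtable_pte_page_ctor()/_dtor() pair. A minimal sketch of the converted allocation shape — illustrative only, the function name below is made up and not part of the patch:

/*
 * Hypothetical example of the ptdesc allocation pattern used throughout
 * the per-arch hunks in this diff.
 */
static pte_t *pte_page_alloc_example(void)
{
	struct ptdesc *ptdesc;

	ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_HIGHMEM, 0);
	if (!ptdesc)
		return NULL;
	if (!pagetable_pte_ctor(ptdesc)) {
		pagetable_free(ptdesc);
		return NULL;
	}
	return ptdesc_address(ptdesc);
}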
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a2511b30d0f6..a7a88322bf46 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -78,6 +78,7 @@ config ARM64
select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION
select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION
select ARCH_KEEP_MEMBLOCK
+ select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_GNU_PROPERTY
select ARCH_USE_MEMTEST
@@ -96,6 +97,7 @@ config ARM64
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_SUPPORTS_PER_VMA_LOCK
+ select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
@@ -348,9 +350,6 @@ config GENERIC_CSUM
config GENERIC_CALIBRATE_DELAY
def_bool y
-config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
- def_bool y
-
config SMP
def_bool y
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 6a4a1ab8eb23..f43a38ac1779 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -10,6 +10,7 @@
#ifndef __ASM_HUGETLB_H
#define __ASM_HUGETLB_H
+#include <asm/cacheflush.h>
#include <asm/page.h>
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
@@ -60,4 +61,19 @@ extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
#include <asm-generic/hugetlb.h>
+#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
+static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end)
+{
+ unsigned long stride = huge_page_size(hstate_vma(vma));
+
+ if (stride == PMD_SIZE)
+ __flush_tlb_range(vma, start, end, stride, false, 2);
+ else if (stride == PUD_SIZE)
+ __flush_tlb_range(vma, start, end, stride, false, 1);
+ else
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
+}
+
#endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 51d92abf945e..3b694511b98f 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -139,8 +139,7 @@ extern void __memset_io(volatile void __iomem *, int, size_t);
* I/O memory mapping functions.
*/
-bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot);
-#define ioremap_allowed ioremap_allowed
+#define ioremap_prot ioremap_prot
#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index c028afb1cd0b..4cedbaa16f41 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -90,7 +90,7 @@ static inline bool try_page_mte_tagging(struct page *page)
}
void mte_zero_clear_page_tags(void *addr);
-void mte_sync_tags(pte_t old_pte, pte_t pte);
+void mte_sync_tags(pte_t pte);
void mte_copy_page_tags(void *kto, const void *kfrom);
void mte_thread_init_user(void);
void mte_thread_switch(struct task_struct *next);
@@ -122,7 +122,7 @@ static inline bool try_page_mte_tagging(struct page *page)
static inline void mte_zero_clear_page_tags(void *addr)
{
}
-static inline void mte_sync_tags(pte_t old_pte, pte_t pte)
+static inline void mte_sync_tags(pte_t pte)
{
}
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0bd18de9fd97..fe4b913589ee 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -337,18 +337,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
* don't expose tags (instruction fetches don't check tags).
*/
if (system_supports_mte() && pte_access_permitted(pte, false) &&
- !pte_special(pte)) {
- pte_t old_pte = READ_ONCE(*ptep);
- /*
- * We only need to synchronise if the new PTE has tags enabled
- * or if swapping in (in which case another mapping may have
- * set tags in the past even if this PTE isn't tagged).
- * (!pte_none() && !pte_present()) is an open coded version of
- * is_swap_pte()
- */
- if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte)))
- mte_sync_tags(old_pte, pte);
- }
+ !pte_special(pte) && pte_tagged(pte))
+ mte_sync_tags(pte);
__check_safe_pte_update(mm, ptep, pte);
@@ -358,7 +348,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- page_table_check_pte_set(mm, addr, ptep, pte);
+ page_table_check_pte_set(mm, ptep, pte);
return __set_pte_at(mm, addr, ptep, pte);
}
@@ -534,14 +524,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
- page_table_check_pmd_set(mm, addr, pmdp, pmd);
+ page_table_check_pmd_set(mm, pmdp, pmd);
return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
- page_table_check_pud_set(mm, addr, pudp, pud);
+ page_table_check_pud_set(mm, pudp, pud);
return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
}
@@ -938,7 +928,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
{
pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
- page_table_check_pte_clear(mm, address, pte);
+ page_table_check_pte_clear(mm, pte);
return pte;
}
@@ -950,7 +940,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
{
pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
- page_table_check_pmd_clear(mm, address, pmd);
+ page_table_check_pmd_clear(mm, pmd);
return pmd;
}
@@ -986,7 +976,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
- page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
+ page_table_check_pmd_set(vma->vm_mm, pmdp, pmd);
return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
}
#endif
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index c995d1f4594f..2c29239d05c3 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -75,18 +75,20 @@ static inline void tlb_flush(struct mmu_gather *tlb)
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
unsigned long addr)
{
- pgtable_pte_page_dtor(pte);
- tlb_remove_table(tlb, pte);
+ struct ptdesc *ptdesc = page_ptdesc(pte);
+
+ pagetable_pte_dtor(ptdesc);
+ tlb_remove_ptdesc(tlb, ptdesc);
}
#if CONFIG_PGTABLE_LEVELS > 2
static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
unsigned long addr)
{
- struct page *page = virt_to_page(pmdp);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pmdp);
- pgtable_pmd_page_dtor(page);
- tlb_remove_table(tlb, page);
+ pagetable_pmd_dtor(ptdesc);
+ tlb_remove_ptdesc(tlb, ptdesc);
}
#endif
@@ -94,7 +96,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
unsigned long addr)
{
- tlb_remove_table(tlb, virt_to_page(pudp));
+ tlb_remove_ptdesc(tlb, virt_to_ptdesc(pudp));
}
#endif
diff --git a/arch/arm64/include/asm/tlbbatch.h b/arch/arm64/include/asm/tlbbatch.h
new file mode 100644
index 000000000000..fedb0b87b8db
--- /dev/null
+++ b/arch/arm64/include/asm/tlbbatch.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_ARM64_TLBBATCH_H
+#define _ARCH_ARM64_TLBBATCH_H
+
+struct arch_tlbflush_unmap_batch {
+ /*
+ * For arm64, HW can do tlb shootdown, so we don't
+ * need to record cpumask for sending IPI
+ */
+};
+
+#endif /* _ARCH_ARM64_TLBBATCH_H */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 412a3b9a3c25..55b50e1d4a84 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -13,6 +13,7 @@
#include <linux/bitfield.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
+#include <linux/mmu_notifier.h>
#include <asm/cputype.h>
#include <asm/mmu.h>
@@ -252,17 +253,26 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
__tlbi(aside1is, asid);
__tlbi_user(aside1is, asid);
dsb(ish);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
-static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
- unsigned long uaddr)
+static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
+ unsigned long uaddr)
{
unsigned long addr;
dsb(ishst);
- addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
+ addr = __TLBI_VADDR(uaddr, ASID(mm));
__tlbi(vale1is, addr);
__tlbi_user(vale1is, addr);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK,
+ (uaddr & PAGE_MASK) + PAGE_SIZE);
+}
+
+static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
+ unsigned long uaddr)
+{
+ return __flush_tlb_page_nosync(vma->vm_mm, uaddr);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -272,6 +282,53 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
dsb(ish);
}
+static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
+ /*
+ * TLB flush deferral is not required on systems which are affected by
+ * ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation
+ * will have two consecutive TLBI instructions with a dsb(ish) in between
+ * defeating the purpose (i.e save overall 'dsb ish' cost).
+ */
+ if (unlikely(cpus_have_const_cap(ARM64_WORKAROUND_REPEAT_TLBI)))
+ return false;
+#endif
+ return true;
+}
+
+static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ __flush_tlb_page_nosync(mm, uaddr);
+}
+
+/*
+ * If mprotect/munmap/etc occurs during TLB batched flushing, we need to
+ * synchronise all the TLBI issued with a DSB to avoid the race mentioned in
+ * flush_tlb_batched_pending().
+ */
+static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
+{
+ dsb(ish);
+}
+
+/*
+ * To support TLB batched flush for multiple pages unmapping, we only send
+ * the TLBI for each page in arch_tlbbatch_add_pending() and wait for the
+ * completion at the end in arch_tlbbatch_flush(). Since we've already issued
+ * TLBI for each page so only a DSB is needed to synchronise its effect on the
+ * other CPUs.
+ *
+ * This will save the time waiting on DSB comparing issuing a TLBI;DSB sequence
+ * for each page.
+ */
+static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+ dsb(ish);
+}
+
/*
* This is meant to avoid soft lock-ups on large TLB flushing ranges and not
* necessarily a performance improvement.
@@ -358,6 +415,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
scale++;
}
dsb(ish);
+ mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
}
static inline void flush_tlb_range(struct vm_area_struct *vma,
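The three hooks added above are everything the generic batched-unmap path needs from arm64: a gate (arch_tlbbatch_should_defer()), a per-page enqueue that issues the TLBI without a trailing DSB (arch_tlbbatch_add_pending()), and a single barrier at the end (arch_tlbbatch_flush()). A hypothetical caller showing the intended ordering — the real call sites live in mm/rmap.c, and everything here other than the three hooks is illustrative:

/*
 * Hypothetical driver of the batching hooks; names other than the
 * arch_tlbbatch_*() helpers are made up for illustration.
 */
static void batched_unmap_example(struct arch_tlbflush_unmap_batch *batch,
				  struct mm_struct *mm,
				  unsigned long *uaddrs, int nr)
{
	int i;

	if (!arch_tlbbatch_should_defer(mm))
		return;		/* caller falls back to per-page flushes */

	/* queue one TLBI per page; no DSB yet, so each call is cheap */
	for (i = 0; i < nr; i++)
		arch_tlbbatch_add_pending(batch, mm, uaddrs[i]);

	/* one DSB waits for all of the queued invalidations at once */
	arch_tlbbatch_flush(batch);
}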
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 4c5ef9b20065..4edecaac8f91 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -35,41 +35,18 @@ DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
#endif
-static void mte_sync_page_tags(struct page *page, pte_t old_pte,
- bool check_swap, bool pte_is_tagged)
-{
- if (check_swap && is_swap_pte(old_pte)) {
- swp_entry_t entry = pte_to_swp_entry(old_pte);
-
- if (!non_swap_entry(entry))
- mte_restore_tags(entry, page);
- }
-
- if (!pte_is_tagged)
- return;
-
- if (try_page_mte_tagging(page)) {
- mte_clear_page_tags(page_address(page));
- set_page_mte_tagged(page);
- }
-}
-
-void mte_sync_tags(pte_t old_pte, pte_t pte)
+void mte_sync_tags(pte_t pte)
{
struct page *page = pte_page(pte);
long i, nr_pages = compound_nr(page);
- bool check_swap = nr_pages == 1;
- bool pte_is_tagged = pte_tagged(pte);
-
- /* Early out if there's nothing to do */
- if (!check_swap && !pte_is_tagged)
- return;
/* if PG_mte_tagged is set, tags have already been initialised */
- for (i = 0; i < nr_pages; i++, page++)
- if (!page_mte_tagged(page))
- mte_sync_page_tags(page, old_pte, check_swap,
- pte_is_tagged);
+ for (i = 0; i < nr_pages; i++, page++) {
+ if (try_page_mte_tagging(page)) {
+ mte_clear_page_tags(page_address(page));
+ set_page_mte_tagged(page);
+ }
+ }
/* ensure the tags are visible before the PTE is set */
smp_wmb();
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 3fe516b32577..103fcbdc6552 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -587,7 +587,6 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
-#ifdef CONFIG_PER_VMA_LOCK
if (!(mm_flags & FAULT_FLAG_USER))
goto lock_mmap;
@@ -615,7 +614,6 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
return 0;
}
lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
retry:
vma = lock_mm_and_find_vma(mm, addr, regs);
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index c5af103d4ad4..269f2f63ab7d 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -3,20 +3,22 @@
#include <linux/mm.h>
#include <linux/io.h>
-bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+ unsigned long prot)
{
unsigned long last_addr = phys_addr + size - 1;
/* Don't allow outside PHYS_MASK */
if (last_addr & ~PHYS_MASK)
- return false;
+ return NULL;
/* Don't allow RAM to be mapped. */
if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr))))
- return false;
+ return NULL;
- return true;
+ return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
}
+EXPORT_SYMBOL(ioremap_prot);
/*
* Must be called after early_fixmap_init
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 95d360805f8a..47781bec6171 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -426,6 +426,7 @@ static phys_addr_t __pgd_pgtable_alloc(int shift)
static phys_addr_t pgd_pgtable_alloc(int shift)
{
phys_addr_t pa = __pgd_pgtable_alloc(shift);
+ struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa));
/*
* Call proper page table ctor in case later we need to
@@ -433,12 +434,12 @@ static phys_addr_t pgd_pgtable_alloc(int shift)
* this pre-allocated page table.
*
* We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is
- * folded, and if so pgtable_pmd_page_ctor() becomes nop.
+ * folded, and if so pagetable_pte_ctor() becomes nop.
*/
if (shift == PAGE_SHIFT)
- BUG_ON(!pgtable_pte_page_ctor(phys_to_page(pa)));
+ BUG_ON(!pagetable_pte_ctor(ptdesc));
else if (shift == PMD_SHIFT)
- BUG_ON(!pgtable_pmd_page_ctor(phys_to_page(pa)));
+ BUG_ON(!pagetable_pmd_ctor(ptdesc));
return pa;
}
diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h
index 7d57e5da0914..9c84c9012e53 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -63,8 +63,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
#define __pte_free_tlb(tlb, pte, address) \
do { \
- pgtable_pte_page_dtor(pte); \
- tlb_remove_page(tlb, pte); \
+ pagetable_pte_dtor(page_ptdesc(pte)); \
+ tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \
} while (0)
extern void pagetable_init(void);
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 6726f4941015..a880ee067d2e 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -25,6 +25,7 @@ config HEXAGON
select NEED_SG_DMA_LENGTH
select NO_IOPORT_MAP
select GENERIC_IOMAP
+ select GENERIC_IOREMAP
select GENERIC_SMP_IDLE_THREAD
select STACKTRACE_SUPPORT
select GENERIC_CLOCKEVENTS_BROADCAST
diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h
index 46a099de85b7..e2b308e32a37 100644
--- a/arch/hexagon/include/asm/io.h
+++ b/arch/hexagon/include/asm/io.h
@@ -27,8 +27,6 @@
extern int remap_area_pages(unsigned long start, unsigned long phys_addr,
unsigned long end, unsigned long flags);
-extern void iounmap(const volatile void __iomem *addr);
-
/* Defined in lib/io.c, needed for smc91x driver. */
extern void __raw_readsw(const void __iomem *addr, void *data, int wordlen);
extern void __raw_writesw(void __iomem *addr, const void *data, int wordlen);
@@ -170,8 +168,13 @@ static inline void writel(u32 data, volatile void __iomem *addr)
#define writew_relaxed __raw_writew
#define writel_relaxed __raw_writel
-void __iomem *ioremap(unsigned long phys_addr, unsigned long size);
-#define ioremap_uc(X, Y) ioremap((X), (Y))
+/*
+ * I/O memory mapping functions.
+ */
+#define _PAGE_IOREMAP (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ (__HEXAGON_C_DEV << 6))
+
+#define ioremap_uc(addr, size) ioremap((addr), (size))
#define __raw_writel writel
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index f0c47e6a7427..55988625e6fb 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -87,10 +87,10 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
max_kernel_seg = pmdindex;
}
-#define __pte_free_tlb(tlb, pte, addr) \
-do { \
- pgtable_pte_page_dtor((pte)); \
- tlb_remove_page((tlb), (pte)); \
+#define __pte_free_tlb(tlb, pte, addr) \
+do { \
+ pagetable_pte_dtor((page_ptdesc(pte))); \
+ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \
} while (0)
#endif
diff --git a/arch/hexagon/kernel/hexagon_ksyms.c b/arch/hexagon/kernel/hexagon_ksyms.c
index ec56ce2d92a2..36a80e31d187 100644
--- a/arch/hexagon/kernel/hexagon_ksyms.c
+++ b/arch/hexagon/kernel/hexagon_ksyms.c
@@ -14,12 +14,10 @@
EXPORT_SYMBOL(__clear_user_hexagon);
EXPORT_SYMBOL(raw_copy_from_user);
EXPORT_SYMBOL(raw_copy_to_user);
-EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__vmgetie);
EXPORT_SYMBOL(__vmsetie);
EXPORT_SYMBOL(__vmyield);
EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(ioremap);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
diff --git a/arch/hexagon/mm/Makefile b/arch/hexagon/mm/Makefile
index 49911a906fd0..ba4b04d962d6 100644
--- a/arch/hexagon/mm/Makefile
+++ b/arch/hexagon/mm/Makefile
@@ -3,5 +3,5 @@
# Makefile for Hexagon memory management subsystem
#
-obj-y := init.o ioremap.o uaccess.o vm_fault.o cache.o
+obj-y := init.o uaccess.o vm_fault.o cache.o
obj-y += copy_to_user.o copy_from_user.o vm_tlb.o
diff --git a/arch/hexagon/mm/ioremap.c b/arch/hexagon/mm/ioremap.c
deleted file mode 100644
index 255c5b1ee1a7..000000000000
--- a/arch/hexagon/mm/ioremap.c
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * I/O remap functions for Hexagon
- *
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- */
-
-#include <linux/io.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-
-void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
-{
- unsigned long last_addr, addr;
- unsigned long offset = phys_addr & ~PAGE_MASK;
- struct vm_struct *area;
-
- pgprot_t prot = __pgprot(_PAGE_PRESENT|_PAGE_READ|_PAGE_WRITE
- |(__HEXAGON_C_DEV << 6));
-
- last_addr = phys_addr + size - 1;
-
- /* Wrapping not allowed */
- if (!size || (last_addr < phys_addr))
- return NULL;
-
- /* Rounds up to next page size, including whole-page offset */
- size = PAGE_ALIGN(offset + size);
-
- area = get_vm_area(size, VM_IOREMAP);
- addr = (unsigned long)area->addr;
-
- if (ioremap_page_range(addr, addr+size, phys_addr, prot)) {
- vunmap((void *)addr);
- return NULL;
- }
-
- return (void __iomem *) (offset + addr);
-}
-
-void iounmap(const volatile void __iomem *addr)
-{
- vunmap((void *) ((unsigned long) addr & PAGE_MASK));
-}
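Deleting arch/hexagon/mm/ioremap.c wholesale works because, once GENERIC_IOREMAP is selected and _PAGE_IOREMAP is defined (see the io.h hunk above), the generic layer supplies ioremap(), ioremap_prot() and iounmap(): asm-generic/io.h builds ioremap() on top of ioremap_prot(), and mm/ioremap.c provides default ioremap_prot()/iounmap() wrappers around generic_ioremap_prot()/generic_iounmap(). Roughly, paraphrased rather than quoted — treat the exact guards and placement as an assumption:

/* asm-generic/io.h (paraphrased): any arch that defines _PAGE_IOREMAP
 * and does not override ioremap gets this wrapper. */
#ifndef ioremap
#define ioremap ioremap
static inline void __iomem *ioremap(phys_addr_t addr, size_t size)
{
	return ioremap_prot(addr, size, _PAGE_IOREMAP);
}
#endif

/* mm/ioremap.c (paraphrased): default ioremap_prot()/iounmap() used when
 * the arch does not provide its own, as hexagon no longer does. */
#ifndef ioremap_prot
void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
			   unsigned long prot)
{
	return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
}
#endif

#ifndef iounmap
void iounmap(volatile void __iomem *addr)
{
	generic_iounmap(addr);
}
#endif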
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 2cd93e6bf0fe..3ab75f36c037 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -47,6 +47,7 @@ config IA64
select GENERIC_IRQ_LEGACY
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
+ select GENERIC_IOREMAP
select GENERIC_SMP_IDLE_THREAD
select ARCH_TASK_STRUCT_ON_STACK
select ARCH_TASK_STRUCT_ALLOCATOR
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index 83a492c8d298..eedc0afa8cad 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -243,15 +243,12 @@ static inline void outsl(unsigned long port, const void *src,
# ifdef __KERNEL__
-extern void __iomem * ioremap(unsigned long offset, unsigned long size);
+#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL)
+
extern void __iomem * ioremap_uc(unsigned long offset, unsigned long size);
-extern void iounmap (volatile void __iomem *addr);
-static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned long size)
-{
- return ioremap(phys_addr, size);
-}
-#define ioremap ioremap
-#define ioremap_cache ioremap_cache
+
+#define ioremap_prot ioremap_prot
+#define ioremap_cache ioremap
#define ioremap_uc ioremap_uc
#define iounmap iounmap
diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c
index 92b81bc91397..711b6abc822e 100644
--- a/arch/ia64/mm/ioremap.c
+++ b/arch/ia64/mm/ioremap.c
@@ -29,13 +29,9 @@ early_ioremap (unsigned long phys_addr, unsigned long size)
return __ioremap_uc(phys_addr);
}
-void __iomem *
-ioremap (unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+ unsigned long flags)
{
- void __iomem *addr;
- struct vm_struct *area;
- unsigned long offset;
- pgprot_t prot;
u64 attr;
unsigned long gran_base, gran_size;
unsigned long page_base;
@@ -68,36 +64,12 @@ ioremap (unsigned long phys_addr, unsigned long size)
*/
page_base = phys_addr & PAGE_MASK;
size = PAGE_ALIGN(phys_addr + size) - page_base;
- if (efi_mem_attribute(page_base, size) & EFI_MEMORY_WB) {
- prot = PAGE_KERNEL;
-
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area(size, VM_IOREMAP);
- if (!area)
- return NULL;
-
- area->phys_addr = phys_addr;
- addr = (void __iomem *) area->addr;
- if (ioremap_page_range((unsigned long) addr,
- (unsigned long) addr + size, phys_addr, prot)) {
- vunmap((void __force *) addr);
- return NULL;
- }
-
- return (void __iomem *) (offset + (char __iomem *)addr);
- }
+ if (efi_mem_attribute(page_base, size) & EFI_MEMORY_WB)
+ return generic_ioremap_prot(phys_addr, size, __pgprot(flags));
return __ioremap_uc(phys_addr);
}
-EXPORT_SYMBOL(ioremap);
+EXPORT_SYMBOL(ioremap_prot);
void __iomem *
ioremap_uc(unsigned long phys_addr, unsigned long size)
@@ -114,8 +86,7 @@ early_iounmap (volatile void __iomem *addr, unsigned long size)
{
}
-void
-iounmap (volatile void __iomem *addr)
+void iounmap(volatile void __iomem *addr)
{
if (REGION_NUMBER(addr) == RGN_GATE)
vunmap((void *) ((unsigned long) addr & PAGE_MASK));
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index e71d5bf2cee0..dc56ddf9ba03 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -60,7 +60,7 @@ config LOONGARCH
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select ARCH_WANT_LD_ORPHAN_WARN
- select ARCH_WANT_OPTIMIZE_VMEMMAP
+ select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
select ARCH_WANTS_NO_INSTR
select BUILDTIME_TABLE_SORT
select COMMON_CLK
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index 1c9410220040..0dcb36b32cb2 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -5,8 +5,6 @@
#ifndef _ASM_IO_H
#define _ASM_IO_H
-#define ARCH_HAS_IOREMAP_WC
-
#include <linux/kernel.h>
#include <linux/types.h>
diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
index af1d1e4a6965..23f5b1107246 100644
--- a/arch/loongarch/include/asm/pgalloc.h
+++ b/arch/loongarch/include/asm/pgalloc.h
@@ -45,9 +45,9 @@ extern void pagetable_init(void);
extern pgd_t *pgd_alloc(struct mm_struct *mm);
#define __pte_free_tlb(tlb, pte, address) \
-do { \
- pgtable_pte_page_dtor(pte); \
- tlb_remove_page((tlb), pte); \
+do { \
+ pagetable_pte_dtor(page_ptdesc(pte)); \
+ tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \
} while (0)
#ifndef __PAGETABLE_PMD_FOLDED
@@ -55,18 +55,18 @@ do { \
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
{
pmd_t *pmd;
- struct page *pg;
+ struct ptdesc *ptdesc;
- pg = alloc_page(GFP_KERNEL_ACCOUNT);
- if (!pg)
+ ptdesc = pagetable_alloc(GFP_KERNEL_ACCOUNT, 0);
+ if (!ptdesc)
return NULL;
- if (!pgtable_pmd_page_ctor(pg)) {
- __free_page(pg);
+ if (!pagetable_pmd_ctor(ptdesc)) {
+ pagetable_free(ptdesc);
return NULL;
}
- pmd = (pmd_t *)page_address(pg);
+ pmd = ptdesc_address(ptdesc);
pmd_init(pmd);
return pmd;
}
@@ -80,10 +80,13 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
pud_t *pud;
+ struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);
- pud = (pud_t *) __get_free_page(GFP_KERNEL);
- if (pud)
- pud_init(pud);
+ if (!ptdesc)
+ return NULL;
+ pud = ptdesc_address(ptdesc);
+
+ pud_init(pud);
return pud;
}
diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index 36a6dc0148ae..5bd102b51f7c 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -11,10 +11,11 @@
pgd_t *pgd_alloc(struct mm_struct *mm)
{
- pgd_t *ret, *init;
+ pgd_t *init, *ret = NULL;
+ struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);
- ret = (pgd_t *) __get_free_page(GFP_KERNEL);
- if (ret) {
+ if (ptdesc) {
+ ret = (pgd_t *)ptdesc_address(ptdesc);
init = pgd_offset(&init_mm, 0UL);
pgd_init(ret);
memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h
index d41fa488453b..6a0abd4846c6 100644
--- a/arch/m68k/include/asm/io_mm.h
+++ b/arch/m68k/include/asm/io_mm.h
@@ -26,8 +26,6 @@
#include <asm/virtconvert.h>
#include <asm/kmap.h>
-#include <asm-generic/iomap.h>
-
#ifdef CONFIG_ATARI
#define atari_readb raw_inb
#define atari_writeb raw_outb
diff --git a/arch/m68k/include/asm/kmap.h b/arch/m68k/include/asm/kmap.h
index dec05743d426..4efb3efa593a 100644
--- a/arch/m68k/include/asm/kmap.h
+++ b/arch/m68k/include/asm/kmap.h
@@ -4,8 +4,6 @@
#ifdef CONFIG_MMU
-#define ARCH_HAS_IOREMAP_WT
-
/* Values for nocacheflag and cmode */
#define IOMAP_FULL_CACHING 0
#define IOMAP_NOCACHE_SER 1
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index 5c2c0a864524..302c5bf67179 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -5,22 +5,22 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
-extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
- free_page((unsigned long) pte);
+ pagetable_free(virt_to_ptdesc(pte));
}
extern const char bad_pmd_string[];
-extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
- unsigned long page = __get_free_page(GFP_DMA);
+ struct ptdesc *ptdesc = pagetable_alloc((GFP_DMA | __GFP_ZERO) &
+ ~__GFP_HIGHMEM, 0);
- if (!page)
+ if (!ptdesc)
return NULL;
- memset((void *)page, 0, PAGE_SIZE);
- return (pte_t *) (page);
+ return ptdesc_address(ptdesc);
}
extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
@@ -35,36 +35,34 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pgtable,
unsigned long address)
{
- struct page *page = virt_to_page(pgtable);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pgtable);
- pgtable_pte_page_dtor(page);
- __free_page(page);
+ pagetable_pte_dtor(ptdesc);
+ pagetable_free(ptdesc);
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
- struct page *page = alloc_pages(GFP_DMA, 0);
+ struct ptdesc *ptdesc = pagetable_alloc(GFP_DMA | __GFP_ZERO, 0);
pte_t *pte;
- if (!page)
+ if (!ptdesc)
return NULL;
- if (!pgtable_pte_page_ctor(page)) {
- __free_page(page);
+ if (!pagetable_pte_ctor(ptdesc)) {
+ pagetable_free(ptdesc);
return NULL;
}
- pte = page_address(page);
- clear_page(pte);
-
+ pte = ptdesc_address(ptdesc);
return pte;
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pgtable)
{
- struct page *page = virt_to_page(pgtable);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pgtable);
- pgtable_pte_page_dtor(page);
- __free_page(page);
+ pagetable_pte_dtor(ptdesc);
+ pagetable_free(ptdesc);
}
/*
@@ -75,16 +73,19 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pgtable)
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- free_page((unsigned long) pgd);
+ pagetable_free(virt_to_ptdesc(pgd));
}
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *new_pgd;
+ struct ptdesc *ptdesc = pagetable_alloc((GFP_DMA | __GFP_NOWARN) &
+ ~__GFP_HIGHMEM, 0);
- new_pgd = (pgd_t *)__get_free_page(GFP_DMA | __GFP_NOWARN);
- if (!new_pgd)
+ if (!ptdesc)
return NULL;
+ new_pgd = ptdesc_address(ptdesc);
+
memcpy(new_pgd, swapper_pg_dir, PTRS_PER_PGD * sizeof(pgd_t));
memset(new_pgd, 0, PAGE_OFFSET >> PGDIR_SHIFT);
return new_pgd;
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 198036aff519..ff48573db2c0 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -17,10 +17,10 @@
extern const char bad_pmd_string[];
-#define __pte_free_tlb(tlb,pte,addr) \
-do { \
- pgtable_pte_page_dtor(pte); \
- tlb_remove_page((tlb), pte); \
+#define __pte_free_tlb(tlb, pte, addr) \
+do { \
+ pagetable_pte_dtor(page_ptdesc(pte)); \
+ tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \
} while (0)
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index c75984e2d86b..594575a0780c 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -161,7 +161,7 @@ void *get_pointer_table(int type)
* m68k doesn't have SPLIT_PTE_PTLOCKS for not having
* SMP.
*/
- pgtable_pte_page_ctor(virt_to_page(page));
+ pagetable_pte_ctor(virt_to_ptdesc(page));
}
mmu_page_ctor(page);
@@ -201,7 +201,7 @@ int free_pointer_table(void *table, int type)
list_del(dp);
mmu_page_dtor((void *)page);
if (type == TABLE_PTE)
- pgtable_pte_page_dtor(virt_to_page((void *)page));
+ pagetable_pte_dtor(virt_to_ptdesc((void *)page));
free_page (page);
return 1;
} else if (ptable_list[type].next != dp) {
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index affd21e9c20b..062dd4e6b954 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -12,8 +12,6 @@
#ifndef _ASM_IO_H
#define _ASM_IO_H
-#define ARCH_HAS_IOREMAP_WC
-
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/types.h>
@@ -25,7 +23,6 @@
#include <asm/byteorder.h>
#include <asm/cpu.h>
#include <asm/cpu-features.h>
-#include <asm-generic/iomap.h>
#include <asm/page.h>
#include <asm/pgtable-bits.h>
#include <asm/processor.h>
@@ -210,6 +207,8 @@ void iounmap(const volatile void __iomem *addr);
#define ioremap_wc(offset, size) \
ioremap_prot((offset), (size), boot_cpu_data.writecombine)
+#include <asm-generic/iomap.h>
+
#if defined(CONFIG_CPU_CAVIUM_OCTEON)
#define war_io_reorder_wmb() wmb()
#else
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index f72e737dda21..40e40a7eb94a 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -51,13 +51,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- free_pages((unsigned long)pgd, PGD_TABLE_ORDER);
+ pagetable_free(virt_to_ptdesc(pgd));
}
-#define __pte_free_tlb(tlb,pte,address) \
-do { \
- pgtable_pte_page_dtor(pte); \
- tlb_remove_page((tlb), pte); \
+#define __pte_free_tlb(tlb, pte, address) \
+do { \
+ pagetable_pte_dtor(page_ptdesc(pte)); \
+ tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \
} while (0)
#ifndef __PAGETABLE_PMD_FOLDED
@@ -65,18 +65,18 @@ do { \
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
{
pmd_t *pmd;
- struct page *pg;
+ struct ptdesc *ptdesc;
- pg = alloc_pages(GFP_KERNEL_ACCOUNT, PMD_TABLE_ORDER);
- if (!pg)
+ ptdesc = pagetable_alloc(GFP_KERNEL_ACCOUNT, PMD_TABLE_ORDER);
+ if (!ptdesc)
return NULL;
- if (!pgtable_pmd_page_ctor(pg)) {
- __free_pages(pg, PMD_TABLE_ORDER);
+ if (!pagetable_pmd_ctor(ptdesc)) {
+ pagetable_free(ptdesc);
return NULL;
}
- pmd = (pmd_t *)page_address(pg);
+ pmd = ptdesc_address(ptdesc);
pmd_init(pmd);
return pmd;
}
@@ -90,10 +90,14 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
pud_t *pud;
+ struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM,
+ PUD_TABLE_ORDER);
- pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_TABLE_ORDER);
- if (pud)
- pud_init(pud);
+ if (!ptdesc)
+ return NULL;
+ pud = ptdesc_address(ptdesc);
+
+ pud_init(pud);
return pud;
}
diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c
index b13314be5d0e..1506e458040d 100644
--- a/arch/mips/mm/pgtable.c
+++ b/arch/mips/mm/pgtable.c
@@ -10,10 +10,12 @@
pgd_t *pgd_alloc(struct mm_struct *mm)
{
- pgd_t *ret, *init;
+ pgd_t *init, *ret = NULL;
+ struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM,
+ PGD_TABLE_ORDER);
- ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER);
- if (ret) {
+ if (ptdesc) {
+ ret = ptdesc_address(ptdesc);
init = pgd_offset(&init_mm, 0UL);
pgd_init(ret);
memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index ecd1657bb2ce..ce6bb8e74271 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -28,10 +28,10 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
extern pgd_t *pgd_alloc(struct mm_struct *mm);
-#define __pte_free_tlb(tlb, pte, addr) \
- do { \
- pgtable_pte_page_dtor(pte); \
- tlb_remove_page((tlb), (pte)); \
+#define __pte_free_tlb(tlb, pte, addr) \
+ do { \
+ pagetable_pte_dtor(page_ptdesc(pte)); \
+ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \
} while (0)
#endif /* _ASM_NIOS2_PGALLOC_H */
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index c7f282f60f64..fd9bb76a610b 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -21,6 +21,7 @@ config OPENRISC
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
select GENERIC_PCI_IOMAP
+ select GENERIC_IOREMAP
select GENERIC_CPU_DEVICES
select HAVE_PCI
select HAVE_UID16
diff --git a/arch/openrisc/include/asm/io.h b/arch/openrisc/include/asm/io.h
index ee6043a03173..5a6f0f16a5ce 100644
--- a/arch/openrisc/include/asm/io.h
+++ b/arch/openrisc/include/asm/io.h
@@ -15,6 +15,8 @@
#define __ASM_OPENRISC_IO_H
#include <linux/types.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
/*
* PCI: We do not use IO ports in OpenRISC
@@ -27,11 +29,10 @@
#define PIO_OFFSET 0
#define PIO_MASK 0
-#define ioremap ioremap
-void __iomem *ioremap(phys_addr_t offset, unsigned long size);
-
-#define iounmap iounmap
-extern void iounmap(volatile void __iomem *addr);
+/*
+ * I/O memory mapping functions.
+ */
+#define _PAGE_IOREMAP (pgprot_val(PAGE_KERNEL) | _PAGE_CI)
#include <asm-generic/io.h>
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index b7b2b8d16fad..c6a73772a546 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -66,10 +66,10 @@ extern inline pgd_t *pgd_alloc(struct mm_struct *mm)
extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
-#define __pte_free_tlb(tlb, pte, addr) \
-do { \
- pgtable_pte_page_dtor(pte); \
- tlb_remove_page((tlb), (pte)); \
+#define __pte_free_tlb(tlb, pte, addr) \
+do { \
+ pagetable_pte_dtor(page_ptdesc(pte)); \
+ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \
} while (0)
#endif
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index 8ec0dafecf25..91c8259d4b7e 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -22,88 +22,6 @@
extern int mem_init_done;
-static unsigned int fixmaps_used __initdata;
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem *__ref ioremap(phys_addr_t addr, unsigned long size)
-{
- phys_addr_t p;
- unsigned long v;
- unsigned long offset, last_addr;
- struct vm_struct *area = NULL;
-
- /* Don't allow wraparound or zero size */
- last_addr = addr + size - 1;
- if (!size || last_addr < addr)
- return NULL;
-
- /*
- * Mappings have to be page-aligned
- */
- offset = addr & ~PAGE_MASK;
- p = addr & PAGE_MASK;
- size = PAGE_ALIGN(last_addr + 1) - p;
-
- if (likely(mem_init_done)) {
- area = get_vm_area(size, VM_IOREMAP);
- if (!area)
- return NULL;
- v = (unsigned long)area->addr;
- } else {
- if ((fixmaps_used + (size >> PAGE_SHIFT)) > FIX_N_IOREMAPS)
- return NULL;
- v = fix_to_virt(FIX_IOREMAP_BEGIN + fixmaps_used);
- fixmaps_used += (size >> PAGE_SHIFT);
- }
-
- if (ioremap_page_range(v, v + size, p,
- __pgprot(pgprot_val(PAGE_KERNEL) | _PAGE_CI))) {
- if (likely(mem_init_done))
- vfree(area->addr);
- else
- fixmaps_used -= (size >> PAGE_SHIFT);
- return NULL;
- }
-
- return (void __iomem *)(offset + (char *)v);
-}
-EXPORT_SYMBOL(ioremap);
-
-void iounmap(volatile void __iomem *addr)
-{
- /* If the page is from the fixmap pool then we just clear out
- * the fixmap mapping.
- */
- if (unlikely((unsigned long)addr > FIXADDR_START)) {
- /* This is a bit broken... we don't really know
- * how big the area is so it's difficult to know
- * how many fixed pages to invalidate...
- * just flush tlb and hope for the best...
- * consider this a FIXME
- *
- * Really we should be clearing out one or more page
- * table entries for these virtual addresses so that
- * future references cause a page fault... for now, we
- * rely on two things:
- * i) this code never gets called on known boards
- * ii) invalid accesses to the freed areas aren't made
- */
- flush_tlb_all();
- return;
- }
-
- return vfree((void *)(PAGE_MASK & (unsigned long)addr));
-}
-EXPORT_SYMBOL(iounmap);
-
/**
* OK, this one's a bit tricky... ioremap can get called before memory is
* initialized (early serial console does this) and will want to alloc a page
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 4cb46d5c64a2..4cda9f0a3277 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -36,6 +36,7 @@ config PARISC
select GENERIC_ATOMIC64 if !64BIT
select GENERIC_IRQ_PROBE
select GENERIC_PCI_IOMAP
+ select GENERIC_IOREMAP
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_SMP_IDLE_THREAD
select GENERIC_ARCH_TOPOLOGY if SMP
diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h
index c05e781be2f5..366537042465 100644
--- a/arch/parisc/include/asm/io.h
+++ b/arch/parisc/include/asm/io.h
@@ -125,12 +125,17 @@ static inline void gsc_writeq(unsigned long long val, unsigned long addr)
/*
* The standard PCI ioremap interfaces
*/
-void __iomem *ioremap(unsigned long offset, unsigned long size);
-#define ioremap_wc ioremap
-#define ioremap_uc ioremap
-#define pci_iounmap pci_iounmap
+#define ioremap_prot ioremap_prot
+
+#define _PAGE_IOREMAP (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | \
+ _PAGE_ACCESSED | _PAGE_NO_CACHE)
-extern void iounmap(const volatile void __iomem *addr);
+#define ioremap_wc(addr, size) \
+ ioremap_prot((addr), (size), _PAGE_IOREMAP)
+#define ioremap_uc(addr, size) \
+ ioremap_prot((addr), (size), _PAGE_IOREMAP)
+
+#define pci_iounmap pci_iounmap
void memset_io(volatile void __iomem *addr, unsigned char val, int count);
void memcpy_fromio(void *dst, const volatile void __iomem *src, int count);
diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c
index 345ff0b66499..fd996472dfe7 100644
--- a/arch/parisc/mm/ioremap.c
+++ b/arch/parisc/mm/ioremap.c
@@ -13,25 +13,9 @@
#include <linux/io.h>
#include <linux/mm.h>
-/*
- * Generic mapping function (not visible outside):
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+ unsigned long prot)
{
- void __iomem *addr;
- struct vm_struct *area;
- unsigned long offset, last_addr;
- pgprot_t pgprot;
-
#ifdef CONFIG_EISA
unsigned long end = phys_addr + size - 1;
/* Support EISA addresses */
@@ -40,11 +24,6 @@ void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
phys_addr |= F_EXTEND(0xfc000000);
#endif
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
/*
* Don't allow anybody to remap normal RAM that we're using..
*/
@@ -62,39 +41,6 @@ void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
}
}
- pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY |
- _PAGE_ACCESSED | _PAGE_NO_CACHE);
-
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr + 1) - phys_addr;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area(size, VM_IOREMAP);
- if (!area)
- return NULL;
-
- addr = (void __iomem *) area->addr;
- if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
- phys_addr, pgprot)) {
- vunmap(addr);
- return NULL;
- }
-
- return (void __iomem *) (offset + (char __iomem *)addr);
-}
-EXPORT_SYMBOL(ioremap);
-
-void iounmap(const volatile void __iomem *io_addr)
-{
- unsigned long addr = (unsigned long)io_addr & PAGE_MASK;
-
- if (is_vmalloc_addr((void *)addr))
- vunmap((void *)addr);
+ return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
}
-EXPORT_SYMBOL(iounmap);
+EXPORT_SYMBOL(ioremap_prot);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 0b1172cbeccb..938294c996dc 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -157,6 +157,7 @@ config PPC
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
+ select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if PPC_RADIX_MMU
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
@@ -174,6 +175,7 @@ config PPC
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select ARCH_WANT_LD_ORPHAN_WARN
+ select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if PPC_RADIX_MMU
select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if PPC_BOOK3S_32 || PPC_8xx
select ARCH_WEAK_RELEASE_ACQUIRE
select BINFMT_ELF
@@ -193,6 +195,7 @@ config PPC
select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC
select GENERIC_EARLY_IOREMAP
select GENERIC_GETTIMEOFDAY
+ select GENERIC_IOREMAP
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
select GENERIC_PCI_IOMAP if PCI
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index d4a19e6547ac..6e70ae511631 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -138,7 +138,16 @@ static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
}
#define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS)
+
+/*
+ * pud comparison that will work with both pte and page table pointer.
+ */
+static inline int hash__pud_same(pud_t pud_a, pud_t pud_b)
+{
+ return (((pud_raw(pud_a) ^ pud_raw(pud_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0);
+}
#define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS)
+
static inline int hash__p4d_bad(p4d_t p4d)
{
return (p4d_val(p4d) == 0);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 4acc9690f599..a8204566cfd0 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -921,8 +921,29 @@ static inline pud_t pte_pud(pte_t pte)
{
return __pud_raw(pte_raw(pte));
}
+
+static inline pte_t *pudp_ptep(pud_t *pud)
+{
+ return (pte_t *)pud;
+}
+
+#define pud_pfn(pud) pte_pfn(pud_pte(pud))
+#define pud_dirty(pud) pte_dirty(pud_pte(pud))
+#define pud_young(pud) pte_young(pud_pte(pud))
+#define pud_mkold(pud) pte_pud(pte_mkold(pud_pte(pud)))
+#define pud_wrprotect(pud) pte_pud(pte_wrprotect(pud_pte(pud)))
+#define pud_mkdirty(pud) pte_pud(pte_mkdirty(pud_pte(pud)))
+#define pud_mkclean(pud) pte_pud(pte_mkclean(pud_pte(pud)))
+#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
+#define pud_mkwrite(pud) pte_pud(pte_mkwrite(pud_pte(pud)))
#define pud_write(pud) pte_write(pud_pte(pud))
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#define pud_soft_dirty(pmd) pte_soft_dirty(pud_pte(pud))
+#define pud_mksoft_dirty(pmd) pte_pud(pte_mksoft_dirty(pud_pte(pud)))
+#define pud_clear_soft_dirty(pmd) pte_pud(pte_clear_soft_dirty(pud_pte(pud)))
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
static inline int pud_bad(pud_t pud)
{
if (radix_enabled())
@@ -1115,15 +1136,24 @@ static inline bool pmd_access_permitted(pmd_t pmd, bool write)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot);
extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd);
+extern void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud);
+
static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmd)
{
}
+static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pud)
+{
+}
+
extern int hash__has_transparent_hugepage(void);
static inline int has_transparent_hugepage(void)
{
@@ -1133,6 +1163,14 @@ static inline int has_transparent_hugepage(void)
}
#define has_transparent_hugepage has_transparent_hugepage
+static inline int has_transparent_pud_hugepage(void)
+{
+ if (radix_enabled())
+ return radix__has_transparent_pud_hugepage();
+ return 0;
+}
+#define has_transparent_pud_hugepage has_transparent_pud_hugepage
+
static inline unsigned long
pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
unsigned long clr, unsigned long set)
@@ -1142,6 +1180,16 @@ pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
}
+static inline unsigned long
+pud_hugepage_update(struct mm_struct *mm, unsigned long addr, pud_t *pudp,
+ unsigned long clr, unsigned long set)
+{
+ if (radix_enabled())
+ return radix__pud_hugepage_update(mm, addr, pudp, clr, set);
+ BUG();
+ return pud_val(*pudp);
+}
+
/*
* returns true for pmd migration entries, THP, devmap, hugetlb
* But compile time dependent on THP config
@@ -1151,6 +1199,11 @@ static inline int pmd_large(pmd_t pmd)
return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
}
+static inline int pud_large(pud_t pud)
+{
+ return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
+}
+
/*
* For radix we should always find H_PAGE_HASHPTE zero. Hence
* the below will work for radix too
@@ -1166,6 +1219,17 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
return ((old & _PAGE_ACCESSED) != 0);
}
+static inline int __pudp_test_and_clear_young(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ unsigned long old;
+
+ if ((pud_raw(*pudp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
+ return 0;
+ old = pud_hugepage_update(mm, addr, pudp, _PAGE_ACCESSED, 0);
+ return ((old & _PAGE_ACCESSED) != 0);
+}
+
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp)
@@ -1174,6 +1238,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
}
+#define __HAVE_ARCH_PUDP_SET_WRPROTECT
+static inline void pudp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp)
+{
+ if (pud_write(*pudp))
+ pud_hugepage_update(mm, addr, pudp, _PAGE_WRITE, 0);
+}
+
/*
* Only returns true for a THP. False for pmd migration entry.
* We also need to return true when we come across a pte that
@@ -1195,6 +1267,17 @@ static inline int pmd_trans_huge(pmd_t pmd)
return hash__pmd_trans_huge(pmd);
}
+static inline int pud_trans_huge(pud_t pud)
+{
+ if (!pud_present(pud))
+ return false;
+
+ if (radix_enabled())
+ return radix__pud_trans_huge(pud);
+ return 0;
+}
+
+
#define __HAVE_ARCH_PMD_SAME
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
@@ -1203,6 +1286,15 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
return hash__pmd_same(pmd_a, pmd_b);
}
+#define pud_same pud_same
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+ if (radix_enabled())
+ return radix__pud_same(pud_a, pud_b);
+ return hash__pud_same(pud_a, pud_b);
+}
+
+
static inline pmd_t __pmd_mkhuge(pmd_t pmd)
{
if (radix_enabled())
@@ -1210,6 +1302,14 @@ static inline pmd_t __pmd_mkhuge(pmd_t pmd)
return hash__pmd_mkhuge(pmd);
}
+static inline pud_t __pud_mkhuge(pud_t pud)
+{
+ if (radix_enabled())
+ return radix__pud_mkhuge(pud);
+ BUG();
+ return pud;
+}
+
/*
* pfn_pmd return a pmd_t that can be used as pmd pte entry.
*/
@@ -1225,14 +1325,34 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
return pmd;
}
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+#ifdef CONFIG_DEBUG_VM
+ if (radix_enabled())
+ WARN_ON((pud_raw(pud) & cpu_to_be64(_PAGE_PTE)) == 0);
+ else
+ WARN_ON(1);
+#endif
+ return pud;
+}
+
+
#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
pmd_t entry, int dirty);
+#define __HAVE_ARCH_PUDP_SET_ACCESS_FLAGS
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp,
+ pud_t entry, int dirty);
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
+#define __HAVE_ARCH_PUDP_TEST_AND_CLEAR_YOUNG
+extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp);
+
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
@@ -1243,6 +1363,16 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
}
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ if (radix_enabled())
+ return radix__pudp_huge_get_and_clear(mm, addr, pudp);
+ BUG();
+ return *pudp;
+}
+
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
@@ -1257,6 +1387,11 @@ pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
unsigned long addr,
pmd_t *pmdp, int full);
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
+pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
+ unsigned long addr,
+ pud_t *pudp, int full);
+
#define __HAVE_ARCH_PGTABLE_DEPOSIT
static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
pmd_t *pmdp, pgtable_t pgtable)
@@ -1305,6 +1440,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
return hash__pmd_mkdevmap(pmd);
}
+static inline pud_t pud_mkdevmap(pud_t pud)
+{
+ if (radix_enabled())
+ return radix__pud_mkdevmap(pud);
+ BUG();
+ return pud;
+}
+
static inline int pmd_devmap(pmd_t pmd)
{
return pte_devmap(pmd_pte(pmd));
@@ -1312,7 +1455,7 @@ static inline int pmd_devmap(pmd_t pmd)
static inline int pud_devmap(pud_t pud)
{
- return 0;
+ return pte_devmap(pud_pte(pud));
}
static inline int pgd_devmap(pgd_t pgd)
@@ -1321,16 +1464,6 @@ static inline int pgd_devmap(pgd_t pgd)
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static inline int pud_pfn(pud_t pud)
-{
- /*
- * Currently all calls to pud_pfn() are gated around a pud_devmap()
- * check so this should never be used. If it grows another user we
- * want to know about it.
- */
- BUILD_BUG();
- return 0;
-}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
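The PUD accessors added above deliberately mirror the PMD ones and are radix-only; the non-radix branches fall through to BUG(). As a hedged caller-side sketch (not part of this patch; can_map_pud_devmap() and its checks are hypothetical), generic code is expected to gate any 1G dax mapping on has_transparent_pud_hugepage() before using the pud_* helpers:

/* Hypothetical caller-side sketch; not from this patch. */
static bool can_map_pud_devmap(struct vm_area_struct *vma, unsigned long addr)
{
	/* 1G leaf entries are only available with radix translation. */
	if (!has_transparent_pud_hugepage())
		return false;
	/* A PUD-sized mapping needs PUD alignment and room in the vma. */
	return IS_ALIGNED(addr, HPAGE_PUD_SIZE) &&
	       addr >= vma->vm_start &&
	       addr + HPAGE_PUD_SIZE <= vma->vm_end;
}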
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 686001eda936..357e23a403d3 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -250,6 +250,10 @@ static inline int radix__pud_bad(pud_t pud)
return !!(pud_val(pud) & RADIX_PUD_BAD_BITS);
}
+static inline int radix__pud_same(pud_t pud_a, pud_t pud_b)
+{
+ return ((pud_raw(pud_a) ^ pud_raw(pud_b)) == 0);
+}
static inline int radix__p4d_bad(p4d_t p4d)
{
@@ -268,9 +272,22 @@ static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
return __pmd(pmd_val(pmd) | _PAGE_PTE);
}
+static inline int radix__pud_trans_huge(pud_t pud)
+{
+ return (pud_val(pud) & (_PAGE_PTE | _PAGE_DEVMAP)) == _PAGE_PTE;
+}
+
+static inline pud_t radix__pud_mkhuge(pud_t pud)
+{
+ return __pud(pud_val(pud) | _PAGE_PTE);
+}
+
extern unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, unsigned long clr,
unsigned long set);
+extern unsigned long radix__pud_hugepage_update(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, unsigned long clr,
+ unsigned long set);
extern pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
@@ -278,6 +295,9 @@ extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp);
+pud_t radix__pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp);
+
static inline int radix__has_transparent_hugepage(void)
{
/* For radix 2M at PMD level means thp */
@@ -285,6 +305,14 @@ static inline int radix__has_transparent_hugepage(void)
return 1;
return 0;
}
+
+static inline int radix__has_transparent_pud_hugepage(void)
+{
+ /* For radix 1G at PUD level means pud hugepage support */
+ if (mmu_psize_defs[MMU_PAGE_1G].shift == PUD_SHIFT)
+ return 1;
+ return 0;
+}
#endif
static inline pmd_t radix__pmd_mkdevmap(pmd_t pmd)
@@ -292,9 +320,20 @@ static inline pmd_t radix__pmd_mkdevmap(pmd_t pmd)
return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP));
}
+static inline pud_t radix__pud_mkdevmap(pud_t pud)
+{
+ return __pud(pud_val(pud) | (_PAGE_PTE | _PAGE_DEVMAP));
+}
+
+struct vmem_altmap;
+struct dev_pagemap;
extern int __meminit radix__vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys);
+int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end,
+ int node, struct vmem_altmap *altmap);
+void __ref radix__vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap);
extern void radix__vmemmap_remove_mapping(unsigned long start,
unsigned long page_size);
@@ -325,5 +364,15 @@ int radix__remove_section_mapping(unsigned long start, unsigned long end);
void radix__kernel_map_pages(struct page *page, int numpages, int enable);
+#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
+#define vmemmap_can_optimize vmemmap_can_optimize
+bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap);
+#endif
+
+#define vmemmap_populate_compound_pages vmemmap_populate_compound_pages
+int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
+ unsigned long start,
+ unsigned long end, int node,
+ struct dev_pagemap *pgmap);
#endif /* __ASSEMBLY__ */
#endif
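For scale (an illustrative note, not text from the patch): with the usual radix geometry a PMD entry maps 2MB and a PUD entry maps 512 x 2MB = 1GB for both 4K and 64K base pages, so radix__has_transparent_pud_hugepage() reduces to checking that the MMU reports a 1GB page size whose shift matches PUD_SHIFT.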
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 77797a2a82eb..a38542259fab 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -68,6 +68,8 @@ void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long end, int psize);
extern void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
+extern void radix__flush_pud_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end);
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 0d0c1447ecf0..1950c1b825b4 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -5,6 +5,7 @@
#define MMU_NO_CONTEXT ~0UL
#include <linux/mm_types.h>
+#include <linux/mmu_notifier.h>
#include <asm/book3s/64/tlbflush-hash.h>
#include <asm/book3s/64/tlbflush-radix.h>
@@ -50,6 +51,14 @@ static inline void flush_pmd_tlb_range(struct vm_area_struct *vma,
radix__flush_pmd_tlb_range(vma, start, end);
}
+#define __HAVE_ARCH_FLUSH_PUD_TLB_RANGE
+static inline void flush_pud_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ if (radix_enabled())
+ radix__flush_pud_tlb_range(vma, start, end);
+}
+
#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
unsigned long start,
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index f1e657c9bbe8..0732b743e099 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -3,11 +3,6 @@
#define _ASM_POWERPC_IO_H
#ifdef __KERNEL__
-#define ARCH_HAS_IOREMAP_WC
-#ifdef CONFIG_PPC32
-#define ARCH_HAS_IOREMAP_WT
-#endif
-
/*
*/
@@ -732,9 +727,7 @@ static inline void name at \
#define writel_relaxed(v, addr) writel(v, addr)
#define writeq_relaxed(v, addr) writeq(v, addr)
-#ifdef CONFIG_GENERIC_IOMAP
-#include <asm-generic/iomap.h>
-#else
+#ifndef CONFIG_GENERIC_IOMAP
/*
* Here comes the implementation of the IOMAP interfaces.
*/
@@ -896,8 +889,8 @@ static inline void iosync(void)
*
*/
extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
-extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
- unsigned long flags);
+#define ioremap ioremap
+#define ioremap_prot ioremap_prot
extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
#define ioremap_wc ioremap_wc
@@ -911,14 +904,12 @@ void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
#define ioremap_cache(addr, size) \
ioremap_prot((addr), (size), pgprot_val(PAGE_KERNEL))
-extern void iounmap(volatile void __iomem *addr);
+#define iounmap iounmap
void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size);
int early_ioremap_range(unsigned long ea, phys_addr_t pa,
unsigned long size, pgprot_t prot);
-void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
- pgprot_t prot, void *caller);
extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size,
pgprot_t prot, void *caller);
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index 3360cad78ace..3a971e2a8c73 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -45,6 +45,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
pte_fragment_free((unsigned long *)ptepage, 0);
}
+/* arch uses the pte_free_defer() implementation in arch/powerpc/mm/pgtable-frag.c */
+#define pte_free_defer pte_free_defer
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+
/*
* Functions that deal with pagetables that could be at any level of
* the table need to be passed an "index_size" so they know how to
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 6a88bfdaa69b..33464e6d6431 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -158,13 +158,30 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
}
#ifdef CONFIG_PPC64
-#define is_ioremap_addr is_ioremap_addr
-static inline bool is_ioremap_addr(const void *x)
+int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size);
+bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+ unsigned long page_size);
+/*
+ * mm/memory_hotplug.c:mhp_supports_memmap_on_memory goes into the details
+ * of some of the restrictions. We don't check for PMD_SIZE because our
+ * vmemmap allocation code can fall back correctly. The pageblock
+ * alignment requirement is met using altmap->reserve blocks.
+ */
+#define arch_supports_memmap_on_memory arch_supports_memmap_on_memory
+static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
{
- unsigned long addr = (unsigned long)x;
-
- return addr >= IOREMAP_BASE && addr < IOREMAP_END;
+ if (!radix_enabled())
+ return false;
+ /*
+ * With a 4K page size and a 2M PMD_SIZE, things align
+ * nicely for memory block sizes starting from 128MB.
+ * Hence align things with PMD_SIZE.
+ */
+ if (IS_ENABLED(CONFIG_PPC_4K_PAGES))
+ return IS_ALIGNED(vmemmap_size, PMD_SIZE);
+ return true;
}
+
#endif /* CONFIG_PPC64 */
#endif /* __ASSEMBLY__ */
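A quick sanity check of the IS_ALIGNED(vmemmap_size, PMD_SIZE) test above (illustrative arithmetic, assuming sizeof(struct page) == 64): a 128MB memory block with 4K pages contains 32768 pages, so its vmemmap needs 32768 * 64 = 2MB, which is exactly PMD_SIZE on radix; larger power-of-two block sizes stay a multiple of that, so the check holds for the block sizes the comment has in mind.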
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 709ebd578394..e2d6f9327f77 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -410,6 +410,7 @@ static int kvmppc_memslot_page_merge(struct kvm *kvm,
ret = H_STATE;
break;
}
+ vma_start_write(vma);
/* Copy vm_flags to avoid partial modifications in ksm_madvise */
vm_flags = vma->vm_flags;
ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 51f48984abca..988948d69bc1 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -214,7 +214,7 @@ unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr
old = be64_to_cpu(old_be);
- trace_hugepage_update(addr, old, clr, set);
+ trace_hugepage_update_pmd(addr, old, clr, set);
if (old & H_PAGE_HASHPTE)
hpte_do_hugepage_flush(mm, addr, pmdp, old);
return old;
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index c766e4c26e42..1715b07c630c 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -246,15 +246,15 @@ static void destroy_contexts(mm_context_t *ctx)
static void pmd_frag_destroy(void *pmd_frag)
{
int count;
- struct page *page;
+ struct ptdesc *ptdesc;
- page = virt_to_page(pmd_frag);
+ ptdesc = virt_to_ptdesc(pmd_frag);
/* drop all the pending references */
count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
- pgtable_pmd_page_dtor(page);
- __free_page(page);
+ if (atomic_sub_and_test(PMD_FRAG_NR - count, &ptdesc->pt_frag_refcount)) {
+ pagetable_pmd_dtor(ptdesc);
+ pagetable_free(ptdesc);
}
}
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 85c84e89e3ea..1498ccd08367 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -64,11 +64,39 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
return changed;
}
+int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp, pud_t entry, int dirty)
+{
+ int changed;
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(!pud_devmap(*pudp));
+ assert_spin_locked(pud_lockptr(vma->vm_mm, pudp));
+#endif
+ changed = !pud_same(*(pudp), entry);
+ if (changed) {
+ /*
+ * We can use MMU_PAGE_1G here, because only the radix
+ * path looks at the psize.
+ */
+ __ptep_set_access_flags(vma, pudp_ptep(pudp),
+ pud_pte(entry), address, MMU_PAGE_1G);
+ }
+ return changed;
+}
+
+
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
}
+
+int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp)
+{
+ return __pudp_test_and_clear_young(vma->vm_mm, address, pudp);
+}
+
/*
* set a new huge pmd. We should not be called for updating
* an existing pmd entry. That should go via pmd_hugepage_update.
@@ -90,6 +118,23 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}
+void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
+{
+#ifdef CONFIG_DEBUG_VM
+ /*
+ * Make sure hardware valid bit is not set. We don't do
+ * tlb flush for this update.
+ */
+
+ WARN_ON(pte_hw_valid(pud_pte(*pudp)));
+ assert_spin_locked(pud_lockptr(mm, pudp));
+ WARN_ON(!(pud_large(pud)));
+#endif
+ trace_hugepage_set_pud(addr, pud_val(pud));
+ return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud));
+}
+
static void do_serialize(void *arg)
{
/* We've taken the IPI, so try to trim the mask while here */
@@ -147,11 +192,35 @@ pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
return pmd;
}
+pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pudp, int full)
+{
+ pud_t pud;
+
+ VM_BUG_ON(addr & ~HPAGE_PUD_MASK);
+ VM_BUG_ON((pud_present(*pudp) && !pud_devmap(*pudp)) ||
+ !pud_present(*pudp));
+ pud = pudp_huge_get_and_clear(vma->vm_mm, addr, pudp);
+ /*
+ * If it is not a fullmm flush, then a parallel page fault can possibly
+ * convert this PUD entry to a regular level 0 PTE.
+ * Make sure we flush the TLB in this case.
+ */
+ if (!full)
+ flush_pud_tlb_range(vma, addr, addr + HPAGE_PUD_SIZE);
+ return pud;
+}
+
static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
{
return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
}
+static pud_t pud_set_protbits(pud_t pud, pgprot_t pgprot)
+{
+ return __pud(pud_val(pud) | pgprot_val(pgprot));
+}
+
/*
* At some point we should be able to get rid of
* pmd_mkhuge() and mk_huge_pmd() when we update all the
@@ -166,6 +235,15 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
return __pmd_mkhuge(pmd_set_protbits(__pmd(pmdv), pgprot));
}
+pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot)
+{
+ unsigned long pudv;
+
+ pudv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
+
+ return __pud_mkhuge(pud_set_protbits(__pud(pudv), pgprot));
+}
+
pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
{
return pfn_pmd(page_to_pfn(page), pgprot);
@@ -306,22 +384,22 @@ static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
{
void *ret = NULL;
- struct page *page;
+ struct ptdesc *ptdesc;
gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
if (mm == &init_mm)
gfp &= ~__GFP_ACCOUNT;
- page = alloc_page(gfp);
- if (!page)
+ ptdesc = pagetable_alloc(gfp, 0);
+ if (!ptdesc)
return NULL;
- if (!pgtable_pmd_page_ctor(page)) {
- __free_pages(page, 0);
+ if (!pagetable_pmd_ctor(ptdesc)) {
+ pagetable_free(ptdesc);
return NULL;
}
- atomic_set(&page->pt_frag_refcount, 1);
+ atomic_set(&ptdesc->pt_frag_refcount, 1);
- ret = page_address(page);
+ ret = ptdesc_address(ptdesc);
/*
* if we support only one fragment just return the
* allocated page.
@@ -331,12 +409,12 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
spin_lock(&mm->page_table_lock);
/*
- * If we find pgtable_page set, we return
+ * If we find ptdesc_page set, we return
* the allocated page with single fragment
* count.
*/
if (likely(!mm->context.pmd_frag)) {
- atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR);
+ atomic_set(&ptdesc->pt_frag_refcount, PMD_FRAG_NR);
mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
}
spin_unlock(&mm->page_table_lock);
@@ -357,15 +435,15 @@ pmd_t *pmd_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr)
void pmd_fragment_free(unsigned long *pmd)
{
- struct page *page = virt_to_page(pmd);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
- if (PageReserved(page))
- return free_reserved_page(page);
+ if (pagetable_is_reserved(ptdesc))
+ return free_reserved_ptdesc(ptdesc);
- BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
- if (atomic_dec_and_test(&page->pt_frag_refcount)) {
- pgtable_pmd_page_dtor(page);
- __free_page(page);
+ BUG_ON(atomic_read(&ptdesc->pt_frag_refcount) <= 0);
+ if (atomic_dec_and_test(&ptdesc->pt_frag_refcount)) {
+ pagetable_pmd_dtor(ptdesc);
+ pagetable_free(ptdesc);
}
}
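As with set_pmd_at(), the contract is that set_pud_at() only installs a brand-new entry while changes to an existing entry go through pud_hugepage_update(). A minimal sketch of that split (hypothetical caller; new_pud is a placeholder, not from this patch):

/* Hypothetical sketch of the intended usage split. */
spinlock_t *ptl = pud_lock(mm, pudp);

if (pud_none(*pudp))
	set_pud_at(mm, addr, pudp, new_pud);	/* install a new 1G leaf */
else
	pud_hugepage_update(mm, addr, pudp, _PAGE_WRITE, 0); /* wrprotect in place */
spin_unlock(ptl);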
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index 5e3195568525..17075c78d4bc 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -39,6 +39,7 @@ void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long st
radix__flush_tlb_pwc_range_psize(vma->vm_mm, start, end, psize);
else
radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
+ mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
}
void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index e7ea492ac510..96679018e7fb 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -601,17 +601,6 @@ void __init radix__early_init_mmu(void)
#else
mmu_virtual_psize = MMU_PAGE_4K;
#endif
-
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- /* vmemmap mapping */
- if (mmu_psize_defs[MMU_PAGE_2M].shift) {
- /*
- * map vmemmap using 2M if available
- */
- mmu_vmemmap_psize = MMU_PAGE_2M;
- } else
- mmu_vmemmap_psize = mmu_virtual_psize;
-#endif
#endif
/*
* initialize page table size
@@ -744,8 +733,58 @@ static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
p4d_clear(p4d);
}
-static void remove_pte_table(pte_t *pte_start, unsigned long addr,
- unsigned long end, bool direct)
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long end)
+{
+ unsigned long start = ALIGN_DOWN(addr, PMD_SIZE);
+
+ return !vmemmap_populated(start, PMD_SIZE);
+}
+
+static bool __meminit vmemmap_page_is_unused(unsigned long addr, unsigned long end)
+{
+ unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
+
+ return !vmemmap_populated(start, PAGE_SIZE);
+
+}
+#endif
+
+static void __meminit free_vmemmap_pages(struct page *page,
+ struct vmem_altmap *altmap,
+ int order)
+{
+ unsigned int nr_pages = 1 << order;
+
+ if (altmap) {
+ unsigned long alt_start, alt_end;
+ unsigned long base_pfn = page_to_pfn(page);
+
+ /*
+ * With 2M vmemmap mappings we can have things set up
+ * such that, even though an altmap is specified, we
+ * never actually used the altmap.
+ */
+ alt_start = altmap->base_pfn;
+ alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
+
+ if (base_pfn >= alt_start && base_pfn < alt_end) {
+ vmem_altmap_free(altmap, nr_pages);
+ return;
+ }
+ }
+
+ if (PageReserved(page)) {
+ /* allocated from memblock */
+ while (nr_pages--)
+ free_reserved_page(page++);
+ } else
+ free_pages((unsigned long)page_address(page), order);
+}
+
+static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr,
+ unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
{
unsigned long next, pages = 0;
pte_t *pte;
@@ -759,24 +798,26 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
if (!pte_present(*pte))
continue;
- if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
- /*
- * The vmemmap_free() and remove_section_mapping()
- * codepaths call us with aligned addresses.
- */
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
+ if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
+ if (!direct)
+ free_vmemmap_pages(pte_page(*pte), altmap, 0);
+ pte_clear(&init_mm, addr, pte);
+ pages++;
}
-
- pte_clear(&init_mm, addr, pte);
- pages++;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ else if (!direct && vmemmap_page_is_unused(addr, next)) {
+ free_vmemmap_pages(pte_page(*pte), altmap, 0);
+ pte_clear(&init_mm, addr, pte);
+ }
+#endif
}
if (direct)
update_page_count(mmu_virtual_psize, -pages);
}
static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
- unsigned long end, bool direct)
+ unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
{
unsigned long next, pages = 0;
pte_t *pte_base;
@@ -790,18 +831,24 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
continue;
if (pmd_is_leaf(*pmd)) {
- if (!IS_ALIGNED(addr, PMD_SIZE) ||
- !IS_ALIGNED(next, PMD_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
+ if (IS_ALIGNED(addr, PMD_SIZE) &&
+ IS_ALIGNED(next, PMD_SIZE)) {
+ if (!direct)
+ free_vmemmap_pages(pmd_page(*pmd), altmap, get_order(PMD_SIZE));
+ pte_clear(&init_mm, addr, (pte_t *)pmd);
+ pages++;
}
- pte_clear(&init_mm, addr, (pte_t *)pmd);
- pages++;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ else if (!direct && vmemmap_pmd_is_unused(addr, next)) {
+ free_vmemmap_pages(pmd_page(*pmd), altmap, get_order(PMD_SIZE));
+ pte_clear(&init_mm, addr, (pte_t *)pmd);
+ }
+#endif
continue;
}
pte_base = (pte_t *)pmd_page_vaddr(*pmd);
- remove_pte_table(pte_base, addr, next, direct);
+ remove_pte_table(pte_base, addr, next, direct, altmap);
free_pte_table(pte_base, pmd);
}
if (direct)
@@ -809,7 +856,8 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
}
static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
- unsigned long end, bool direct)
+ unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
{
unsigned long next, pages = 0;
pmd_t *pmd_base;
@@ -834,15 +882,16 @@ static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
}
pmd_base = pud_pgtable(*pud);
- remove_pmd_table(pmd_base, addr, next, direct);
+ remove_pmd_table(pmd_base, addr, next, direct, altmap);
free_pmd_table(pmd_base, pud);
}
if (direct)
update_page_count(MMU_PAGE_1G, -pages);
}
-static void __meminit remove_pagetable(unsigned long start, unsigned long end,
- bool direct)
+static void __meminit
+remove_pagetable(unsigned long start, unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
{
unsigned long addr, next;
pud_t *pud_base;
@@ -871,7 +920,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end,
}
pud_base = p4d_pgtable(*p4d);
- remove_pud_table(pud_base, addr, next, direct);
+ remove_pud_table(pud_base, addr, next, direct, altmap);
free_pud_table(pud_base, p4d);
}
@@ -894,7 +943,7 @@ int __meminit radix__create_section_mapping(unsigned long start,
int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
{
- remove_pagetable(start, end, true);
+ remove_pagetable(start, end, true, NULL);
return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */
@@ -926,10 +975,429 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
return 0;
}
+
+bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap)
+{
+ if (radix_enabled())
+ return __vmemmap_can_optimize(altmap, pgmap);
+
+ return false;
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
+ unsigned long addr, unsigned long next)
+{
+ int large = pmd_large(*pmdp);
+
+ if (large)
+ vmemmap_verify(pmdp_ptep(pmdp), node, addr, next);
+
+ return large;
+}
+
+void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node,
+ unsigned long addr, unsigned long next)
+{
+ pte_t entry;
+ pte_t *ptep = pmdp_ptep(pmdp);
+
+ VM_BUG_ON(!IS_ALIGNED(addr, PMD_SIZE));
+ entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
+ set_pte_at(&init_mm, addr, ptep, entry);
+ asm volatile("ptesync": : :"memory");
+
+ vmemmap_verify(ptep, node, addr, next);
+}
+
+static pte_t * __meminit radix__vmemmap_pte_populate(pmd_t *pmdp, unsigned long addr,
+ int node,
+ struct vmem_altmap *altmap,
+ struct page *reuse)
+{
+ pte_t *pte = pte_offset_kernel(pmdp, addr);
+
+ if (pte_none(*pte)) {
+ pte_t entry;
+ void *p;
+
+ if (!reuse) {
+ /*
+ * make sure we don't create altmap mappings
+ * covering things outside the device.
+ */
+ if (altmap && altmap_cross_boundary(altmap, addr, PAGE_SIZE))
+ altmap = NULL;
+
+ p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
+ if (!p && altmap)
+ p = vmemmap_alloc_block_buf(PAGE_SIZE, node, NULL);
+ if (!p)
+ return NULL;
+ pr_debug("PAGE_SIZE vmemmap mapping\n");
+ } else {
+ /*
+ * When a PTE/PMD entry is freed from the init_mm
+ * there's a free_pages() call on the page allocated
+ * above. Thus this get_page() is paired with the
+ * put_page_testzero() on the freeing path.
+ * This can only be called by certain ZONE_DEVICE paths,
+ * and through vmemmap_populate_compound_pages() when
+ * slab is available.
+ */
+ get_page(reuse);
+ p = page_to_virt(reuse);
+ pr_debug("Tail page reuse vmemmap mapping\n");
+ }
+
+ VM_BUG_ON(!PAGE_ALIGNED(addr));
+ entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
+ set_pte_at(&init_mm, addr, pte, entry);
+ asm volatile("ptesync": : :"memory");
+ }
+ return pte;
+}
+
+static inline pud_t *vmemmap_pud_alloc(p4d_t *p4dp, int node,
+ unsigned long address)
+{
+ pud_t *pud;
+
+ /* All early vmemmap mappings are done at PAGE_SIZE to keep things simple */
+ if (unlikely(p4d_none(*p4dp))) {
+ if (unlikely(!slab_is_available())) {
+ pud = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
+ p4d_populate(&init_mm, p4dp, pud);
+ /* go to the pud_offset */
+ } else
+ return pud_alloc(&init_mm, p4dp, address);
+ }
+ return pud_offset(p4dp, address);
+}
+
+static inline pmd_t *vmemmap_pmd_alloc(pud_t *pudp, int node,
+ unsigned long address)
+{
+ pmd_t *pmd;
+
+ /* All early vmemmap mappings are done at PAGE_SIZE to keep things simple */
+ if (unlikely(pud_none(*pudp))) {
+ if (unlikely(!slab_is_available())) {
+ pmd = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
+ pud_populate(&init_mm, pudp, pmd);
+ } else
+ return pmd_alloc(&init_mm, pudp, address);
+ }
+ return pmd_offset(pudp, address);
+}
+
+static inline pte_t *vmemmap_pte_alloc(pmd_t *pmdp, int node,
+ unsigned long address)
+{
+ pte_t *pte;
+
+ /* All early vmemmap mappings are done at PAGE_SIZE to keep things simple */
+ if (unlikely(pmd_none(*pmdp))) {
+ if (unlikely(!slab_is_available())) {
+ pte = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
+ pmd_populate(&init_mm, pmdp, pte);
+ } else
+ return pte_alloc_kernel(pmdp, address);
+ }
+ return pte_offset_kernel(pmdp, address);
+}
+
+
+
+int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
+{
+ unsigned long addr;
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ for (addr = start; addr < end; addr = next) {
+ next = pmd_addr_end(addr, end);
+
+ pgd = pgd_offset_k(addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = vmemmap_pud_alloc(p4d, node, addr);
+ if (!pud)
+ return -ENOMEM;
+ pmd = vmemmap_pmd_alloc(pud, node, addr);
+ if (!pmd)
+ return -ENOMEM;
+
+ if (pmd_none(READ_ONCE(*pmd))) {
+ void *p;
+
+ /*
+ * Keep it simple by checking the addr's PMD_SIZE alignment
+ * and verifying the device boundary condition.
+ * For us to use a pmd mapping, both addr and pfn should
+ * be aligned. We skip if addr is not aligned, and for
+ * pfn we hope the extra area in the altmap helps us
+ * find an aligned block. This can result in altmap
+ * block allocation failures, in which case we fall
+ * back to RAM for the vmemmap allocation.
+ */
+ if (altmap && (!IS_ALIGNED(addr, PMD_SIZE) ||
+ altmap_cross_boundary(altmap, addr, PMD_SIZE))) {
+ /*
+ * make sure we don't create altmap mappings
+ * covering things outside the device.
+ */
+ goto base_mapping;
+ }
+
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+ if (p) {
+ vmemmap_set_pmd(pmd, p, node, addr, next);
+ pr_debug("PMD_SIZE vmemmap mapping\n");
+ continue;
+ } else if (altmap) {
+ /*
+ * A vmemmap block allocation can fail due to
+ * alignment requirements: we try to align things
+ * aggressively and can thereby run out of space.
+ * Try a base mapping on failure.
+ */
+ goto base_mapping;
+ }
+ } else if (vmemmap_check_pmd(pmd, node, addr, next)) {
+ /*
+ * If a huge mapping exists due to an early call to
+ * vmemmap_populate(), let's try to use that.
+ */
+ continue;
+ }
+base_mapping:
+ /*
+ * Not able to allocate higher-order memory to back the memmap,
+ * or we found a pointer to a pte page. Allocate a base-page-size
+ * vmemmap.
+ */
+ pte = vmemmap_pte_alloc(pmd, node, addr);
+ if (!pte)
+ return -ENOMEM;
+
+ pte = radix__vmemmap_pte_populate(pmd, addr, node, altmap, NULL);
+ if (!pte)
+ return -ENOMEM;
+
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+ next = addr + PAGE_SIZE;
+ }
+ return 0;
+}
+
+static pte_t * __meminit radix__vmemmap_populate_address(unsigned long addr, int node,
+ struct vmem_altmap *altmap,
+ struct page *reuse)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_offset_k(addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = vmemmap_pud_alloc(p4d, node, addr);
+ if (!pud)
+ return NULL;
+ pmd = vmemmap_pmd_alloc(pud, node, addr);
+ if (!pmd)
+ return NULL;
+ if (pmd_leaf(*pmd))
+ /*
+ * The second page is mapped as a hugepage due to a nearby request.
+ * Force our mapping to page size without deduplication
+ */
+ return NULL;
+ pte = vmemmap_pte_alloc(pmd, node, addr);
+ if (!pte)
+ return NULL;
+ radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+ return pte;
+}
+
+static pte_t * __meminit vmemmap_compound_tail_page(unsigned long addr,
+ unsigned long pfn_offset, int node)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long map_addr;
+
+ /* the second vmemmap page which we use for duplication */
+ map_addr = addr - pfn_offset * sizeof(struct page) + PAGE_SIZE;
+ pgd = pgd_offset_k(map_addr);
+ p4d = p4d_offset(pgd, map_addr);
+ pud = vmemmap_pud_alloc(p4d, node, map_addr);
+ if (!pud)
+ return NULL;
+ pmd = vmemmap_pmd_alloc(pud, node, map_addr);
+ if (!pmd)
+ return NULL;
+ if (pmd_leaf(*pmd))
+ /*
+ * The second page is mapped as a hugepage due to a nearby request.
+ * Force our mapping to page size without deduplication
+ */
+ return NULL;
+ pte = vmemmap_pte_alloc(pmd, node, map_addr);
+ if (!pte)
+ return NULL;
+ /*
+ * Check if there already exists a mapping to the left
+ */
+ if (pte_none(*pte)) {
+ /*
+ * Populate the head page's vmemmap page.
+ * It can fall in a different pmd, hence
+ * vmemmap_populate_address()
+ */
+ pte = radix__vmemmap_populate_address(map_addr - PAGE_SIZE, node, NULL, NULL);
+ if (!pte)
+ return NULL;
+ /*
+ * Populate the tail pages' vmemmap page.
+ */
+ pte = radix__vmemmap_pte_populate(pmd, map_addr, node, NULL, NULL);
+ if (!pte)
+ return NULL;
+ vmemmap_verify(pte, node, map_addr, map_addr + PAGE_SIZE);
+ return pte;
+ }
+ return pte;
+}
+
+int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
+ unsigned long start,
+ unsigned long end, int node,
+ struct dev_pagemap *pgmap)
+{
+ /*
+ * We want to map things with base page size mappings so that
+ * we can save space in the vmemmap. We could have a huge
+ * mapping covering both edges.
+ */
+ unsigned long addr;
+ unsigned long addr_pfn = start_pfn;
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ for (addr = start; addr < end; addr = next) {
+
+ pgd = pgd_offset_k(addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = vmemmap_pud_alloc(p4d, node, addr);
+ if (!pud)
+ return -ENOMEM;
+ pmd = vmemmap_pmd_alloc(pud, node, addr);
+ if (!pmd)
+ return -ENOMEM;
+
+ if (pmd_leaf(READ_ONCE(*pmd))) {
+ /* existing huge mapping. Skip the range */
+ addr_pfn += (PMD_SIZE >> PAGE_SHIFT);
+ next = pmd_addr_end(addr, end);
+ continue;
+ }
+ pte = vmemmap_pte_alloc(pmd, node, addr);
+ if (!pte)
+ return -ENOMEM;
+ if (!pte_none(*pte)) {
+ /*
+ * This could be because we already have a compound
+ * page whose VMEMMAP_RESERVE_NR pages were mapped and
+ * this request falls within those pages.
+ */
+ addr_pfn += 1;
+ next = addr + PAGE_SIZE;
+ continue;
+ } else {
+ unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
+ unsigned long pfn_offset = addr_pfn - ALIGN_DOWN(addr_pfn, nr_pages);
+ pte_t *tail_page_pte;
+
+ /*
+ * If the address is aligned to the huge page size, it is
+ * the head mapping.
+ */
+ if (pfn_offset == 0) {
+ /* Populate the head page vmemmap page */
+ pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
+ if (!pte)
+ return -ENOMEM;
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+ /*
+ * Populate the tail pages' vmemmap page.
+ * It can fall in a different pmd, hence
+ * vmemmap_populate_address()
+ */
+ pte = radix__vmemmap_populate_address(addr + PAGE_SIZE, node, NULL, NULL);
+ if (!pte)
+ return -ENOMEM;
+
+ addr_pfn += 2;
+ next = addr + 2 * PAGE_SIZE;
+ continue;
+ }
+ /*
+ * Get the details of the 2nd mapping,
+ * and create it if it doesn't exist.
+ */
+ tail_page_pte = vmemmap_compound_tail_page(addr, pfn_offset, node);
+ if (!tail_page_pte) {
+
+ pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
+ if (!pte)
+ return -ENOMEM;
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+ addr_pfn += 1;
+ next = addr + PAGE_SIZE;
+ continue;
+ }
+
+ pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, pte_page(*tail_page_pte));
+ if (!pte)
+ return -ENOMEM;
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+ addr_pfn += 1;
+ next = addr + PAGE_SIZE;
+ continue;
+ }
+ }
+ return 0;
+}
+
+
#ifdef CONFIG_MEMORY_HOTPLUG
void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
- remove_pagetable(start, start + page_size, false);
+ remove_pagetable(start, start + page_size, true, NULL);
+}
+
+void __ref radix__vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
+{
+ remove_pagetable(start, end, false, altmap);
}
#endif
#endif
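To put numbers on what vmemmap_populate_compound_pages() above saves (illustrative, assuming 4K base pages and sizeof(struct page) == 64): a 1GB ZONE_DEVICE compound page has 262144 struct pages, i.e. 16MB of vmemmap spanning 4096 base pages; with the head/tail reuse scheme only the head vmemmap page and the shared tail vmemmap page are backed by distinct memory and the remaining PTEs point at that shared page, so on the order of 8KB ends up backing the 16MB of vmemmap.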
@@ -962,7 +1430,24 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long add
#endif
old = radix__pte_update(mm, addr, pmdp_ptep(pmdp), clr, set, 1);
- trace_hugepage_update(addr, old, clr, set);
+ trace_hugepage_update_pmd(addr, old, clr, set);
+
+ return old;
+}
+
+unsigned long radix__pud_hugepage_update(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, unsigned long clr,
+ unsigned long set)
+{
+ unsigned long old;
+
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(!pud_devmap(*pudp));
+ assert_spin_locked(pud_lockptr(mm, pudp));
+#endif
+
+ old = radix__pte_update(mm, addr, pudp_ptep(pudp), clr, set, 1);
+ trace_hugepage_update_pud(addr, old, clr, set);
return old;
}
@@ -1043,6 +1528,17 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
return old_pmd;
}
+pud_t radix__pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ pud_t old_pud;
+ unsigned long old;
+
+ old = radix__pud_hugepage_update(mm, addr, pudp, ~0UL, 0);
+ old_pud = __pud(old);
+ return old_pud;
+}
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 0bd4866d9824..3020a8b38572 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -987,6 +987,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
}
}
preempt_enable();
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
EXPORT_SYMBOL(radix__flush_tlb_mm);
@@ -1020,6 +1021,7 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
}
preempt_enable();
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
void radix__flush_all_mm(struct mm_struct *mm)
@@ -1228,6 +1230,7 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
}
out:
preempt_enable();
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
@@ -1392,6 +1395,7 @@ static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
}
out:
preempt_enable();
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
@@ -1461,6 +1465,13 @@ void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
+void radix__flush_pud_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_1G);
+}
+EXPORT_SYMBOL(radix__flush_pud_tlb_range);
+
void radix__flush_tlb_all(void)
{
unsigned long rb,prs,r,rs;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 5bfdf6ecfa96..fafce6bdeff0 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -469,7 +469,6 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
if (is_exec)
flags |= FAULT_FLAG_INSTRUCTION;
-#ifdef CONFIG_PER_VMA_LOCK
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
@@ -501,7 +500,6 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
return user_mode(regs) ? 0 : SIGBUS;
lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
@@ -551,9 +549,7 @@ retry:
mmap_read_unlock(current->mm);
-#ifdef CONFIG_PER_VMA_LOCK
done:
-#endif
if (unlikely(fault & VM_FAULT_ERROR))
return mm_fault_error(regs, address, fault);
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index fe1b83020e0d..f7930360406e 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -92,7 +92,7 @@ static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_ad
* a page table lookup here because with the hash translation we don't keep
* vmemmap details in linux page table.
*/
-static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
+int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
{
struct page *start;
unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size;
@@ -183,8 +183,8 @@ static __meminit int vmemmap_list_populate(unsigned long phys,
return 0;
}
-static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
- unsigned long page_size)
+bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+ unsigned long page_size)
{
unsigned long nr_pfn = page_size / sizeof(struct page);
unsigned long start_pfn = page_to_pfn((struct page *)start);
@@ -198,8 +198,8 @@ static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long star
return false;
}
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
- struct vmem_altmap *altmap)
+static int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
bool altmap_alloc;
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
@@ -272,6 +272,18 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
return 0;
}
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
+{
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (radix_enabled())
+ return radix__vmemmap_populate(start, end, node, altmap);
+#endif
+
+ return __vmemmap_populate(start, end, node, altmap);
+}
+
#ifdef CONFIG_MEMORY_HOTPLUG
static unsigned long vmemmap_list_free(unsigned long start)
{
@@ -303,8 +315,8 @@ static unsigned long vmemmap_list_free(unsigned long start)
return vmem_back->phys;
}
-void __ref vmemmap_free(unsigned long start, unsigned long end,
- struct vmem_altmap *altmap)
+static void __ref __vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
unsigned long page_order = get_order(page_size);
@@ -362,6 +374,17 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
vmemmap_remove_mapping(start, page_size);
}
}
+
+void __ref vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (radix_enabled())
+ return radix__vmemmap_free(start, end, altmap);
+#endif
+ return __vmemmap_free(start, end, altmap);
+}
+
#endif
void register_page_bootmem_memmap(unsigned long section_nr,
struct page *start_page, unsigned long size)
diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c
index 4f12504fb405..705e8e8ffde4 100644
--- a/arch/powerpc/mm/ioremap.c
+++ b/arch/powerpc/mm/ioremap.c
@@ -41,7 +41,7 @@ void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size)
return __ioremap_caller(addr, size, prot, caller);
}
-void __iomem *ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
+void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long flags)
{
pte_t pte = __pte(flags);
void *caller = __builtin_return_address(0);
@@ -74,27 +74,3 @@ int early_ioremap_range(unsigned long ea, phys_addr_t pa,
return 0;
}
-
-void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
- pgprot_t prot, void *caller)
-{
- struct vm_struct *area;
- int ret;
- unsigned long va;
-
- area = __get_vm_area_caller(size, VM_IOREMAP, IOREMAP_START, IOREMAP_END, caller);
- if (area == NULL)
- return NULL;
-
- area->phys_addr = pa;
- va = (unsigned long)area->addr;
-
- ret = ioremap_page_range(va, va + size, pa, prot);
- if (!ret)
- return (void __iomem *)area->addr + offset;
-
- vunmap_range(va, va + size);
- free_vm_area(area);
-
- return NULL;
-}
diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c
index 9d13143b8be4..ca5bc6be3e6f 100644
--- a/arch/powerpc/mm/ioremap_32.c
+++ b/arch/powerpc/mm/ioremap_32.c
@@ -22,6 +22,13 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
int err;
/*
+ * If the address lies within the first 16 MB, assume it's in ISA
+ * memory space
+ */
+ if (addr < SZ_16M)
+ addr += _ISA_MEM_BASE;
+
+ /*
* Choose an address to map it to.
* Once the vmalloc system is running, we use it.
* Before then, we use space going down from IOREMAP_TOP
@@ -31,13 +38,6 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
offset = addr & ~PAGE_MASK;
size = PAGE_ALIGN(addr + size) - p;
- /*
- * If the address lies within the first 16 MB, assume it's in ISA
- * memory space
- */
- if (p < 16 * 1024 * 1024)
- p += _ISA_MEM_BASE;
-
#ifndef CONFIG_CRASH_DUMP
/*
* Don't allow anybody to remap normal RAM that we're using.
@@ -63,7 +63,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
return (void __iomem *)v + offset;
if (slab_is_available())
- return do_ioremap(p, offset, size, prot, caller);
+ return generic_ioremap_prot(addr, size, prot);
/*
* Should check if it is a candidate for a BAT mapping
@@ -87,7 +87,6 @@ void iounmap(volatile void __iomem *addr)
if (v_block_mapped((unsigned long)addr))
return;
- if (addr > high_memory && (unsigned long)addr < ioremap_bot)
- vunmap((void *)(PAGE_MASK & (unsigned long)addr));
+ generic_iounmap(addr);
}
EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c
index 3acece00b33e..d24e5f166723 100644
--- a/arch/powerpc/mm/ioremap_64.c
+++ b/arch/powerpc/mm/ioremap_64.c
@@ -29,7 +29,7 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
return NULL;
if (slab_is_available())
- return do_ioremap(paligned, offset, size, prot, caller);
+ return generic_ioremap_prot(addr, size, prot);
pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
@@ -49,17 +49,9 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
*/
void iounmap(volatile void __iomem *token)
{
- void *addr;
-
if (!slab_is_available())
return;
- addr = (void *)((unsigned long __force)PCI_FIX_ADDR(token) & PAGE_MASK);
-
- if ((unsigned long)addr < ioremap_bot) {
- pr_warn("Attempt to iounmap early bolted mapping at 0x%p\n", addr);
- return;
- }
- vunmap(addr);
+ generic_iounmap(PCI_FIX_ADDR(token));
}
EXPORT_SYMBOL(iounmap);
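With the switch to GENERIC_IOREMAP the slab-available path now goes through the common generic_ioremap_prot()/generic_iounmap() helpers, and the driver-facing API is unchanged. A hedged sketch of that unchanged call pattern (res and the 0x10 register offset are made up for illustration):

/* Hypothetical driver-side usage; behaviour is unchanged by this conversion. */
void __iomem *regs = ioremap(res->start, resource_size(res));

if (!regs)
	return -ENOMEM;
writel(0x1, regs + 0x10);	/* 0x10: made-up control register offset */
iounmap(regs);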
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
index 20652daa1d7e..8c31802f97e8 100644
--- a/arch/powerpc/mm/pgtable-frag.c
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -18,15 +18,15 @@
void pte_frag_destroy(void *pte_frag)
{
int count;
- struct page *page;
+ struct ptdesc *ptdesc;
- page = virt_to_page(pte_frag);
+ ptdesc = virt_to_ptdesc(pte_frag);
/* drop all the pending references */
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
- pgtable_pte_page_dtor(page);
- __free_page(page);
+ if (atomic_sub_and_test(PTE_FRAG_NR - count, &ptdesc->pt_frag_refcount)) {
+ pagetable_pte_dtor(ptdesc);
+ pagetable_free(ptdesc);
}
}
@@ -55,25 +55,25 @@ static pte_t *get_pte_from_cache(struct mm_struct *mm)
static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
{
void *ret = NULL;
- struct page *page;
+ struct ptdesc *ptdesc;
if (!kernel) {
- page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
- if (!page)
+ ptdesc = pagetable_alloc(PGALLOC_GFP | __GFP_ACCOUNT, 0);
+ if (!ptdesc)
return NULL;
- if (!pgtable_pte_page_ctor(page)) {
- __free_page(page);
+ if (!pagetable_pte_ctor(ptdesc)) {
+ pagetable_free(ptdesc);
return NULL;
}
} else {
- page = alloc_page(PGALLOC_GFP);
- if (!page)
+ ptdesc = pagetable_alloc(PGALLOC_GFP, 0);
+ if (!ptdesc)
return NULL;
}
- atomic_set(&page->pt_frag_refcount, 1);
+ atomic_set(&ptdesc->pt_frag_refcount, 1);
- ret = page_address(page);
+ ret = ptdesc_address(ptdesc);
/*
* if we support only one fragment just return the
* allocated page.
@@ -82,12 +82,12 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
return ret;
spin_lock(&mm->page_table_lock);
/*
- * If we find pgtable_page set, we return
+ * If we find ptdesc_page set, we return
* the allocated page with single fragment
* count.
*/
if (likely(!pte_frag_get(&mm->context))) {
- atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
+ atomic_set(&ptdesc->pt_frag_refcount, PTE_FRAG_NR);
pte_frag_set(&mm->context, ret + PTE_FRAG_SIZE);
}
spin_unlock(&mm->page_table_lock);
@@ -106,17 +106,40 @@ pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
return __alloc_for_ptecache(mm, kernel);
}
-void pte_fragment_free(unsigned long *table, int kernel)
+static void pte_free_now(struct rcu_head *head)
{
- struct page *page = virt_to_page(table);
+ struct ptdesc *ptdesc;
- if (PageReserved(page))
- return free_reserved_page(page);
+ ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
+ pagetable_pte_dtor(ptdesc);
+ pagetable_free(ptdesc);
+}
- BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
- if (atomic_dec_and_test(&page->pt_frag_refcount)) {
- if (!kernel)
- pgtable_pte_page_dtor(page);
- __free_page(page);
+void pte_fragment_free(unsigned long *table, int kernel)
+{
+ struct ptdesc *ptdesc = virt_to_ptdesc(table);
+
+ if (pagetable_is_reserved(ptdesc))
+ return free_reserved_ptdesc(ptdesc);
+
+ BUG_ON(atomic_read(&ptdesc->pt_frag_refcount) <= 0);
+ if (atomic_dec_and_test(&ptdesc->pt_frag_refcount)) {
+ if (kernel)
+ pagetable_free(ptdesc);
+ else if (folio_test_clear_active(ptdesc_folio(ptdesc)))
+ call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
+ else
+ pte_free_now(&ptdesc->pt_rcu_head);
}
}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
+{
+ struct page *page;
+
+ page = virt_to_page(pgtable);
+ SetPageActive(page);
+ pte_fragment_free((unsigned long *)pgtable, 0);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
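The deferral above uses the folio active flag as a free-via-RCU marker: pte_free_defer() sets it and pte_fragment_free() then routes the final put through call_rcu(), so lockless page-table walkers can keep reading the detached table until a grace period passes. A hedged sketch of the expected caller pattern (roughly what a THP collapse path would do; details are illustrative, not part of this patch):

/* Hypothetical caller: detach the page table, then defer freeing it. */
pmd_t old_pmd = pmdp_collapse_flush(vma, haddr, pmdp);

/* Concurrent lockless walkers may still hold a pointer to the old table. */
pte_free_defer(mm, pmd_pgtable(old_pmd));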
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index cb2dcdb18f8e..a3dcdb2d5b4b 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -311,6 +311,8 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
+ pte_t *pte;
+ spinlock_t *ptl;
if (mm == &init_mm)
return;
@@ -329,8 +331,10 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
*/
if (pmd_none(*pmd))
return;
- BUG_ON(!pmd_present(*pmd));
- assert_spin_locked(pte_lockptr(mm, pmd));
+ pte = pte_offset_map_nolock(mm, pmd, addr, &ptl);
+ BUG_ON(!pte);
+ assert_spin_locked(ptl);
+ pte_unmap(pte);
}
#endif /* CONFIG_DEBUG_VM */
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 45fd975ef521..340b86ef7284 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -94,6 +94,7 @@ config PPC_BOOK3S_64
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
select ARCH_ENABLE_SPLIT_PMD_PTLOCK
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 9c62c2c3b3d0..4f3d6a2f9065 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -637,7 +637,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
nid = first_online_node;
/* Add the memory */
- rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_NONE);
+ rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_MEMMAP_ON_MEMORY);
if (rc) {
invalidate_lmb_associativity_index(lmb);
return rc;
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index fae747cc57d2..ee17270d35d0 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1084,7 +1084,7 @@ cmds(struct pt_regs *excp)
memzcan();
break;
case 'i':
- show_mem(0, NULL);
+ show_mem();
break;
default:
termch = cmd;
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 4c07b9189c86..6943d34c1ec1 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -53,7 +53,7 @@ config RISCV
select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select ARCH_WANT_LD_ORPHAN_WARN if !XIP_KERNEL
- select ARCH_WANT_OPTIMIZE_VMEMMAP
+ select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
select BUILDTIME_TABLE_SORT if MMU
diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h
index ce1ebda1a49a..34e24f078cc1 100644
--- a/arch/riscv/include/asm/hugetlb.h
+++ b/arch/riscv/include/asm/hugetlb.h
@@ -2,6 +2,7 @@
#ifndef _ASM_RISCV_HUGETLB_H
#define _ASM_RISCV_HUGETLB_H
+#include <asm/cacheflush.h>
#include <asm/page.h>
static inline void arch_clear_hugepage_flags(struct page *page)
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 59dc12b5b7e8..d169a4f41a2e 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -153,10 +153,10 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
#endif /* __PAGETABLE_PMD_FOLDED */
-#define __pte_free_tlb(tlb, pte, buf) \
-do { \
- pgtable_pte_page_dtor(pte); \
- tlb_remove_page((tlb), pte); \
+#define __pte_free_tlb(tlb, pte, buf) \
+do { \
+ pagetable_pte_dtor(page_ptdesc(pte)); \
+ tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));\
} while (0)
#endif /* CONFIG_MMU */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 75970ee2bda2..44377f0d7c35 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -499,7 +499,7 @@ static inline void __set_pte_at(struct mm_struct *mm,
static inline void set_pte_at(struct mm_struct *mm,
unsigned long addr, pte_t *ptep, pte_t pteval)
{
- page_table_check_pte_set(mm, addr, ptep, pteval);
+ page_table_check_pte_set(mm, ptep, pteval);
__set_pte_at(mm, addr, ptep, pteval);
}
@@ -529,7 +529,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
{
pte_t pte = __pte(atomic_long_xchg((atomic_long_t *)ptep, 0));
- page_table_check_pte_clear(mm, address, pte);
+ page_table_check_pte_clear(mm, pte);
return pte;
}
@@ -687,14 +687,14 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
- page_table_check_pmd_set(mm, addr, pmdp, pmd);
+ page_table_check_pmd_set(mm, pmdp, pmd);
return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
- page_table_check_pud_set(mm, addr, pudp, pud);
+ page_table_check_pud_set(mm, pudp, pud);
return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
}
@@ -742,7 +742,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
{
pmd_t pmd = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, 0));
- page_table_check_pmd_clear(mm, address, pmd);
+ page_table_check_pmd_clear(mm, pmd);
return pmd;
}
@@ -758,7 +758,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
- page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
+ page_table_check_pmd_set(vma->vm_mm, pmdp, pmd);
return __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd)));
}
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 6ea2cce4cc17..046732fcb48c 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -283,7 +283,6 @@ void handle_page_fault(struct pt_regs *regs)
flags |= FAULT_FLAG_WRITE;
else if (cause == EXC_INST_PAGE_FAULT)
flags |= FAULT_FLAG_INSTRUCTION;
-#ifdef CONFIG_PER_VMA_LOCK
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
@@ -311,7 +310,6 @@ void handle_page_fault(struct pt_regs *regs)
return;
}
lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
retry:
vma = lock_mm_and_find_vma(mm, addr, regs);
@@ -368,9 +366,7 @@ retry:
mmap_read_unlock(mm);
-#ifdef CONFIG_PER_VMA_LOCK
done:
-#endif
if (unlikely(fault & VM_FAULT_ERROR)) {
tsk->thread.bad_cause = cause;
mm_fault_error(regs, addr, fault);
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 9ce504737d18..430a3d05a841 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -353,12 +353,10 @@ static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
static phys_addr_t __init alloc_pte_late(uintptr_t va)
{
- unsigned long vaddr;
-
- vaddr = __get_free_page(GFP_KERNEL);
- BUG_ON(!vaddr || !pgtable_pte_page_ctor(virt_to_page((void *)vaddr)));
+ struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);
- return __pa(vaddr);
+ BUG_ON(!ptdesc || !pagetable_pte_ctor(ptdesc));
+ return __pa((pte_t *)ptdesc_address(ptdesc));
}
static void __init create_pte_mapping(pte_t *ptep,
@@ -436,12 +434,10 @@ static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
static phys_addr_t __init alloc_pmd_late(uintptr_t va)
{
- unsigned long vaddr;
-
- vaddr = __get_free_page(GFP_KERNEL);
- BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page((void *)vaddr)));
+ struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);
- return __pa(vaddr);
+ BUG_ON(!ptdesc || !pagetable_pmd_ctor(ptdesc));
+ return __pa((pmd_t *)ptdesc_address(ptdesc));
}
static void __init create_pmd_mapping(pmd_t *pmdp,
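Both late allocators above now follow the same ptdesc pattern: allocate a lowmem page-table descriptor, run the matching constructor, and return the physical address of the table it describes. A condensed sketch of that common shape (the function name is invented; the helpers are those used in the hunks):

	static phys_addr_t __init alloc_pgtable_page_late(void)
	{
		/* page tables must live in lowmem, hence ~__GFP_HIGHMEM */
		struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);

		/* the constructor sets up split-lock and accounting state */
		BUG_ON(!ptdesc || !pagetable_pte_ctor(ptdesc));
		return __pa(ptdesc_address(ptdesc));	/* tables are wired by physical address */
	}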
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 5b39918b7042..8ff6d1c21e38 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -127,7 +127,7 @@ config S390
select ARCH_WANTS_NO_INSTR
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_IPC_PARSE_VERSION
- select ARCH_WANT_OPTIMIZE_VMEMMAP
+ select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS2
select DMA_OPS if PCI
@@ -143,6 +143,7 @@ config S390
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select GENERIC_VDSO_TIME_NS
+ select GENERIC_IOREMAP if PCI
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index e3882b012bfa..4453ad7c11ac 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -22,11 +22,18 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define IO_SPACE_LIMIT 0
-void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot);
-void __iomem *ioremap(phys_addr_t addr, size_t size);
-void __iomem *ioremap_wc(phys_addr_t addr, size_t size);
-void __iomem *ioremap_wt(phys_addr_t addr, size_t size);
-void iounmap(volatile void __iomem *addr);
+/*
+ * I/O memory mapping functions.
+ */
+#define ioremap_prot ioremap_prot
+#define iounmap iounmap
+
+#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL)
+
+#define ioremap_wc(addr, size) \
+ ioremap_prot((addr), (size), pgprot_val(pgprot_writecombine(PAGE_KERNEL)))
+#define ioremap_wt(addr, size) \
+ ioremap_prot((addr), (size), pgprot_val(pgprot_writethrough(PAGE_KERNEL)))
static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
@@ -51,10 +58,6 @@ static inline void ioport_unmap(void __iomem *p)
#define pci_iomap_wc pci_iomap_wc
#define pci_iomap_wc_range pci_iomap_wc_range
-#define ioremap ioremap
-#define ioremap_wt ioremap_wt
-#define ioremap_wc ioremap_wc
-
#define memcpy_fromio(dst, src, count) zpci_memcpy_fromio(dst, src, count)
#define memcpy_toio(dst, src, count) zpci_memcpy_toio(dst, src, count)
#define memset_io(dst, val, count) zpci_memset_io(dst, val, count)
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 17eb618f1348..376b4b23bdaa 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -86,7 +86,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
if (!table)
return NULL;
crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
- if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
+ if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) {
crst_table_free(mm, table);
return NULL;
}
@@ -97,7 +97,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
if (mm_pmd_folded(mm))
return;
- pgtable_pmd_page_dtor(virt_to_page(pmd));
+ pagetable_pmd_dtor(virt_to_ptdesc(pmd));
crst_table_free(mm, (unsigned long *) pmd);
}
@@ -143,6 +143,10 @@ static inline void pmd_populate(struct mm_struct *mm,
#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
+/* arch uses the pte_free_defer() implementation in arch/s390/mm/pgalloc.c */

+#define pte_free_defer pte_free_defer
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+
void vmem_map_init(void);
void *vmem_crst_alloc(unsigned long val);
pte_t *vmem_pte_alloc(void);
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index b91f4a9b044c..383b1f91442c 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -89,12 +89,12 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
{
if (mm_pmd_folded(tlb->mm))
return;
- pgtable_pmd_page_dtor(virt_to_page(pmd));
+ pagetable_pmd_dtor(virt_to_ptdesc(pmd));
__tlb_adjust_range(tlb, address, PAGE_SIZE);
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_puds = 1;
- tlb_remove_table(tlb, pmd);
+ tlb_remove_ptdesc(tlb, pmd);
}
/*
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2f123429a291..6f6b9881e55e 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -407,7 +407,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
access = VM_WRITE;
if (access == VM_WRITE)
flags |= FAULT_FLAG_WRITE;
-#ifdef CONFIG_PER_VMA_LOCK
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
vma = lock_vma_under_rcu(mm, address);
@@ -432,7 +431,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
goto out;
}
lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
mmap_read_lock(mm);
gmap = NULL;
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 66ab68db9842..07fc660a24aa 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -43,17 +43,17 @@ __initcall(page_table_register_sysctl);
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
- struct page *page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+ struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
- if (!page)
+ if (!ptdesc)
return NULL;
- arch_set_page_dat(page, CRST_ALLOC_ORDER);
- return (unsigned long *) page_to_virt(page);
+ arch_set_page_dat(ptdesc_page(ptdesc), CRST_ALLOC_ORDER);
+ return (unsigned long *) ptdesc_to_virt(ptdesc);
}
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
- free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+ pagetable_free(virt_to_ptdesc(table));
}
static void __crst_table_upgrade(void *arg)
@@ -140,21 +140,21 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
struct page *page_table_alloc_pgste(struct mm_struct *mm)
{
- struct page *page;
+ struct ptdesc *ptdesc;
u64 *table;
- page = alloc_page(GFP_KERNEL);
- if (page) {
- table = (u64 *)page_to_virt(page);
+ ptdesc = pagetable_alloc(GFP_KERNEL, 0);
+ if (ptdesc) {
+ table = (u64 *)ptdesc_to_virt(ptdesc);
memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
}
- return page;
+ return ptdesc_page(ptdesc);
}
void page_table_free_pgste(struct page *page)
{
- __free_page(page);
+ pagetable_free(page_ptdesc(page));
}
#endif /* CONFIG_PGSTE */
@@ -229,11 +229,20 @@ void page_table_free_pgste(struct page *page)
* logic described above. Both AA bits are set to 1 to denote a 4KB-pgtable
* while the PP bits are never used, nor such a page is added to or removed
* from mm_context_t::pgtable_list.
+ *
+ * pte_free_defer() overrides those rules: it takes the page off pgtable_list,
+ * and prevents both 2K fragments from being reused. pte_free_defer() has to
+ * guarantee that its pgtable cannot be reused before the RCU grace period
+ * has elapsed (which page_table_free_rcu() does not actually guarantee).
+ * But for simplicity, because page->rcu_head overlays page->lru, and because
+ * the RCU callback might not be called before the mm_context_t has been freed,
+ * pte_free_defer() in this implementation prevents both fragments from being
+ * reused, and delays making the call to RCU until both fragments are freed.
*/
unsigned long *page_table_alloc(struct mm_struct *mm)
{
unsigned long *table;
- struct page *page;
+ struct ptdesc *ptdesc;
unsigned int mask, bit;
/* Try to get a fragment of a 4K page as a 2K page table */
@@ -241,9 +250,9 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
table = NULL;
spin_lock_bh(&mm->context.lock);
if (!list_empty(&mm->context.pgtable_list)) {
- page = list_first_entry(&mm->context.pgtable_list,
- struct page, lru);
- mask = atomic_read(&page->_refcount) >> 24;
+ ptdesc = list_first_entry(&mm->context.pgtable_list,
+ struct ptdesc, pt_list);
+ mask = atomic_read(&ptdesc->_refcount) >> 24;
/*
* The pending removal bits must also be checked.
* Failure to do so might lead to an impossible
@@ -255,13 +264,13 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
*/
mask = (mask | (mask >> 4)) & 0x03U;
if (mask != 0x03U) {
- table = (unsigned long *) page_to_virt(page);
+ table = (unsigned long *) ptdesc_to_virt(ptdesc);
bit = mask & 1; /* =1 -> second 2K */
if (bit)
table += PTRS_PER_PTE;
- atomic_xor_bits(&page->_refcount,
+ atomic_xor_bits(&ptdesc->_refcount,
0x01U << (bit + 24));
- list_del(&page->lru);
+ list_del_init(&ptdesc->pt_list);
}
}
spin_unlock_bh(&mm->context.lock);
@@ -269,27 +278,28 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
return table;
}
/* Allocate a fresh page */
- page = alloc_page(GFP_KERNEL);
- if (!page)
+ ptdesc = pagetable_alloc(GFP_KERNEL, 0);
+ if (!ptdesc)
return NULL;
- if (!pgtable_pte_page_ctor(page)) {
- __free_page(page);
+ if (!pagetable_pte_ctor(ptdesc)) {
+ pagetable_free(ptdesc);
return NULL;
}
- arch_set_page_dat(page, 0);
+ arch_set_page_dat(ptdesc_page(ptdesc), 0);
/* Initialize page table */
- table = (unsigned long *) page_to_virt(page);
+ table = (unsigned long *) ptdesc_to_virt(ptdesc);
if (mm_alloc_pgste(mm)) {
/* Return 4K page table with PGSTEs */
- atomic_xor_bits(&page->_refcount, 0x03U << 24);
+ INIT_LIST_HEAD(&ptdesc->pt_list);
+ atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
} else {
/* Return the first 2K fragment of the page */
- atomic_xor_bits(&page->_refcount, 0x01U << 24);
+ atomic_xor_bits(&ptdesc->_refcount, 0x01U << 24);
memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
spin_lock_bh(&mm->context.lock);
- list_add(&page->lru, &mm->context.pgtable_list);
+ list_add(&ptdesc->pt_list, &mm->context.pgtable_list);
spin_unlock_bh(&mm->context.lock);
}
return table;
@@ -300,7 +310,9 @@ static void page_table_release_check(struct page *page, void *table,
{
char msg[128];
- if (!IS_ENABLED(CONFIG_DEBUG_VM) || !mask)
+ if (!IS_ENABLED(CONFIG_DEBUG_VM))
+ return;
+ if (!mask && list_empty(&page->lru))
return;
snprintf(msg, sizeof(msg),
"Invalid pgtable %p release half 0x%02x mask 0x%02x",
@@ -308,12 +320,20 @@ static void page_table_release_check(struct page *page, void *table,
dump_page(page, msg);
}
+static void pte_free_now(struct rcu_head *head)
+{
+ struct ptdesc *ptdesc;
+
+ ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
+ pagetable_pte_dtor(ptdesc);
+ pagetable_free(ptdesc);
+}
+
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
unsigned int mask, bit, half;
- struct page *page;
+ struct ptdesc *ptdesc = virt_to_ptdesc(table);
- page = virt_to_page(table);
if (!mm_alloc_pgste(mm)) {
/* Free 2K page table fragment of a 4K page */
bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
@@ -323,42 +343,50 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
* will happen outside of the critical section from this
* function or from __tlb_remove_table()
*/
- mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
+ mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24));
mask >>= 24;
- if (mask & 0x03U)
- list_add(&page->lru, &mm->context.pgtable_list);
- else
- list_del(&page->lru);
+ if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) {
+ /*
+ * Other half is allocated, and neither half has had
+ * its free deferred: add page to head of list, to make
+ * this freed half available for immediate reuse.
+ */
+ list_add(&ptdesc->pt_list, &mm->context.pgtable_list);
+ } else {
+ /* If page is on list, now remove it. */
+ list_del_init(&ptdesc->pt_list);
+ }
spin_unlock_bh(&mm->context.lock);
- mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24));
+ mask = atomic_xor_bits(&ptdesc->_refcount, 0x10U << (bit + 24));
mask >>= 24;
if (mask != 0x00U)
return;
half = 0x01U << bit;
} else {
half = 0x03U;
- mask = atomic_xor_bits(&page->_refcount, 0x03U << 24);
+ mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
mask >>= 24;
}
- page_table_release_check(page, table, half, mask);
- pgtable_pte_page_dtor(page);
- __free_page(page);
+ page_table_release_check(ptdesc_page(ptdesc), table, half, mask);
+ if (folio_test_clear_active(ptdesc_folio(ptdesc)))
+ call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
+ else
+ pte_free_now(&ptdesc->pt_rcu_head);
}
void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
unsigned long vmaddr)
{
struct mm_struct *mm;
- struct page *page;
unsigned int bit, mask;
+ struct ptdesc *ptdesc = virt_to_ptdesc(table);
mm = tlb->mm;
- page = virt_to_page(table);
if (mm_alloc_pgste(mm)) {
gmap_unlink(mm, table, vmaddr);
table = (unsigned long *) ((unsigned long)table | 0x03U);
- tlb_remove_table(tlb, table);
+ tlb_remove_ptdesc(tlb, table);
return;
}
bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
@@ -368,12 +396,20 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
* outside of the critical section from __tlb_remove_table() or from
* page_table_free()
*/
- mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
+ mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24));
mask >>= 24;
- if (mask & 0x03U)
- list_add_tail(&page->lru, &mm->context.pgtable_list);
- else
- list_del(&page->lru);
+ if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) {
+ /*
+ * Other half is allocated, and neither half has had
+ * its free deferred: add page to end of list, to make
+ * this freed half available for reuse once its pending
+ * bit has been cleared by __tlb_remove_table().
+ */
+ list_add_tail(&ptdesc->pt_list, &mm->context.pgtable_list);
+ } else {
+ /* If page is on list, now remove it. */
+ list_del_init(&ptdesc->pt_list);
+ }
spin_unlock_bh(&mm->context.lock);
table = (unsigned long *) ((unsigned long) table | (0x01U << bit));
tlb_remove_table(tlb, table);
@@ -383,30 +419,48 @@ void __tlb_remove_table(void *_table)
{
unsigned int mask = (unsigned long) _table & 0x03U, half = mask;
void *table = (void *)((unsigned long) _table ^ mask);
- struct page *page = virt_to_page(table);
+ struct ptdesc *ptdesc = virt_to_ptdesc(table);
switch (half) {
case 0x00U: /* pmd, pud, or p4d */
- free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+ pagetable_free(ptdesc);
return;
case 0x01U: /* lower 2K of a 4K page table */
case 0x02U: /* higher 2K of a 4K page table */
- mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
+ mask = atomic_xor_bits(&ptdesc->_refcount, mask << (4 + 24));
mask >>= 24;
if (mask != 0x00U)
return;
break;
case 0x03U: /* 4K page table with pgstes */
- mask = atomic_xor_bits(&page->_refcount, 0x03U << 24);
+ mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
mask >>= 24;
break;
}
- page_table_release_check(page, table, half, mask);
- pgtable_pte_page_dtor(page);
- __free_page(page);
+ page_table_release_check(ptdesc_page(ptdesc), table, half, mask);
+ if (folio_test_clear_active(ptdesc_folio(ptdesc)))
+ call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
+ else
+ pte_free_now(&ptdesc->pt_rcu_head);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
+{
+ struct page *page;
+
+ page = virt_to_page(pgtable);
+ SetPageActive(page);
+ page_table_free(mm, (unsigned long *)pgtable);
+ /*
+ * page_table_free() does not do the pgste gmap_unlink() which
+ * page_table_free_rcu() does: warn us if pgste ever reaches here.
+ */
+ WARN_ON_ONCE(mm_has_pgste(mm));
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
/*
* Base infrastructure required to generate basic asces, region, segment,
* and page tables that do not make use of enhanced features like EDAT1.
@@ -432,16 +486,20 @@ static void base_pgt_free(unsigned long *table)
static unsigned long *base_crst_alloc(unsigned long val)
{
unsigned long *table;
+ struct ptdesc *ptdesc;
- table = (unsigned long *)__get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
- if (table)
- crst_table_init(table, val);
+ ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, CRST_ALLOC_ORDER);
+ if (!ptdesc)
+ return NULL;
+ table = ptdesc_address(ptdesc);
+
+ crst_table_init(table, val);
return table;
}
static void base_crst_free(unsigned long *table)
{
- free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+ pagetable_free(virt_to_ptdesc(table));
}
#define BASE_ADDR_END_FUNC(NAME, SIZE) \
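The fragment bookkeeping manipulated throughout this file lives in the upper byte of the ptdesc's _refcount, as the atomic_xor_bits() calls above show: after shifting right by 24, bits 0-1 say which 2K halves are allocated and bits 4-5 mark halves whose free is still pending. A small sketch of reading that state (the helper is hypothetical):

	static bool pte_half_allocated(struct ptdesc *ptdesc, unsigned int bit)
	{
		unsigned int mask = atomic_read(&ptdesc->_refcount) >> 24;

		/* bit 0: lower 2K fragment, bit 1: upper 2K fragment;
		 * the pending-removal flags sit four bits higher (0x10/0x20) */
		return mask & (0x01U << bit);
	}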
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index afc3f33788da..d34d5813d006 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -244,62 +244,25 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
zpci_memcpy_toio(to, from, count);
}
-static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+ unsigned long prot)
{
- unsigned long offset, vaddr;
- struct vm_struct *area;
- phys_addr_t last_addr;
-
- last_addr = addr + size - 1;
- if (!size || last_addr < addr)
- return NULL;
-
+ /*
+ * When PCI MIO instructions are unavailable the "physical" address
+ * encodes a hint for accessing the PCI memory space it represents.
+ * Just pass it unchanged such that ioread/iowrite can decode it.
+ */
if (!static_branch_unlikely(&have_mio))
- return (void __iomem *) addr;
+ return (void __iomem *)phys_addr;
- offset = addr & ~PAGE_MASK;
- addr &= PAGE_MASK;
- size = PAGE_ALIGN(size + offset);
- area = get_vm_area(size, VM_IOREMAP);
- if (!area)
- return NULL;
-
- vaddr = (unsigned long) area->addr;
- if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) {
- free_vm_area(area);
- return NULL;
- }
- return (void __iomem *) ((unsigned long) area->addr + offset);
-}
-
-void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
-{
- return __ioremap(addr, size, __pgprot(prot));
+ return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
}
EXPORT_SYMBOL(ioremap_prot);
-void __iomem *ioremap(phys_addr_t addr, size_t size)
-{
- return __ioremap(addr, size, PAGE_KERNEL);
-}
-EXPORT_SYMBOL(ioremap);
-
-void __iomem *ioremap_wc(phys_addr_t addr, size_t size)
-{
- return __ioremap(addr, size, pgprot_writecombine(PAGE_KERNEL));
-}
-EXPORT_SYMBOL(ioremap_wc);
-
-void __iomem *ioremap_wt(phys_addr_t addr, size_t size)
-{
- return __ioremap(addr, size, pgprot_writethrough(PAGE_KERNEL));
-}
-EXPORT_SYMBOL(ioremap_wt);
-
void iounmap(volatile void __iomem *addr)
{
if (static_branch_likely(&have_mio))
- vunmap((__force void *) ((unsigned long) addr & PAGE_MASK));
+ generic_iounmap(addr);
}
EXPORT_SYMBOL(iounmap);
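With the io.h and pci.c hunks above, every s390 mapping variant funnels into ioremap_prot(), which either returns the address untouched (no MIO instructions) or hands it to generic_ioremap_prot(). A hypothetical driver snippet showing the resulting call shape:

	static void __iomem *bar_regs;

	static int map_bar(phys_addr_t bar, size_t len)
	{
		/* expands to ioremap_prot(bar, len,
		 * pgprot_val(pgprot_writecombine(PAGE_KERNEL))) per io.h above */
		bar_regs = ioremap_wc(bar, len);
		return bar_regs ? 0 : -ENOMEM;
	}

	static void unmap_bar(void)
	{
		iounmap(bar_regs);	/* generic_iounmap() when MIO is in use, no-op otherwise */
	}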
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 2b3ce4fd3956..6be32254211c 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -29,6 +29,7 @@ config SUPERH
select GENERIC_SMP_IDLE_THREAD
select GUP_GET_PXX_LOW_HIGH if X2TLB
select HAS_IOPORT if HAS_IOPORT_MAP
+ select GENERIC_IOREMAP if MMU
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_KGDB
select HAVE_ARCH_SECCOMP_FILTER
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index d8f3537ef57f..f2f38e9d489a 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -119,6 +119,30 @@ void __raw_readsl(const void __iomem *addr, void *data, int longlen);
__BUILD_MEMORY_STRING(__raw_, q, u64)
+#define ioport_map ioport_map
+#define ioport_unmap ioport_unmap
+#define pci_iounmap pci_iounmap
+
+#define ioread8 ioread8
+#define ioread16 ioread16
+#define ioread16be ioread16be
+#define ioread32 ioread32
+#define ioread32be ioread32be
+
+#define iowrite8 iowrite8
+#define iowrite16 iowrite16
+#define iowrite16be iowrite16be
+#define iowrite32 iowrite32
+#define iowrite32be iowrite32be
+
+#define ioread8_rep ioread8_rep
+#define ioread16_rep ioread16_rep
+#define ioread32_rep ioread32_rep
+
+#define iowrite8_rep iowrite8_rep
+#define iowrite16_rep iowrite16_rep
+#define iowrite32_rep iowrite32_rep
+
#ifdef CONFIG_HAS_IOPORT_MAP
/*
@@ -221,10 +245,33 @@ __BUILD_IOPORT_STRING(q, u64)
#endif
+#define inb(addr) inb(addr)
+#define inw(addr) inw(addr)
+#define inl(addr) inl(addr)
+#define outb(x, addr) outb((x), (addr))
+#define outw(x, addr) outw((x), (addr))
+#define outl(x, addr) outl((x), (addr))
+
+#define inb_p(addr) inb(addr)
+#define inw_p(addr) inw(addr)
+#define inl_p(addr) inl(addr)
+#define outb_p(x, addr) outb((x), (addr))
+#define outw_p(x, addr) outw((x), (addr))
+#define outl_p(x, addr) outl((x), (addr))
+
+#define insb insb
+#define insw insw
+#define insl insl
+#define outsb outsb
+#define outsw outsw
+#define outsl outsl
#define IO_SPACE_LIMIT 0xffffffff
/* We really want to try and get these to memcpy etc */
+#define memset_io memset_io
+#define memcpy_fromio memcpy_fromio
+#define memcpy_toio memcpy_toio
void memcpy_fromio(void *, const volatile void __iomem *, unsigned long);
void memcpy_toio(volatile void __iomem *, const void *, unsigned long);
void memset_io(volatile void __iomem *, int, unsigned long);
@@ -243,40 +290,16 @@ unsigned long long poke_real_address_q(unsigned long long addr,
#endif
#ifdef CONFIG_MMU
-void iounmap(void __iomem *addr);
-void __iomem *__ioremap_caller(phys_addr_t offset, unsigned long size,
- pgprot_t prot, void *caller);
-
-static inline void __iomem *ioremap(phys_addr_t offset, unsigned long size)
-{
- return __ioremap_caller(offset, size, PAGE_KERNEL_NOCACHE,
- __builtin_return_address(0));
-}
-
-static inline void __iomem *
-ioremap_cache(phys_addr_t offset, unsigned long size)
-{
- return __ioremap_caller(offset, size, PAGE_KERNEL,
- __builtin_return_address(0));
-}
-#define ioremap_cache ioremap_cache
-
-#ifdef CONFIG_HAVE_IOREMAP_PROT
-static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
- unsigned long flags)
-{
- return __ioremap_caller(offset, size, __pgprot(flags),
- __builtin_return_address(0));
-}
-#endif /* CONFIG_HAVE_IOREMAP_PROT */
+/*
+ * I/O memory mapping functions.
+ */
+#define ioremap_prot ioremap_prot
+#define iounmap iounmap
-#else /* CONFIG_MMU */
-static inline void __iomem *ioremap(phys_addr_t offset, size_t size)
-{
- return (void __iomem *)(unsigned long)offset;
-}
+#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL_NOCACHE)
-static inline void iounmap(volatile void __iomem *addr) { }
+#define ioremap_cache(addr, size) \
+ ioremap_prot((addr), (size), pgprot_val(PAGE_KERNEL))
#endif /* CONFIG_MMU */
#define ioremap_uc ioremap
@@ -288,6 +311,8 @@ static inline void iounmap(volatile void __iomem *addr) { }
#define xlate_dev_mem_ptr(p) __va(p)
#define unxlate_dev_mem_ptr(p, v) do { } while (0)
+#include <asm-generic/io.h>
+
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
int valid_phys_addr_range(phys_addr_t addr, size_t size);
int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
diff --git a/arch/sh/include/asm/io_noioport.h b/arch/sh/include/asm/io_noioport.h
index f7938fe0f911..12dad91f41c1 100644
--- a/arch/sh/include/asm/io_noioport.h
+++ b/arch/sh/include/asm/io_noioport.h
@@ -46,13 +46,6 @@ static inline void ioport_unmap(void __iomem *addr)
BUG();
}
-#define inb_p(addr) inb(addr)
-#define inw_p(addr) inw(addr)
-#define inl_p(addr) inl(addr)
-#define outb_p(x, addr) outb((x), (addr))
-#define outw_p(x, addr) outw((x), (addr))
-#define outl_p(x, addr) outl((x), (addr))
-
static inline void insb(unsigned long port, void *dst, unsigned long count)
{
BUG();
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index a9e98233c4d4..5d8577ab1591 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -2,6 +2,7 @@
#ifndef __ASM_SH_PGALLOC_H
#define __ASM_SH_PGALLOC_H
+#include <linux/mm.h>
#include <asm/page.h>
#define __HAVE_ARCH_PMD_ALLOC_ONE
@@ -31,10 +32,10 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
set_pmd(pmd, __pmd((unsigned long)page_address(pte)));
}
-#define __pte_free_tlb(tlb,pte,addr) \
-do { \
- pgtable_pte_page_dtor(pte); \
- tlb_remove_page((tlb), (pte)); \
+#define __pte_free_tlb(tlb, pte, addr) \
+do { \
+ pagetable_pte_dtor(page_ptdesc(pte)); \
+ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \
} while (0)
#endif /* __ASM_SH_PGALLOC_H */
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
index 21342581144d..c33b3daa4ad1 100644
--- a/arch/sh/mm/ioremap.c
+++ b/arch/sh/mm/ioremap.c
@@ -72,22 +72,11 @@ __ioremap_29bit(phys_addr_t offset, unsigned long size, pgprot_t prot)
#define __ioremap_29bit(offset, size, prot) NULL
#endif /* CONFIG_29BIT */
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem * __ref
-__ioremap_caller(phys_addr_t phys_addr, unsigned long size,
- pgprot_t pgprot, void *caller)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+ unsigned long prot)
{
- struct vm_struct *area;
- unsigned long offset, last_addr, addr, orig_addr;
void __iomem *mapped;
+ pgprot_t pgprot = __pgprot(prot);
mapped = __ioremap_trapped(phys_addr, size);
if (mapped)
@@ -97,11 +86,6 @@ __ioremap_caller(phys_addr_t phys_addr, unsigned long size,
if (mapped)
return mapped;
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
/*
* If we can't yet use the regular approach, go the fixmap route.
*/
@@ -112,34 +96,14 @@ __ioremap_caller(phys_addr_t phys_addr, unsigned long size,
* First try to remap through the PMB.
* PMB entries are all pre-faulted.
*/
- mapped = pmb_remap_caller(phys_addr, size, pgprot, caller);
+ mapped = pmb_remap_caller(phys_addr, size, pgprot,
+ __builtin_return_address(0));
if (mapped && !IS_ERR(mapped))
return mapped;
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area_caller(size, VM_IOREMAP, caller);
- if (!area)
- return NULL;
- area->phys_addr = phys_addr;
- orig_addr = addr = (unsigned long)area->addr;
-
- if (ioremap_page_range(addr, addr + size, phys_addr, pgprot)) {
- vunmap((void *)orig_addr);
- return NULL;
- }
-
- return (void __iomem *)(offset + (char *)orig_addr);
+ return generic_ioremap_prot(phys_addr, size, pgprot);
}
-EXPORT_SYMBOL(__ioremap_caller);
+EXPORT_SYMBOL(ioremap_prot);
/*
* Simple checks for non-translatable mappings.
@@ -158,10 +122,9 @@ static inline int iomapping_nontranslatable(unsigned long offset)
return 0;
}
-void iounmap(void __iomem *addr)
+void iounmap(volatile void __iomem *addr)
{
unsigned long vaddr = (unsigned long __force)addr;
- struct vm_struct *p;
/*
* Nothing to do if there is no translatable mapping.
@@ -172,21 +135,15 @@ void iounmap(void __iomem *addr)
/*
* There's no VMA if it's from an early fixed mapping.
*/
- if (iounmap_fixed(addr) == 0)
+ if (iounmap_fixed((void __iomem *)addr) == 0)
return;
/*
* If the PMB handled it, there's nothing else to do.
*/
- if (pmb_unmap(addr) == 0)
+ if (pmb_unmap((void __iomem *)addr) == 0)
return;
- p = remove_vm_area((void *)(vaddr & PAGE_MASK));
- if (!p) {
- printk(KERN_ERR "%s: bad address %p\n", __func__, addr);
- return;
- }
-
- kfree(p);
+ generic_iounmap(addr);
}
EXPORT_SYMBOL(iounmap);
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 7b5561d17ab1..caa7632be4c2 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -65,6 +65,10 @@ pgtable_t pte_alloc_one(struct mm_struct *mm);
void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
void pte_free(struct mm_struct *mm, pgtable_t ptepage);
+/* arch uses the pte_free_defer() implementation in arch/sparc/mm/init_64.c */

+#define pte_free_defer pte_free_defer
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+
#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(MM, PMD, PTE)
#define pmd_populate(MM, PMD, PTE) pmd_set(MM, PMD, PTE)
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index 1adf5c1c16b8..34ef7febf0d5 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -83,7 +83,7 @@ static void prom_sync_me(void)
"nop\n\t" : : "r" (&trapbase));
prom_printf("PROM SYNC COMMAND...\n");
- show_free_areas(0, NULL);
+ show_mem();
if (!is_idle_task(current)) {
local_irq_enable();
ksys_sync();
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 04f9db0c3111..9a63a3e08e40 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2893,14 +2893,15 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
pgtable_t pte_alloc_one(struct mm_struct *mm)
{
- struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!page)
+ struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL | __GFP_ZERO, 0);
+
+ if (!ptdesc)
return NULL;
- if (!pgtable_pte_page_ctor(page)) {
- __free_page(page);
+ if (!pagetable_pte_ctor(ptdesc)) {
+ pagetable_free(ptdesc);
return NULL;
}
- return (pte_t *) page_address(page);
+ return ptdesc_address(ptdesc);
}
void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -2910,10 +2911,10 @@ void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
static void __pte_free(pgtable_t pte)
{
- struct page *page = virt_to_page(pte);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pte);
- pgtable_pte_page_dtor(page);
- __free_page(page);
+ pagetable_pte_dtor(ptdesc);
+ pagetable_free(ptdesc);
}
void pte_free(struct mm_struct *mm, pgtable_t pte)
@@ -2930,6 +2931,22 @@ void pgtable_free(void *table, bool is_page)
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void pte_free_now(struct rcu_head *head)
+{
+ struct page *page;
+
+ page = container_of(head, struct page, rcu_head);
+ __pte_free((pgtable_t)page_address(page));
+}
+
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
+{
+ struct page *page;
+
+ page = virt_to_page(pgtable);
+ call_rcu(&page->rcu_head, pte_free_now);
+}
+
void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd)
{
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index 13f027afc875..8393faa3e596 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -355,7 +355,8 @@ pgtable_t pte_alloc_one(struct mm_struct *mm)
return NULL;
page = pfn_to_page(__nocache_pa((unsigned long)ptep) >> PAGE_SHIFT);
spin_lock(&mm->page_table_lock);
- if (page_ref_inc_return(page) == 2 && !pgtable_pte_page_ctor(page)) {
+ if (page_ref_inc_return(page) == 2 &&
+ !pagetable_pte_ctor(page_ptdesc(page))) {
page_ref_dec(page);
ptep = NULL;
}
@@ -371,7 +372,7 @@ void pte_free(struct mm_struct *mm, pgtable_t ptep)
page = pfn_to_page(__nocache_pa((unsigned long)ptep) >> PAGE_SHIFT);
spin_lock(&mm->page_table_lock);
if (page_ref_dec_return(page) == 1)
- pgtable_pte_page_dtor(page);
+ pagetable_pte_dtor(page_ptdesc(page));
spin_unlock(&mm->page_table_lock);
srmmu_free_nocache(ptep, SRMMU_PTE_TABLE_SIZE);
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index 8ec7cd46dd96..de5e31c64793 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -25,19 +25,19 @@
*/
extern pgd_t *pgd_alloc(struct mm_struct *);
-#define __pte_free_tlb(tlb,pte, address) \
-do { \
- pgtable_pte_page_dtor(pte); \
- tlb_remove_page((tlb),(pte)); \
+#define __pte_free_tlb(tlb, pte, address) \
+do { \
+ pagetable_pte_dtor(page_ptdesc(pte)); \
+ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \
} while (0)
#ifdef CONFIG_3_LEVEL_PGTABLES
-#define __pmd_free_tlb(tlb, pmd, address) \
-do { \
- pgtable_pmd_page_dtor(virt_to_page(pmd)); \
- tlb_remove_page((tlb),virt_to_page(pmd)); \
-} while (0) \
+#define __pmd_free_tlb(tlb, pmd, address) \
+do { \
+ pagetable_pmd_dtor(virt_to_ptdesc(pmd)); \
+ tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \
+} while (0)
#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7422db409770..d0258e92a8af 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -102,6 +102,7 @@ config X86
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_ZONE_DMA_SET if EXPERT
select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
@@ -128,7 +129,8 @@ config X86
select ARCH_WANT_GENERAL_HUGETLB
select ARCH_WANT_HUGE_PMD_SHARE
select ARCH_WANT_LD_ORPHAN_WARN
- select ARCH_WANT_OPTIMIZE_VMEMMAP if X86_64
+ select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64
+ select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64
select ARCH_WANTS_THP_SWAP if X86_64
select ARCH_HAS_PARANOID_L1D_FLUSH
select BUILDTIME_TABLE_SORT
@@ -2609,9 +2611,6 @@ config ARCH_HAS_ADD_PAGES
def_bool y
depends on ARCH_ENABLE_MEMORY_HOTPLUG
-config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
- def_bool y
-
menu "Power management and ACPI options"
config ARCH_HIBERNATION_HEADER
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index e9025640f634..76238842406a 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -35,9 +35,6 @@
* - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*/
-#define ARCH_HAS_IOREMAP_WC
-#define ARCH_HAS_IOREMAP_WT
-
#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/cc_platform.h>
@@ -212,8 +209,6 @@ void memset_io(volatile void __iomem *, int, size_t);
#define memcpy_toio memcpy_toio
#define memset_io memset_io
-#include <asm-generic/iomap.h>
-
/*
* ISA space is 'always mapped' on a typical x86 system, no need to
* explicitly ioremap() it. The fact that the ISA IO space is mapped
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5700bb337987..ada1bbf12961 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1023,21 +1023,21 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- page_table_check_pte_set(mm, addr, ptep, pte);
+ page_table_check_pte_set(mm, ptep, pte);
set_pte(ptep, pte);
}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
- page_table_check_pmd_set(mm, addr, pmdp, pmd);
+ page_table_check_pmd_set(mm, pmdp, pmd);
set_pmd(pmdp, pmd);
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
- page_table_check_pud_set(mm, addr, pudp, pud);
+ page_table_check_pud_set(mm, pudp, pud);
native_set_pud(pudp, pud);
}
@@ -1068,7 +1068,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
pte_t pte = native_ptep_get_and_clear(ptep);
- page_table_check_pte_clear(mm, addr, pte);
+ page_table_check_pte_clear(mm, pte);
return pte;
}
@@ -1084,7 +1084,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
* care about updates and native needs no locking
*/
pte = native_local_ptep_get_and_clear(ptep);
- page_table_check_pte_clear(mm, addr, pte);
+ page_table_check_pte_clear(mm, pte);
} else {
pte = ptep_get_and_clear(mm, addr, ptep);
}
@@ -1133,7 +1133,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long
{
pmd_t pmd = native_pmdp_get_and_clear(pmdp);
- page_table_check_pmd_clear(mm, addr, pmd);
+ page_table_check_pmd_clear(mm, pmd);
return pmd;
}
@@ -1144,7 +1144,7 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
{
pud_t pud = native_pudp_get_and_clear(pudp);
- page_table_check_pud_clear(mm, addr, pud);
+ page_table_check_pud_clear(mm, pud);
return pud;
}
@@ -1167,7 +1167,7 @@ static inline int pud_write(pud_t pud)
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
- page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
+ page_table_check_pmd_set(vma->vm_mm, pmdp, pmd);
if (IS_ENABLED(CONFIG_SMP)) {
return xchg(pmdp, pmd);
} else {
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 80450e1d5385..6ab42caaa67a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -3,6 +3,7 @@
#define _ASM_X86_TLBFLUSH_H
#include <linux/mm_types.h>
+#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <asm/processor.h>
@@ -253,6 +254,18 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
}
+static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+ bool should_defer = false;
+
+ /* If remote CPUs need to be flushed then defer batch the flush */
+ if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
+ should_defer = true;
+ put_cpu();
+
+ return should_defer;
+}
+
static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
{
/*
@@ -264,11 +277,18 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
return atomic64_inc_return(&mm->context.tlb_gen);
}
-static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm)
+static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
{
inc_mm_tlb_gen(mm);
cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
+static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
+{
+ flush_tlb_mm(mm);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
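The three helpers above split TLB flushing for batched unmaps into a policy check, a record step, and a catch-up flush. A hypothetical caller wiring them together (the real users live in the core mm reclaim/migration paths and flush more precisely):

	static void unmap_one_page_batched(struct arch_tlbflush_unmap_batch *batch,
					   struct mm_struct *mm, unsigned long uaddr)
	{
		if (arch_tlbbatch_should_defer(mm))
			/* remote CPUs involved: record the mm now, flush later via
			 * arch_tlbbatch_flush()/arch_flush_tlb_batched_pending() */
			arch_tlbbatch_add_pending(batch, mm, uaddr);
		else
			/* local-only case: flush right away (coarser than the
			 * per-page flush real callers would issue) */
			flush_tlb_mm(mm);
	}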
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e8711b2cafaf..787da09d24f3 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1328,7 +1328,6 @@ void do_user_addr_fault(struct pt_regs *regs,
}
#endif
-#ifdef CONFIG_PER_VMA_LOCK
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
@@ -1358,7 +1357,6 @@ void do_user_addr_fault(struct pt_regs *regs,
return;
}
lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
retry:
vma = lock_mm_and_find_vma(mm, address, regs);
@@ -1418,9 +1416,7 @@ retry:
}
mmap_read_unlock(mm);
-#ifdef CONFIG_PER_VMA_LOCK
done:
-#endif
if (likely(!(fault & VM_FAULT_ERROR)))
return;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 15a8009a4480..d3a93e8766ee 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -52,7 +52,7 @@ early_param("userpte", setup_userpte);
void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
- pgtable_pte_page_dtor(pte);
+ pagetable_pte_dtor(page_ptdesc(pte));
paravirt_release_pte(page_to_pfn(pte));
paravirt_tlb_remove_table(tlb, pte);
}
@@ -60,7 +60,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
#if CONFIG_PGTABLE_LEVELS > 2
void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
- struct page *page = virt_to_page(pmd);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
/*
* NOTE! For PAE, any changes to the top page-directory-pointer-table
@@ -69,8 +69,8 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
#ifdef CONFIG_X86_PAE
tlb->need_flush_all = 1;
#endif
- pgtable_pmd_page_dtor(page);
- paravirt_tlb_remove_table(tlb, page);
+ pagetable_pmd_dtor(ptdesc);
+ paravirt_tlb_remove_table(tlb, ptdesc_page(ptdesc));
}
#if CONFIG_PGTABLE_LEVELS > 3
@@ -92,16 +92,16 @@ void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
static inline void pgd_list_add(pgd_t *pgd)
{
- struct page *page = virt_to_page(pgd);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pgd);
- list_add(&page->lru, &pgd_list);
+ list_add(&ptdesc->pt_list, &pgd_list);
}
static inline void pgd_list_del(pgd_t *pgd)
{
- struct page *page = virt_to_page(pgd);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pgd);
- list_del(&page->lru);
+ list_del(&ptdesc->pt_list);
}
#define UNSHARED_PTRS_PER_PGD \
@@ -112,12 +112,12 @@ static inline void pgd_list_del(pgd_t *pgd)
static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
{
- virt_to_page(pgd)->pt_mm = mm;
+ virt_to_ptdesc(pgd)->pt_mm = mm;
}
struct mm_struct *pgd_page_get_mm(struct page *page)
{
- return page->pt_mm;
+ return page_ptdesc(page)->pt_mm;
}
static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
@@ -213,11 +213,14 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
{
int i;
+ struct ptdesc *ptdesc;
for (i = 0; i < count; i++)
if (pmds[i]) {
- pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
- free_page((unsigned long)pmds[i]);
+ ptdesc = virt_to_ptdesc(pmds[i]);
+
+ pagetable_pmd_dtor(ptdesc);
+ pagetable_free(ptdesc);
mm_dec_nr_pmds(mm);
}
}
@@ -230,18 +233,24 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
if (mm == &init_mm)
gfp &= ~__GFP_ACCOUNT;
+ gfp &= ~__GFP_HIGHMEM;
for (i = 0; i < count; i++) {
- pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
- if (!pmd)
+ pmd_t *pmd = NULL;
+ struct ptdesc *ptdesc = pagetable_alloc(gfp, 0);
+
+ if (!ptdesc)
failed = true;
- if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
- free_page((unsigned long)pmd);
- pmd = NULL;
+ if (ptdesc && !pagetable_pmd_ctor(ptdesc)) {
+ pagetable_free(ptdesc);
+ ptdesc = NULL;
failed = true;
}
- if (pmd)
+ if (ptdesc) {
mm_inc_nr_pmds(mm);
+ pmd = ptdesc_address(ptdesc);
+ }
+
pmds[i] = pmd;
}
@@ -830,7 +839,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
free_page((unsigned long)pmd_sv);
- pgtable_pmd_page_dtor(virt_to_page(pmd));
+ pagetable_pmd_dtor(virt_to_ptdesc(pmd));
free_page((unsigned long)pmd);
return 1;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 267acf27480a..2d253919b3e8 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -10,6 +10,7 @@
#include <linux/debugfs.h>
#include <linux/sched/smt.h>
#include <linux/task_work.h>
+#include <linux/mmu_notifier.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
@@ -1036,6 +1037,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
put_flush_tlb_info();
put_cpu();
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index e0a975165de7..8796ec310483 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -667,7 +667,7 @@ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
spinlock_t *ptl = NULL;
#if USE_SPLIT_PTE_PTLOCKS
- ptl = ptlock_ptr(page);
+ ptl = ptlock_ptr(page_ptdesc(page));
spin_lock_nest_lock(ptl, &mm->page_table_lock);
#endif
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 2a51a466779f..a5488cc40f58 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -28,6 +28,7 @@ config XTENSA
select GENERIC_LIB_UCMPDI2
select GENERIC_PCI_IOMAP
select GENERIC_SCHED_CLOCK
+ select GENERIC_IOREMAP if MMU
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h
index a5b707e1c0f4..934e58399c8c 100644
--- a/arch/xtensa/include/asm/io.h
+++ b/arch/xtensa/include/asm/io.h
@@ -16,6 +16,7 @@
#include <asm/vectors.h>
#include <linux/bug.h>
#include <linux/kernel.h>
+#include <linux/pgtable.h>
#include <linux/types.h>
@@ -24,22 +25,24 @@
#define PCI_IOBASE ((void __iomem *)XCHAL_KIO_BYPASS_VADDR)
#ifdef CONFIG_MMU
-
-void __iomem *xtensa_ioremap_nocache(unsigned long addr, unsigned long size);
-void __iomem *xtensa_ioremap_cache(unsigned long addr, unsigned long size);
-void xtensa_iounmap(volatile void __iomem *addr);
-
/*
- * Return the virtual address for the specified bus memory.
+ * I/O memory mapping functions.
*/
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+ unsigned long prot);
+#define ioremap_prot ioremap_prot
+#define iounmap iounmap
+
static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
{
if (offset >= XCHAL_KIO_PADDR
&& offset - XCHAL_KIO_PADDR < XCHAL_KIO_SIZE)
return (void*)(offset-XCHAL_KIO_PADDR+XCHAL_KIO_BYPASS_VADDR);
else
- return xtensa_ioremap_nocache(offset, size);
+ return ioremap_prot(offset, size,
+ pgprot_val(pgprot_noncached(PAGE_KERNEL)));
}
+#define ioremap ioremap
static inline void __iomem *ioremap_cache(unsigned long offset,
unsigned long size)
@@ -48,21 +51,10 @@ static inline void __iomem *ioremap_cache(unsigned long offset,
&& offset - XCHAL_KIO_PADDR < XCHAL_KIO_SIZE)
return (void*)(offset-XCHAL_KIO_PADDR+XCHAL_KIO_CACHED_VADDR);
else
- return xtensa_ioremap_cache(offset, size);
-}
-#define ioremap_cache ioremap_cache
+ return ioremap_prot(offset, size, pgprot_val(PAGE_KERNEL));
-static inline void iounmap(volatile void __iomem *addr)
-{
- unsigned long va = (unsigned long) addr;
-
- if (!(va >= XCHAL_KIO_CACHED_VADDR &&
- va - XCHAL_KIO_CACHED_VADDR < XCHAL_KIO_SIZE) &&
- !(va >= XCHAL_KIO_BYPASS_VADDR &&
- va - XCHAL_KIO_BYPASS_VADDR < XCHAL_KIO_SIZE))
- xtensa_iounmap(addr);
}
-
+#define ioremap_cache ioremap_cache
#endif /* CONFIG_MMU */
#include <asm-generic/io.h>
diff --git a/arch/xtensa/mm/ioremap.c b/arch/xtensa/mm/ioremap.c
index a400188c16b9..8ca660b7ab49 100644
--- a/arch/xtensa/mm/ioremap.c
+++ b/arch/xtensa/mm/ioremap.c
@@ -6,60 +6,30 @@
*/
#include <linux/io.h>
-#include <linux/vmalloc.h>
#include <linux/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
-static void __iomem *xtensa_ioremap(unsigned long paddr, unsigned long size,
- pgprot_t prot)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+ unsigned long prot)
{
- unsigned long offset = paddr & ~PAGE_MASK;
- unsigned long pfn = __phys_to_pfn(paddr);
- struct vm_struct *area;
- unsigned long vaddr;
- int err;
-
- paddr &= PAGE_MASK;
-
+ unsigned long pfn = __phys_to_pfn(phys_addr);
WARN_ON(pfn_valid(pfn));
- size = PAGE_ALIGN(offset + size);
-
- area = get_vm_area(size, VM_IOREMAP);
- if (!area)
- return NULL;
-
- vaddr = (unsigned long)area->addr;
- area->phys_addr = paddr;
-
- err = ioremap_page_range(vaddr, vaddr + size, paddr, prot);
-
- if (err) {
- vunmap((void *)vaddr);
- return NULL;
- }
-
- flush_cache_vmap(vaddr, vaddr + size);
- return (void __iomem *)(offset + vaddr);
-}
-
-void __iomem *xtensa_ioremap_nocache(unsigned long addr, unsigned long size)
-{
- return xtensa_ioremap(addr, size, pgprot_noncached(PAGE_KERNEL));
+ return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
}
-EXPORT_SYMBOL(xtensa_ioremap_nocache);
+EXPORT_SYMBOL(ioremap_prot);
-void __iomem *xtensa_ioremap_cache(unsigned long addr, unsigned long size)
+void iounmap(volatile void __iomem *addr)
{
- return xtensa_ioremap(addr, size, PAGE_KERNEL);
-}
-EXPORT_SYMBOL(xtensa_ioremap_cache);
+ unsigned long va = (unsigned long) addr;
-void xtensa_iounmap(volatile void __iomem *io_addr)
-{
- void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr);
+ if ((va >= XCHAL_KIO_CACHED_VADDR &&
+ va - XCHAL_KIO_CACHED_VADDR < XCHAL_KIO_SIZE) ||
+ (va >= XCHAL_KIO_BYPASS_VADDR &&
+ va - XCHAL_KIO_BYPASS_VADDR < XCHAL_KIO_SIZE))
+ return;
- vunmap(addr);
+ generic_iounmap(addr);
}
-EXPORT_SYMBOL(xtensa_iounmap);
+EXPORT_SYMBOL(iounmap);
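The xtensa conversion above keeps its fixed KIO windows out of the generic path: ioremap()/ioremap_cache() translate such addresses arithmetically, and the new iounmap() returns early for them so generic_iounmap() never sees a non-vmalloc address. A sketch of that address test (the helper name is invented; the constants are those used above):

	static bool is_kio_vaddr(unsigned long va)
	{
		/* addresses inside the cached or bypass KIO window were never
		 * mapped through vmalloc space and must not be unmapped */
		return (va >= XCHAL_KIO_CACHED_VADDR &&
			va - XCHAL_KIO_CACHED_VADDR < XCHAL_KIO_SIZE) ||
		       (va >= XCHAL_KIO_BYPASS_VADDR &&
			va - XCHAL_KIO_BYPASS_VADDR < XCHAL_KIO_SIZE);
	}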