Diffstat (limited to 'arch/arm64/include/asm/pgtable.h')
| -rw-r--r-- | arch/arm64/include/asm/pgtable.h | 666 | 
1 file changed, 590 insertions, 76 deletions
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 79ce70fbb751..afdd56d26ad7 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -18,11 +18,15 @@   * VMALLOC range.   *   * VMALLOC_START: beginning of the kernel vmalloc space - * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space - *	and fixed mappings + * VMALLOC_END: extends to the available space below vmemmap   */  #define VMALLOC_START		(MODULES_END) -#define VMALLOC_END		(VMEMMAP_START - SZ_256M) +#if VA_BITS == VA_BITS_MIN +#define VMALLOC_END		(VMEMMAP_START - SZ_8M) +#else +#define VMEMMAP_UNUSED_NPAGES	((_PAGE_OFFSET(vabits_actual) - PAGE_OFFSET) >> PAGE_SHIFT) +#define VMALLOC_END		(VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page) - SZ_8M) +#endif  #define vmemmap			((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) @@ -76,15 +80,16 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];  #ifdef CONFIG_ARM64_PA_BITS_52  static inline phys_addr_t __pte_to_phys(pte_t pte)  { +	pte_val(pte) &= ~PTE_MAYBE_SHARED;  	return (pte_val(pte) & PTE_ADDR_LOW) |  		((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT);  }  static inline pteval_t __phys_to_pte_val(phys_addr_t phys)  { -	return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PTE_ADDR_MASK; +	return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;  }  #else -#define __pte_to_phys(pte)	(pte_val(pte) & PTE_ADDR_MASK) +#define __pte_to_phys(pte)	(pte_val(pte) & PTE_ADDR_LOW)  #define __phys_to_pte_val(phys)	(phys)  #endif @@ -93,7 +98,8 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)  	__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))  #define pte_none(pte)		(!pte_val(pte)) -#define pte_clear(mm,addr,ptep)	set_pte(ptep, __pte(0)) +#define __pte_clear(mm, addr, ptep) \ +				__set_pte(ptep, __pte(0))  #define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))  /* @@ -133,11 +139,15 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)  #define pte_valid_not_user(pte) \  	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))  /* + * Returns true if the pte is valid and has the contiguous bit set. + */ +#define pte_valid_cont(pte)	(pte_valid(pte) && pte_cont(pte)) +/*   * Could the pte be present in the TLB? We must check mm_tlb_flush_pending   * so that we don't erroneously return false for pages that have been   * remapped as PROT_NONE but are yet to be flushed from the TLB.   * Note that we can't make any assumptions based on the state of the access - * flag, since ptep_clear_flush_young() elides a DSB when invalidating the + * flag, since __ptep_clear_flush_young() elides a DSB when invalidating the   * TLB.   
*/  #define pte_accessible(mm, pte)	\ @@ -261,7 +271,7 @@ static inline pte_t pte_mkdevmap(pte_t pte)  	return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));  } -static inline void set_pte(pte_t *ptep, pte_t pte) +static inline void __set_pte(pte_t *ptep, pte_t pte)  {  	WRITE_ONCE(*ptep, pte); @@ -275,6 +285,11 @@ static inline void set_pte(pte_t *ptep, pte_t pte)  	}  } +static inline pte_t __ptep_get(pte_t *ptep) +{ +	return READ_ONCE(*ptep); +} +  extern void __sync_icache_dcache(pte_t pteval);  bool pgattr_change_is_safe(u64 old, u64 new); @@ -302,7 +317,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,  	if (!IS_ENABLED(CONFIG_DEBUG_VM))  		return; -	old_pte = READ_ONCE(*ptep); +	old_pte = __ptep_get(ptep);  	if (!pte_valid(old_pte) || !pte_valid(pte))  		return; @@ -311,7 +326,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,  	/*  	 * Check for potential race with hardware updates of the pte -	 * (ptep_set_access_flags safely changes valid ptes without going +	 * (__ptep_set_access_flags safely changes valid ptes without going  	 * through an invalid entry).  	 */  	VM_WARN_ONCE(!pte_young(pte), @@ -341,23 +356,38 @@ static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)  		mte_sync_tags(pte, nr_pages);  } -static inline void set_ptes(struct mm_struct *mm, -			    unsigned long __always_unused addr, -			    pte_t *ptep, pte_t pte, unsigned int nr) +/* + * Select all bits except the pfn + */ +static inline pgprot_t pte_pgprot(pte_t pte) +{ +	unsigned long pfn = pte_pfn(pte); + +	return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); +} + +#define pte_advance_pfn pte_advance_pfn +static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) +{ +	return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte)); +} + +static inline void __set_ptes(struct mm_struct *mm, +			      unsigned long __always_unused addr, +			      pte_t *ptep, pte_t pte, unsigned int nr)  {  	page_table_check_ptes_set(mm, ptep, pte, nr);  	__sync_cache_and_tags(pte, nr);  	for (;;) {  		__check_safe_pte_update(mm, ptep, pte); -		set_pte(ptep, pte); +		__set_pte(ptep, pte);  		if (--nr == 0)  			break;  		ptep++; -		pte_val(pte) += PAGE_SIZE; +		pte = pte_advance_pfn(pte, 1);  	}  } -#define set_ptes set_ptes  /*   * Huge pte definitions. 
@@ -433,16 +463,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)  	return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));  } -/* - * Select all bits except the pfn - */ -static inline pgprot_t pte_pgprot(pte_t pte) -{ -	unsigned long pfn = pte_pfn(pte); - -	return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); -} -  #ifdef CONFIG_NUMA_BALANCING  /*   * See the comment in include/linux/pgtable.h @@ -534,7 +554,7 @@ static inline void __set_pte_at(struct mm_struct *mm,  {  	__sync_cache_and_tags(pte, nr);  	__check_safe_pte_update(mm, ptep, pte); -	set_pte(ptep, pte); +	__set_pte(ptep, pte);  }  static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, @@ -616,12 +636,12 @@ static inline bool pud_table(pud_t pud) { return true; }  				 PUD_TYPE_TABLE)  #endif -extern pgd_t init_pg_dir[PTRS_PER_PGD]; +extern pgd_t init_pg_dir[];  extern pgd_t init_pg_end[]; -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; -extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; -extern pgd_t reserved_pg_dir[PTRS_PER_PGD]; +extern pgd_t swapper_pg_dir[]; +extern pgd_t idmap_pg_dir[]; +extern pgd_t tramp_pg_dir[]; +extern pgd_t reserved_pg_dir[];  extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); @@ -694,14 +714,14 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)  #define pud_user(pud)		pte_user(pud_pte(pud))  #define pud_user_exec(pud)	pte_user_exec(pud_pte(pud)) +static inline bool pgtable_l4_enabled(void); +  static inline void set_pud(pud_t *pudp, pud_t pud)  { -#ifdef __PAGETABLE_PUD_FOLDED -	if (in_swapper_pgdir(pudp)) { +	if (!pgtable_l4_enabled() && in_swapper_pgdir(pudp)) {  		set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));  		return;  	} -#endif /* __PAGETABLE_PUD_FOLDED */  	WRITE_ONCE(*pudp, pud); @@ -754,12 +774,27 @@ static inline pmd_t *pud_pgtable(pud_t pud)  #if CONFIG_PGTABLE_LEVELS > 3 +static __always_inline bool pgtable_l4_enabled(void) +{ +	if (CONFIG_PGTABLE_LEVELS > 4 || !IS_ENABLED(CONFIG_ARM64_LPA2)) +		return true; +	if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT)) +		return vabits_actual == VA_BITS; +	return alternative_has_cap_unlikely(ARM64_HAS_VA52); +} + +static inline bool mm_pud_folded(const struct mm_struct *mm) +{ +	return !pgtable_l4_enabled(); +} +#define mm_pud_folded  mm_pud_folded +  #define pud_ERROR(e)	\  	pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e)) -#define p4d_none(p4d)		(!p4d_val(p4d)) -#define p4d_bad(p4d)		(!(p4d_val(p4d) & 2)) -#define p4d_present(p4d)	(p4d_val(p4d)) +#define p4d_none(p4d)		(pgtable_l4_enabled() && !p4d_val(p4d)) +#define p4d_bad(p4d)		(pgtable_l4_enabled() && !(p4d_val(p4d) & 2)) +#define p4d_present(p4d)	(!p4d_none(p4d))  static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)  { @@ -775,7 +810,8 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)  static inline void p4d_clear(p4d_t *p4dp)  { -	set_p4d(p4dp, __p4d(0)); +	if (pgtable_l4_enabled()) +		set_p4d(p4dp, __p4d(0));  }  static inline phys_addr_t p4d_page_paddr(p4d_t p4d) @@ -783,27 +819,75 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d)  	return __p4d_to_phys(p4d);  } +#define pud_index(addr)		(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + +static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr) +{ +	return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr); +} +  static inline pud_t *p4d_pgtable(p4d_t p4d)  {  	return (pud_t *)__va(p4d_page_paddr(p4d));  } -/* Find an entry in the first-level page table. 
*/ -#define pud_offset_phys(dir, addr)	(p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) +static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr) +{ +	BUG_ON(!pgtable_l4_enabled()); -#define pud_set_fixmap(addr)		((pud_t *)set_fixmap_offset(FIX_PUD, addr)) -#define pud_set_fixmap_offset(p4d, addr)	pud_set_fixmap(pud_offset_phys(p4d, addr)) -#define pud_clear_fixmap()		clear_fixmap(FIX_PUD) +	return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t); +} -#define p4d_page(p4d)		pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d))) +static inline +pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long addr) +{ +	if (!pgtable_l4_enabled()) +		return p4d_to_folded_pud(p4dp, addr); +	return (pud_t *)__va(p4d_page_paddr(p4d)) + pud_index(addr); +} +#define pud_offset_lockless pud_offset_lockless + +static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long addr) +{ +	return pud_offset_lockless(p4dp, READ_ONCE(*p4dp), addr); +} +#define pud_offset	pud_offset + +static inline pud_t *pud_set_fixmap(unsigned long addr) +{ +	if (!pgtable_l4_enabled()) +		return NULL; +	return (pud_t *)set_fixmap_offset(FIX_PUD, addr); +} + +static inline pud_t *pud_set_fixmap_offset(p4d_t *p4dp, unsigned long addr) +{ +	if (!pgtable_l4_enabled()) +		return p4d_to_folded_pud(p4dp, addr); +	return pud_set_fixmap(pud_offset_phys(p4dp, addr)); +} + +static inline void pud_clear_fixmap(void) +{ +	if (pgtable_l4_enabled()) +		clear_fixmap(FIX_PUD); +}  /* use ONLY for statically allocated translation tables */ -#define pud_offset_kimg(dir,addr)	((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr)))) +static inline pud_t *pud_offset_kimg(p4d_t *p4dp, u64 addr) +{ +	if (!pgtable_l4_enabled()) +		return p4d_to_folded_pud(p4dp, addr); +	return (pud_t *)__phys_to_kimg(pud_offset_phys(p4dp, addr)); +} + +#define p4d_page(p4d)		pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))  #else +static inline bool pgtable_l4_enabled(void) { return false; } +  #define p4d_page_paddr(p4d)	({ BUILD_BUG(); 0;}) -#define pgd_page_paddr(pgd)	({ BUILD_BUG(); 0;})  /* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */  #define pud_set_fixmap(addr)		NULL @@ -814,6 +898,122 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)  #endif  /* CONFIG_PGTABLE_LEVELS > 3 */ +#if CONFIG_PGTABLE_LEVELS > 4 + +static __always_inline bool pgtable_l5_enabled(void) +{ +	if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT)) +		return vabits_actual == VA_BITS; +	return alternative_has_cap_unlikely(ARM64_HAS_VA52); +} + +static inline bool mm_p4d_folded(const struct mm_struct *mm) +{ +	return !pgtable_l5_enabled(); +} +#define mm_p4d_folded  mm_p4d_folded + +#define p4d_ERROR(e)	\ +	pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e)) + +#define pgd_none(pgd)		(pgtable_l5_enabled() && !pgd_val(pgd)) +#define pgd_bad(pgd)		(pgtable_l5_enabled() && !(pgd_val(pgd) & 2)) +#define pgd_present(pgd)	(!pgd_none(pgd)) + +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +{ +	if (in_swapper_pgdir(pgdp)) { +		set_swapper_pgd(pgdp, __pgd(pgd_val(pgd))); +		return; +	} + +	WRITE_ONCE(*pgdp, pgd); +	dsb(ishst); +	isb(); +} + +static inline void pgd_clear(pgd_t *pgdp) +{ +	if (pgtable_l5_enabled()) +		set_pgd(pgdp, __pgd(0)); +} + +static inline phys_addr_t pgd_page_paddr(pgd_t pgd) +{ +	return __pgd_to_phys(pgd); +} + +#define p4d_index(addr)		(((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) + +static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr) +{ +	return (p4d_t *)PTR_ALIGN_DOWN(pgdp, 
PAGE_SIZE) + p4d_index(addr); +} + +static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr) +{ +	BUG_ON(!pgtable_l5_enabled()); + +	return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t); +} + +static inline +p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long addr) +{ +	if (!pgtable_l5_enabled()) +		return pgd_to_folded_p4d(pgdp, addr); +	return (p4d_t *)__va(pgd_page_paddr(pgd)) + p4d_index(addr); +} +#define p4d_offset_lockless p4d_offset_lockless + +static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr) +{ +	return p4d_offset_lockless(pgdp, READ_ONCE(*pgdp), addr); +} + +static inline p4d_t *p4d_set_fixmap(unsigned long addr) +{ +	if (!pgtable_l5_enabled()) +		return NULL; +	return (p4d_t *)set_fixmap_offset(FIX_P4D, addr); +} + +static inline p4d_t *p4d_set_fixmap_offset(pgd_t *pgdp, unsigned long addr) +{ +	if (!pgtable_l5_enabled()) +		return pgd_to_folded_p4d(pgdp, addr); +	return p4d_set_fixmap(p4d_offset_phys(pgdp, addr)); +} + +static inline void p4d_clear_fixmap(void) +{ +	if (pgtable_l5_enabled()) +		clear_fixmap(FIX_P4D); +} + +/* use ONLY for statically allocated translation tables */ +static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr) +{ +	if (!pgtable_l5_enabled()) +		return pgd_to_folded_p4d(pgdp, addr); +	return (p4d_t *)__phys_to_kimg(p4d_offset_phys(pgdp, addr)); +} + +#define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd))) + +#else + +static inline bool pgtable_l5_enabled(void) { return false; } + +/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */ +#define p4d_set_fixmap(addr)		NULL +#define p4d_set_fixmap_offset(p4dp, addr)	((p4d_t *)p4dp) +#define p4d_clear_fixmap() + +#define p4d_offset_kimg(dir,addr)	((p4d_t *)dir) + +#endif  /* CONFIG_PGTABLE_LEVELS > 4 */ +  #define pgd_ERROR(e)	\  	pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e)) @@ -848,8 +1048,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)  	return pte_pmd(pte_modify(pmd_pte(pmd), newprot));  } -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -extern int ptep_set_access_flags(struct vm_area_struct *vma, +extern int __ptep_set_access_flags(struct vm_area_struct *vma,  				 unsigned long address, pte_t *ptep,  				 pte_t entry, int dirty); @@ -859,7 +1058,8 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,  					unsigned long address, pmd_t *pmdp,  					pmd_t entry, int dirty)  { -	return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty); +	return __ptep_set_access_flags(vma, address, (pte_t *)pmdp, +							pmd_pte(entry), dirty);  }  static inline int pud_devmap(pud_t pud) @@ -893,12 +1093,13 @@ static inline bool pud_user_accessible_page(pud_t pud)  /*   * Atomic pte/pmd modifications.   
*/ -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int __ptep_test_and_clear_young(pte_t *ptep) +static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma, +					      unsigned long address, +					      pte_t *ptep)  {  	pte_t old_pte, pte; -	pte = READ_ONCE(*ptep); +	pte = __ptep_get(ptep);  	do {  		old_pte = pte;  		pte = pte_mkold(pte); @@ -909,18 +1110,10 @@ static inline int __ptep_test_and_clear_young(pte_t *ptep)  	return pte_young(pte);  } -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, -					    unsigned long address, -					    pte_t *ptep) -{ -	return __ptep_test_and_clear_young(ptep); -} - -#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -static inline int ptep_clear_flush_young(struct vm_area_struct *vma, +static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,  					 unsigned long address, pte_t *ptep)  { -	int young = ptep_test_and_clear_young(vma, address, ptep); +	int young = __ptep_test_and_clear_young(vma, address, ptep);  	if (young) {  		/* @@ -943,12 +1136,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,  					    unsigned long address,  					    pmd_t *pmdp)  { -	return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); +	return __ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);  }  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, +static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,  				       unsigned long address, pte_t *ptep)  {  	pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0)); @@ -958,6 +1150,37 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,  	return pte;  } +static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr, +				pte_t *ptep, unsigned int nr, int full) +{ +	for (;;) { +		__ptep_get_and_clear(mm, addr, ptep); +		if (--nr == 0) +			break; +		ptep++; +		addr += PAGE_SIZE; +	} +} + +static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm, +				unsigned long addr, pte_t *ptep, +				unsigned int nr, int full) +{ +	pte_t pte, tmp_pte; + +	pte = __ptep_get_and_clear(mm, addr, ptep); +	while (--nr) { +		ptep++; +		addr += PAGE_SIZE; +		tmp_pte = __ptep_get_and_clear(mm, addr, ptep); +		if (pte_dirty(tmp_pte)) +			pte = pte_mkdirty(pte); +		if (pte_young(tmp_pte)) +			pte = pte_mkyoung(pte); +	} +	return pte; +} +  #ifdef CONFIG_TRANSPARENT_HUGEPAGE  #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR  static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, @@ -971,16 +1194,12 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,  }  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -/* - * ptep_set_wrprotect - mark read-only while trasferring potential hardware - * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit. 
- */ -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) +static inline void ___ptep_set_wrprotect(struct mm_struct *mm, +					unsigned long address, pte_t *ptep, +					pte_t pte)  { -	pte_t old_pte, pte; +	pte_t old_pte; -	pte = READ_ONCE(*ptep);  	do {  		old_pte = pte;  		pte = pte_wrprotect(pte); @@ -989,12 +1208,31 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres  	} while (pte_val(pte) != pte_val(old_pte));  } +/* + * __ptep_set_wrprotect - mark read-only while trasferring potential hardware + * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit. + */ +static inline void __ptep_set_wrprotect(struct mm_struct *mm, +					unsigned long address, pte_t *ptep) +{ +	___ptep_set_wrprotect(mm, address, ptep, __ptep_get(ptep)); +} + +static inline void __wrprotect_ptes(struct mm_struct *mm, unsigned long address, +				pte_t *ptep, unsigned int nr) +{ +	unsigned int i; + +	for (i = 0; i < nr; i++, address += PAGE_SIZE, ptep++) +		__ptep_set_wrprotect(mm, address, ptep); +} +  #ifdef CONFIG_TRANSPARENT_HUGEPAGE  #define __HAVE_ARCH_PMDP_SET_WRPROTECT  static inline void pmdp_set_wrprotect(struct mm_struct *mm,  				      unsigned long address, pmd_t *pmdp)  { -	ptep_set_wrprotect(mm, address, (pte_t *)pmdp); +	__ptep_set_wrprotect(mm, address, (pte_t *)pmdp);  }  #define pmdp_establish pmdp_establish @@ -1072,7 +1310,7 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)  #endif /* CONFIG_ARM64_MTE */  /* - * On AArch64, the cache coherency is handled via the set_pte_at() function. + * On AArch64, the cache coherency is handled via the __set_ptes() function.   */  static inline void update_mmu_cache_range(struct vm_fault *vmf,  		struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, @@ -1124,6 +1362,282 @@ extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,  extern void ptep_modify_prot_commit(struct vm_area_struct *vma,  				    unsigned long addr, pte_t *ptep,  				    pte_t old_pte, pte_t new_pte); + +#ifdef CONFIG_ARM64_CONTPTE + +/* + * The contpte APIs are used to transparently manage the contiguous bit in ptes + * where it is possible and makes sense to do so. The PTE_CONT bit is considered + * a private implementation detail of the public ptep API (see below). 
+ */ +extern void __contpte_try_fold(struct mm_struct *mm, unsigned long addr, +				pte_t *ptep, pte_t pte); +extern void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr, +				pte_t *ptep, pte_t pte); +extern pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte); +extern pte_t contpte_ptep_get_lockless(pte_t *orig_ptep); +extern void contpte_set_ptes(struct mm_struct *mm, unsigned long addr, +				pte_t *ptep, pte_t pte, unsigned int nr); +extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr, +				pte_t *ptep, unsigned int nr, int full); +extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm, +				unsigned long addr, pte_t *ptep, +				unsigned int nr, int full); +extern int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma, +				unsigned long addr, pte_t *ptep); +extern int contpte_ptep_clear_flush_young(struct vm_area_struct *vma, +				unsigned long addr, pte_t *ptep); +extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr, +				pte_t *ptep, unsigned int nr); +extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma, +				unsigned long addr, pte_t *ptep, +				pte_t entry, int dirty); + +static __always_inline void contpte_try_fold(struct mm_struct *mm, +				unsigned long addr, pte_t *ptep, pte_t pte) +{ +	/* +	 * Only bother trying if both the virtual and physical addresses are +	 * aligned and correspond to the last entry in a contig range. The core +	 * code mostly modifies ranges from low to high, so this is the likely +	 * the last modification in the contig range, so a good time to fold. +	 * We can't fold special mappings, because there is no associated folio. +	 */ + +	const unsigned long contmask = CONT_PTES - 1; +	bool valign = ((addr >> PAGE_SHIFT) & contmask) == contmask; + +	if (unlikely(valign)) { +		bool palign = (pte_pfn(pte) & contmask) == contmask; + +		if (unlikely(palign && +		    pte_valid(pte) && !pte_cont(pte) && !pte_special(pte))) +			__contpte_try_fold(mm, addr, ptep, pte); +	} +} + +static __always_inline void contpte_try_unfold(struct mm_struct *mm, +				unsigned long addr, pte_t *ptep, pte_t pte) +{ +	if (unlikely(pte_valid_cont(pte))) +		__contpte_try_unfold(mm, addr, ptep, pte); +} + +#define pte_batch_hint pte_batch_hint +static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte) +{ +	if (!pte_valid_cont(pte)) +		return 1; + +	return CONT_PTES - (((unsigned long)ptep >> 3) & (CONT_PTES - 1)); +} + +/* + * The below functions constitute the public API that arm64 presents to the + * core-mm to manipulate PTE entries within their page tables (or at least this + * is the subset of the API that arm64 needs to implement). These public + * versions will automatically and transparently apply the contiguous bit where + * it makes sense to do so. Therefore any users that are contig-aware (e.g. + * hugetlb, kernel mapper) should NOT use these APIs, but instead use the + * private versions, which are prefixed with double underscore. All of these + * APIs except for ptep_get_lockless() are expected to be called with the PTL + * held. Although the contiguous bit is considered private to the + * implementation, it is deliberately allowed to leak through the getters (e.g. + * ptep_get()), back to core code. This is required so that pte_leaf_size() can + * provide an accurate size for perf_get_pgtable_size(). 
But this leakage means + * its possible a pte will be passed to a setter with the contiguous bit set, so + * we explicitly clear the contiguous bit in those cases to prevent accidentally + * setting it in the pgtable. + */ + +#define ptep_get ptep_get +static inline pte_t ptep_get(pte_t *ptep) +{ +	pte_t pte = __ptep_get(ptep); + +	if (likely(!pte_valid_cont(pte))) +		return pte; + +	return contpte_ptep_get(ptep, pte); +} + +#define ptep_get_lockless ptep_get_lockless +static inline pte_t ptep_get_lockless(pte_t *ptep) +{ +	pte_t pte = __ptep_get(ptep); + +	if (likely(!pte_valid_cont(pte))) +		return pte; + +	return contpte_ptep_get_lockless(ptep); +} + +static inline void set_pte(pte_t *ptep, pte_t pte) +{ +	/* +	 * We don't have the mm or vaddr so cannot unfold contig entries (since +	 * it requires tlb maintenance). set_pte() is not used in core code, so +	 * this should never even be called. Regardless do our best to service +	 * any call and emit a warning if there is any attempt to set a pte on +	 * top of an existing contig range. +	 */ +	pte_t orig_pte = __ptep_get(ptep); + +	WARN_ON_ONCE(pte_valid_cont(orig_pte)); +	__set_pte(ptep, pte_mknoncont(pte)); +} + +#define set_ptes set_ptes +static __always_inline void set_ptes(struct mm_struct *mm, unsigned long addr, +				pte_t *ptep, pte_t pte, unsigned int nr) +{ +	pte = pte_mknoncont(pte); + +	if (likely(nr == 1)) { +		contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); +		__set_ptes(mm, addr, ptep, pte, 1); +		contpte_try_fold(mm, addr, ptep, pte); +	} else { +		contpte_set_ptes(mm, addr, ptep, pte, nr); +	} +} + +static inline void pte_clear(struct mm_struct *mm, +				unsigned long addr, pte_t *ptep) +{ +	contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); +	__pte_clear(mm, addr, ptep); +} + +#define clear_full_ptes clear_full_ptes +static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, +				pte_t *ptep, unsigned int nr, int full) +{ +	if (likely(nr == 1)) { +		contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); +		__clear_full_ptes(mm, addr, ptep, nr, full); +	} else { +		contpte_clear_full_ptes(mm, addr, ptep, nr, full); +	} +} + +#define get_and_clear_full_ptes get_and_clear_full_ptes +static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, +				unsigned long addr, pte_t *ptep, +				unsigned int nr, int full) +{ +	pte_t pte; + +	if (likely(nr == 1)) { +		contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); +		pte = __get_and_clear_full_ptes(mm, addr, ptep, nr, full); +	} else { +		pte = contpte_get_and_clear_full_ptes(mm, addr, ptep, nr, full); +	} + +	return pte; +} + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, +				unsigned long addr, pte_t *ptep) +{ +	contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); +	return __ptep_get_and_clear(mm, addr, ptep); +} + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, +				unsigned long addr, pte_t *ptep) +{ +	pte_t orig_pte = __ptep_get(ptep); + +	if (likely(!pte_valid_cont(orig_pte))) +		return __ptep_test_and_clear_young(vma, addr, ptep); + +	return contpte_ptep_test_and_clear_young(vma, addr, ptep); +} + +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +static inline int ptep_clear_flush_young(struct vm_area_struct *vma, +				unsigned long addr, pte_t *ptep) +{ +	pte_t orig_pte = __ptep_get(ptep); + +	if (likely(!pte_valid_cont(orig_pte))) +		return __ptep_clear_flush_young(vma, addr, ptep); + +	return 
contpte_ptep_clear_flush_young(vma, addr, ptep); +} + +#define wrprotect_ptes wrprotect_ptes +static __always_inline void wrprotect_ptes(struct mm_struct *mm, +				unsigned long addr, pte_t *ptep, unsigned int nr) +{ +	if (likely(nr == 1)) { +		/* +		 * Optimization: wrprotect_ptes() can only be called for present +		 * ptes so we only need to check contig bit as condition for +		 * unfold, and we can remove the contig bit from the pte we read +		 * to avoid re-reading. This speeds up fork() which is sensitive +		 * for order-0 folios. Equivalent to contpte_try_unfold(). +		 */ +		pte_t orig_pte = __ptep_get(ptep); + +		if (unlikely(pte_cont(orig_pte))) { +			__contpte_try_unfold(mm, addr, ptep, orig_pte); +			orig_pte = pte_mknoncont(orig_pte); +		} +		___ptep_set_wrprotect(mm, addr, ptep, orig_pte); +	} else { +		contpte_wrprotect_ptes(mm, addr, ptep, nr); +	} +} + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, +				unsigned long addr, pte_t *ptep) +{ +	wrprotect_ptes(mm, addr, ptep, 1); +} + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +static inline int ptep_set_access_flags(struct vm_area_struct *vma, +				unsigned long addr, pte_t *ptep, +				pte_t entry, int dirty) +{ +	pte_t orig_pte = __ptep_get(ptep); + +	entry = pte_mknoncont(entry); + +	if (likely(!pte_valid_cont(orig_pte))) +		return __ptep_set_access_flags(vma, addr, ptep, entry, dirty); + +	return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty); +} + +#else /* CONFIG_ARM64_CONTPTE */ + +#define ptep_get				__ptep_get +#define set_pte					__set_pte +#define set_ptes				__set_ptes +#define pte_clear				__pte_clear +#define clear_full_ptes				__clear_full_ptes +#define get_and_clear_full_ptes			__get_and_clear_full_ptes +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define ptep_get_and_clear			__ptep_get_and_clear +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define ptep_test_and_clear_young		__ptep_test_and_clear_young +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +#define ptep_clear_flush_young			__ptep_clear_flush_young +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +#define ptep_set_wrprotect			__ptep_set_wrprotect +#define wrprotect_ptes				__wrprotect_ptes +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +#define ptep_set_access_flags			__ptep_set_access_flags + +#endif /* CONFIG_ARM64_CONTPTE */ +  #endif /* !__ASSEMBLY__ */  #endif /* __ASM_PGTABLE_H */  |
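
The comment blocks added in this patch describe a split between the private single-entry helpers (__ptep_get(), __set_ptes(), ...) and the public wrappers (ptep_get(), set_ptes(), ...) that transparently manage the contiguous bit when CONFIG_ARM64_CONTPTE is enabled: the common case is a plain read or write, and only entries with the contiguous bit set fall back to the out-of-line contpte_*() paths. The program below is a minimal user-space model of that dispatch pattern, not the kernel code: every model_*/MODEL_* name, the 16-entry block size and the simplified bit layout are invented stand-ins, and the slow path here merely ORs an access flag across the block, analogous in spirit to how __get_and_clear_full_ptes() in the hunk above accumulates dirty/young state over a batch of entries.

/*
 * Minimal user-space model of the public/private PTE accessor split.
 * All names below are stand-ins; this is a sketch of the dispatch
 * pattern only, not a copy of the arm64 contpte implementation.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MODEL_CONT_PTES		16u		/* stand-in for CONT_PTES */
#define MODEL_PTE_VALID		(1u << 0)
#define MODEL_PTE_CONT		(1u << 1)	/* stand-in contiguous bit */
#define MODEL_PTE_YOUNG		(1u << 2)	/* stand-in access flag */

typedef uint32_t model_pte_t;

/* "Private" accessor: raw read of a single entry, like __ptep_get(). */
static model_pte_t model__ptep_get(const model_pte_t *ptep)
{
	return *ptep;
}

static bool model_pte_valid_cont(model_pte_t pte)
{
	return (pte & (MODEL_PTE_VALID | MODEL_PTE_CONT)) ==
	       (MODEL_PTE_VALID | MODEL_PTE_CONT);
}

/*
 * "Public" accessor: fast path is the raw read; entries that carry the
 * contiguous bit take a slow path that gathers the access flag from the
 * whole block, since hardware may have set it in any entry.
 */
static model_pte_t model_ptep_get(const model_pte_t *table, unsigned int idx)
{
	model_pte_t pte = model__ptep_get(&table[idx]);

	if (!model_pte_valid_cont(pte))
		return pte;

	unsigned int first = idx & ~(MODEL_CONT_PTES - 1u);

	for (unsigned int i = first; i < first + MODEL_CONT_PTES; i++)
		if (model__ptep_get(&table[i]) & MODEL_PTE_YOUNG)
			pte |= MODEL_PTE_YOUNG;

	return pte;
}

int main(void)
{
	model_pte_t table[32] = { 0 };

	/* One contiguous block of 16 entries; "hardware" marked entry 5 young. */
	for (unsigned int i = 0; i < MODEL_CONT_PTES; i++)
		table[i] = MODEL_PTE_VALID | MODEL_PTE_CONT;
	table[5] |= MODEL_PTE_YOUNG;

	/* One ordinary, non-contiguous valid entry. */
	table[20] = MODEL_PTE_VALID;

	printf("private read of entry 0:  young=%u\n",
	       !!(model__ptep_get(&table[0]) & MODEL_PTE_YOUNG));
	printf("public  read of entry 0:  young=%u\n",
	       !!(model_ptep_get(table, 0) & MODEL_PTE_YOUNG));
	printf("public  read of entry 20: young=%u\n",
	       !!(model_ptep_get(table, 20) & MODEL_PTE_YOUNG));

	return 0;
}

Compiled standalone, the model reports young=0 for the private read of entry 0, young=1 for the public read of the same entry (the flag is gathered from entry 5 of the block), and young=0 for the ordinary entry, illustrating how a public getter can report block-wide state that a raw read of one entry would miss while the fast path stays a single load.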