Diffstat (limited to 'arch/x86/mm/pgtable.c')
 -rw-r--r--	arch/x86/mm/pgtable.c	169
 1 file changed, 137 insertions(+), 32 deletions(-)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 47b5951e592b..3ef095c70ae3 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -182,6 +182,14 @@ static void pgd_dtor(pgd_t *pgd)
  */
 #define PREALLOCATED_PMDS	UNSHARED_PTRS_PER_PGD
 
+/*
+ * We allocate separate PMDs for the kernel part of the user page-table
+ * when PTI is enabled. We need them to map the per-process LDT into the
+ * user-space page-table.
+ */
+#define PREALLOCATED_USER_PMDS	 (static_cpu_has(X86_FEATURE_PTI) ? \
+					KERNEL_PGD_PTRS : 0)
+
 void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 {
 	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
@@ -202,14 +210,14 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 
 /* No need to prepopulate any pagetable entries in non-PAE modes. */
 #define PREALLOCATED_PMDS	0
-
+#define PREALLOCATED_USER_PMDS	 0
 #endif	/* CONFIG_X86_PAE */
 
-static void free_pmds(struct mm_struct *mm, pmd_t *pmds[])
+static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
 {
 	int i;
 
-	for(i = 0; i < PREALLOCATED_PMDS; i++)
+	for (i = 0; i < count; i++)
 		if (pmds[i]) {
 			pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
 			free_page((unsigned long)pmds[i]);
@@ -217,7 +225,7 @@ static void free_pmds(struct mm_struct *mm, pmd_t *pmds[])
 		}
 }
 
-static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
+static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
 {
 	int i;
 	bool failed = false;
@@ -226,7 +234,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
 	if (mm == &init_mm)
 		gfp &= ~__GFP_ACCOUNT;
 
-	for(i = 0; i < PREALLOCATED_PMDS; i++) {
+	for (i = 0; i < count; i++) {
 		pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
 		if (!pmd)
 			failed = true;
@@ -241,7 +249,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
 	}
 
 	if (failed) {
-		free_pmds(mm, pmds);
+		free_pmds(mm, pmds, count);
 		return -ENOMEM;
 	}
 
@@ -254,23 +262,38 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
  * preallocate which never got a corresponding vma will need to be
  * freed manually.
  */
+static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp)
+{
+	pgd_t pgd = *pgdp;
+
+	if (pgd_val(pgd) != 0) {
+		pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+
+		*pgdp = native_make_pgd(0);
+
+		paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
+		pmd_free(mm, pmd);
+		mm_dec_nr_pmds(mm);
+	}
+}
+
 static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
 {
 	int i;
 
-	for(i = 0; i < PREALLOCATED_PMDS; i++) {
-		pgd_t pgd = pgdp[i];
+	for (i = 0; i < PREALLOCATED_PMDS; i++)
+		mop_up_one_pmd(mm, &pgdp[i]);
 
-		if (pgd_val(pgd) != 0) {
-			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
 
-			pgdp[i] = native_make_pgd(0);
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
 
-			paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
-			pmd_free(mm, pmd);
-			mm_dec_nr_pmds(mm);
-		}
-	}
+	pgdp = kernel_to_user_pgdp(pgdp);
+
+	for (i = 0; i < PREALLOCATED_USER_PMDS; i++)
+		mop_up_one_pmd(mm, &pgdp[i + KERNEL_PGD_BOUNDARY]);
+#endif
 }
 
 static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
@@ -296,6 +319,38 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
 	}
 }
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
+				     pgd_t *k_pgd, pmd_t *pmds[])
+{
+	pgd_t *s_pgd = kernel_to_user_pgdp(swapper_pg_dir);
+	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
+	p4d_t *u_p4d;
+	pud_t *u_pud;
+	int i;
+
+	u_p4d = p4d_offset(u_pgd, 0);
+	u_pud = pud_offset(u_p4d, 0);
+
+	s_pgd += KERNEL_PGD_BOUNDARY;
+	u_pud += KERNEL_PGD_BOUNDARY;
+
+	for (i = 0; i < PREALLOCATED_USER_PMDS; i++, u_pud++, s_pgd++) {
+		pmd_t *pmd = pmds[i];
+
+		memcpy(pmd, (pmd_t *)pgd_page_vaddr(*s_pgd),
+		       sizeof(pmd_t) * PTRS_PER_PMD);
+
+		pud_populate(mm, u_pud, pmd);
+	}
+
+}
+#else
+static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
+				     pgd_t *k_pgd, pmd_t *pmds[])
+{
+}
+#endif
 /*
  * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also
  * assumes that pgd should be in one page.
@@ -329,9 +384,6 @@ static int __init pgd_cache_init(void)
 	 */
 	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
 				      SLAB_PANIC, NULL);
-	if (!pgd_cache)
-		return -ENOMEM;
-
 	return 0;
 }
 core_initcall(pgd_cache_init);
@@ -343,7 +395,8 @@ static inline pgd_t *_pgd_alloc(void)
 	 * We allocate one page for pgd.
 	 */
 	if (!SHARED_KERNEL_PMD)
-		return (pgd_t *)__get_free_page(PGALLOC_GFP);
+		return (pgd_t *)__get_free_pages(PGALLOC_GFP,
+						 PGD_ALLOCATION_ORDER);
 
 	/*
 	 * Now PAE kernel is not running as a Xen domain. We can allocate
@@ -355,7 +408,7 @@ static inline pgd_t *_pgd_alloc(void)
 static inline void _pgd_free(pgd_t *pgd)
 {
 	if (!SHARED_KERNEL_PMD)
-		free_page((unsigned long)pgd);
+		free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
 	else
 		kmem_cache_free(pgd_cache, pgd);
 }
@@ -375,6 +428,7 @@ static inline void _pgd_free(pgd_t *pgd)
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
+	pmd_t *u_pmds[PREALLOCATED_USER_PMDS];
 	pmd_t *pmds[PREALLOCATED_PMDS];
 
 	pgd = _pgd_alloc();
@@ -384,12 +438,15 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 
 	mm->pgd = pgd;
 
-	if (preallocate_pmds(mm, pmds) != 0)
+	if (preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0)
 		goto out_free_pgd;
 
-	if (paravirt_pgd_alloc(mm) != 0)
+	if (preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0)
 		goto out_free_pmds;
 
+	if (paravirt_pgd_alloc(mm) != 0)
+		goto out_free_user_pmds;
+
 	/*
 	 * Make sure that pre-populating the pmds is atomic with
 	 * respect to anything walking the pgd_list, so that they
@@ -399,13 +456,16 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 
 	pgd_ctor(mm, pgd);
 	pgd_prepopulate_pmd(mm, pgd, pmds);
+	pgd_prepopulate_user_pmd(mm, pgd, u_pmds);
 
 	spin_unlock(&pgd_lock);
 
 	return pgd;
 
+out_free_user_pmds:
+	free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS);
 out_free_pmds:
-	free_pmds(mm, pmds);
+	free_pmds(mm, pmds, PREALLOCATED_PMDS);
 out_free_pgd:
 	_pgd_free(pgd);
 out:
@@ -719,28 +779,50 @@ int pmd_clear_huge(pmd_t *pmd)
 	return 0;
 }
 
+#ifdef CONFIG_X86_64
 /**
  * pud_free_pmd_page - Clear pud entry and free pmd page.
  * @pud: Pointer to a PUD.
+ * @addr: Virtual address associated with pud.
  *
- * Context: The pud range has been unmaped and TLB purged.
+ * Context: The pud range has been unmapped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ *
+ * NOTE: Callers must allow a single page allocation.
  */
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
-	pmd_t *pmd;
+	pmd_t *pmd, *pmd_sv;
+	pte_t *pte;
 	int i;
 
 	if (pud_none(*pud))
 		return 1;
 
 	pmd = (pmd_t *)pud_page_vaddr(*pud);
+	pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
+	if (!pmd_sv)
+		return 0;
 
-	for (i = 0; i < PTRS_PER_PMD; i++)
-		if (!pmd_free_pte_page(&pmd[i]))
-			return 0;
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		pmd_sv[i] = pmd[i];
+		if (!pmd_none(pmd[i]))
+			pmd_clear(&pmd[i]);
+	}
 
 	pud_clear(pud);
+
+	/* INVLPG to clear all paging-structure caches */
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		if (!pmd_none(pmd_sv[i])) {
+			pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
+			free_page((unsigned long)pte);
+		}
+	}
+
+	free_page((unsigned long)pmd_sv);
 	free_page((unsigned long)pmd);
 
 	return 1;
@@ -749,11 +831,12 @@ int pud_free_pmd_page(pud_t *pud)
 /**
  * pmd_free_pte_page - Clear pmd entry and free pte page.
  * @pmd: Pointer to a PMD.
+ * @addr: Virtual address associated with pmd.
  *
- * Context: The pmd range has been unmaped and TLB purged.
+ * Context: The pmd range has been unmapped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
 */
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	pte_t *pte;
 
@@ -762,8 +845,30 @@ int pmd_free_pte_page(pmd_t *pmd)
 	pte = (pte_t *)pmd_page_vaddr(*pmd);
 	pmd_clear(pmd);
+
+	/* INVLPG to clear all paging-structure caches */
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
+
 	free_page((unsigned long)pte);
 
 	return 1;
 }
+
+#else /* !CONFIG_X86_64 */
+
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
+{
+	return pud_none(*pud);
+}
+
+/*
+ * Disable free page handling on x86-PAE. This assures that ioremap()
+ * does not update sync'd pmd entries. See vmalloc_sync_one().
+ */
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
+{
+	return pmd_none(*pmd);
+}
+
+#endif /* CONFIG_X86_64 */
 
 #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
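The reworked pud_free_pmd_page() above frees a huge mapping's lower-level page-table pages only after the entries have been cleared and the TLB (including the paging-structure caches) has been flushed for the affected range. The stand-alone C sketch below only models that snapshot/clear/flush/free ordering with toy types; toy_free_pmd_page(), toy_flush_tlb() and struct entry are made-up names, not kernel APIs, and the "flush" is just a print statement.

/*
 * Stand-alone sketch (not kernel code): models the ordering the patched
 * pud_free_pmd_page() uses on x86-64 -- snapshot the lower-level table,
 * clear the live entries, flush, and only then free the pages the
 * snapshot still points to.  All names here are hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ENTRIES 512			/* PTRS_PER_PMD on x86-64 */

struct entry { void *page; };		/* toy stand-in for a pmd_t */

static void toy_flush_tlb(void)		/* stand-in for flush_tlb_kernel_range() */
{
	puts("flush: stale translations and paging-structure caches gone");
}

/* Returns 1 on success, 0 if the scratch page cannot be allocated. */
static int toy_free_pmd_page(struct entry *pmd)
{
	struct entry *sv = malloc(sizeof(*sv) * ENTRIES);
	int i;

	if (!sv)
		return 0;		/* like the patch, callers must allow an allocation */

	memcpy(sv, pmd, sizeof(*sv) * ENTRIES);	/* 1. snapshot the table */

	for (i = 0; i < ENTRIES; i++)		/* 2. clear the live entries */
		pmd[i].page = NULL;

	toy_flush_tlb();			/* 3. flush before any freeing */

	for (i = 0; i < ENTRIES; i++)		/* 4. now freeing is safe */
		free(sv[i].page);		/*    (free(NULL) is a no-op) */

	free(sv);
	return 1;
}

int main(void)
{
	static struct entry pmd[ENTRIES];	/* zero-filled: empty toy table */

	pmd[0].page = malloc(4096);		/* pretend one pte page is attached */

	return toy_free_pmd_page(pmd) ? 0 : 1;
}

The point the patch enforces is purely the step ordering: nothing that could still be reachable through a stale paging-structure-cache entry is freed before the flush.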
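pgd_alloc() in this diff now runs two preallocation passes, preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) and preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) for the PTI user page-table, and unwinds them in reverse order through the out_free_user_pmds/out_free_pmds labels. The stand-alone sketch below mirrors only that count-parameterized allocate-and-unwind shape; prealloc(), release() and toy_pgd_alloc() are illustrative names and the counts are arbitrary, not KERNEL_PGD_PTRS.

/*
 * Stand-alone sketch (not kernel code): count-parameterized preallocation
 * with goto-based cleanup in reverse order, as pgd_alloc() does after this
 * change.  Names and counts are made up for illustration.
 */
#include <stdlib.h>

enum { N_KERNEL = 4, N_USER = 2 };	/* toy counts */

static int prealloc(void **slots, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		slots[i] = malloc(4096);
		if (!slots[i]) {
			while (i--)	/* drop what we already got */
				free(slots[i]);
			return -1;	/* analogous to preallocate_pmds() failing */
		}
	}
	return 0;
}

static void release(void **slots, int count)	/* like free_pmds(mm, pmds, count) */
{
	int i;

	for (i = 0; i < count; i++)
		free(slots[i]);
}

static int toy_pgd_alloc(void)
{
	void *pmds[N_KERNEL];
	void *u_pmds[N_USER];

	if (prealloc(pmds, N_KERNEL) != 0)
		goto out;
	if (prealloc(u_pmds, N_USER) != 0)
		goto out_free_pmds;

	/*
	 * The kernel would now populate both sets and hand them to the new
	 * mm; this toy just releases them so the program stays leak-free.
	 */
	release(u_pmds, N_USER);
	release(pmds, N_KERNEL);
	return 0;

out_free_pmds:
	release(pmds, N_KERNEL);
out:
	return -1;
}

int main(void)
{
	return toy_pgd_alloc() ? 1 : 0;
}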