Diffstat (limited to 'arch/s390/mm/pgtable.c')
-rw-r--r--	arch/s390/mm/pgtable.c	422
1 file changed, 399 insertions, 23 deletions
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 37a23c223705..2adb23938a7f 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/quicklist.h>
 #include <linux/rcupdate.h>
+#include <linux/slab.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -133,30 +134,374 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
 }
 #endif
 
-static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+#ifdef CONFIG_PGSTE
+
+/**
+ * gmap_alloc - allocate a guest address space
+ * @mm: pointer to the parent mm_struct
+ *
+ * Returns a guest address space structure.
+ */
+struct gmap *gmap_alloc(struct mm_struct *mm)
 {
-	unsigned int old, new;
+	struct gmap *gmap;
+	struct page *page;
+	unsigned long *table;
 
-	do {
-		old = atomic_read(v);
-		new = old ^ bits;
-	} while (atomic_cmpxchg(v, old, new) != old);
-	return new;
+	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
+	if (!gmap)
+		goto out;
+	INIT_LIST_HEAD(&gmap->crst_list);
+	gmap->mm = mm;
+	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	if (!page)
+		goto out_free;
+	list_add(&page->lru, &gmap->crst_list);
+	table = (unsigned long *) page_to_phys(page);
+	crst_table_init(table, _REGION1_ENTRY_EMPTY);
+	gmap->table = table;
+	list_add(&gmap->list, &mm->context.gmap_list);
+	return gmap;
+
+out_free:
+	kfree(gmap);
+out:
+	return NULL;
 }
+EXPORT_SYMBOL_GPL(gmap_alloc);
 
-/*
- * page table entry allocation/free routines.
+static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
+{
+	struct gmap_pgtable *mp;
+	struct gmap_rmap *rmap;
+	struct page *page;
+
+	if (*table & _SEGMENT_ENTRY_INV)
+		return 0;
+	page = pfn_to_page(*table >> PAGE_SHIFT);
+	mp = (struct gmap_pgtable *) page->index;
+	list_for_each_entry(rmap, &mp->mapper, list) {
+		if (rmap->entry != table)
+			continue;
+		list_del(&rmap->list);
+		kfree(rmap);
+		break;
+	}
+	*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+	return 1;
+}
+
+static void gmap_flush_tlb(struct gmap *gmap)
+{
+	if (MACHINE_HAS_IDTE)
+		__tlb_flush_idte((unsigned long) gmap->table |
+				 _ASCE_TYPE_REGION1);
+	else
+		__tlb_flush_global();
+}
+
+/**
+ * gmap_free - free a guest address space
+ * @gmap: pointer to the guest address space structure
  */
-#ifdef CONFIG_PGSTE
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
+void gmap_free(struct gmap *gmap)
+{
+	struct page *page, *next;
+	unsigned long *table;
+	int i;
+
+	/* Flush tlb. */
+	if (MACHINE_HAS_IDTE)
+		__tlb_flush_idte((unsigned long) gmap->table |
+				 _ASCE_TYPE_REGION1);
+	else
+		__tlb_flush_global();
+
+	/* Free all segment & region tables. */
+	down_read(&gmap->mm->mmap_sem);
+	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
+		table = (unsigned long *) page_to_phys(page);
+		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
+			/* Remove gmap rmap structures for segment table. */
+			for (i = 0; i < PTRS_PER_PMD; i++, table++)
+				gmap_unlink_segment(gmap, table);
+		__free_pages(page, ALLOC_ORDER);
+	}
+	up_read(&gmap->mm->mmap_sem);
+	list_del(&gmap->list);
+	kfree(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_free);
+
+/**
+ * gmap_enable - switch primary space to the guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_enable(struct gmap *gmap)
+{
+	/* Load primary space page table origin. */
+	S390_lowcore.user_asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
+				 _ASCE_USER_BITS | __pa(gmap->table);
+	asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) );
+	S390_lowcore.gmap = (unsigned long) gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_enable);
+
+/**
+ * gmap_disable - switch back to the standard primary address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_disable(struct gmap *gmap)
+{
+	/* Load primary space page table origin. */
+	S390_lowcore.user_asce =
+		gmap->mm->context.asce_bits | __pa(gmap->mm->pgd);
+	asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) );
+	S390_lowcore.gmap = 0UL;
+}
+EXPORT_SYMBOL_GPL(gmap_disable);
+
+static int gmap_alloc_table(struct gmap *gmap,
+			       unsigned long *table, unsigned long init)
+{
+	struct page *page;
+	unsigned long *new;
+
+	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	if (!page)
+		return -ENOMEM;
+	new = (unsigned long *) page_to_phys(page);
+	crst_table_init(new, init);
+	down_read(&gmap->mm->mmap_sem);
+	if (*table & _REGION_ENTRY_INV) {
+		list_add(&page->lru, &gmap->crst_list);
+		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
+			(*table & _REGION_ENTRY_TYPE_MASK);
+	} else
+		__free_pages(page, ALLOC_ORDER);
+	up_read(&gmap->mm->mmap_sem);
+	return 0;
+}
+
+/**
+ * gmap_unmap_segment - unmap segment from the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @to: address in the guest address space
+ * @len: length of the memory area to unmap
+ *
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
+ */
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+{
+	unsigned long *table;
+	unsigned long off;
+	int flush;
+
+	if ((to | len) & (PMD_SIZE - 1))
+		return -EINVAL;
+	if (len == 0 || to + len < to)
+		return -EINVAL;
+
+	flush = 0;
+	down_read(&gmap->mm->mmap_sem);
+	for (off = 0; off < len; off += PMD_SIZE) {
+		/* Walk the guest addr space page table */
+		table = gmap->table + (((to + off) >> 53) & 0x7ff);
+		if (*table & _REGION_ENTRY_INV)
+			goto out;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = table + (((to + off) >> 42) & 0x7ff);
+		if (*table & _REGION_ENTRY_INV)
+			goto out;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = table + (((to + off) >> 31) & 0x7ff);
+		if (*table & _REGION_ENTRY_INV)
+			goto out;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = table + (((to + off) >> 20) & 0x7ff);
+
+		/* Clear segment table entry in guest address space. */
+		flush |= gmap_unlink_segment(gmap, table);
+		*table = _SEGMENT_ENTRY_INV;
+	}
+out:
+	up_read(&gmap->mm->mmap_sem);
+	if (flush)
+		gmap_flush_tlb(gmap);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_unmap_segment);
+
+/**
+ * gmap_map_segment - map a segment to the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @from: source address in the parent address space
+ * @to: target address in the guest address space
+ * @len: length of the memory area to map
+ *
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
+ */
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+		     unsigned long to, unsigned long len)
+{
+	unsigned long *table;
+	unsigned long off;
+	int flush;
+
+	if ((from | to | len) & (PMD_SIZE - 1))
+		return -EINVAL;
+	if (len == 0 || from + len > PGDIR_SIZE ||
+	    from + len < from || to + len < to)
+		return -EINVAL;
+
+	flush = 0;
+	down_read(&gmap->mm->mmap_sem);
+	for (off = 0; off < len; off += PMD_SIZE) {
+		/* Walk the gmap address space page table */
+		table = gmap->table + (((to + off) >> 53) & 0x7ff);
+		if ((*table & _REGION_ENTRY_INV) &&
+		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
+			goto out_unmap;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = table + (((to + off) >> 42) & 0x7ff);
+		if ((*table & _REGION_ENTRY_INV) &&
+		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
+			goto out_unmap;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = table + (((to + off) >> 31) & 0x7ff);
+		if ((*table & _REGION_ENTRY_INV) &&
+		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
+			goto out_unmap;
+		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
+		table = table + (((to + off) >> 20) & 0x7ff);
+
+		/* Store 'from' address in an invalid segment table entry. */
+		flush |= gmap_unlink_segment(gmap, table);
+		*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
+	}
+	up_read(&gmap->mm->mmap_sem);
+	if (flush)
+		gmap_flush_tlb(gmap);
+	return 0;
+
+out_unmap:
+	up_read(&gmap->mm->mmap_sem);
+	gmap_unmap_segment(gmap, to, len);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(gmap_map_segment);
+
+unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
+{
+	unsigned long *table, vmaddr, segment;
+	struct mm_struct *mm;
+	struct gmap_pgtable *mp;
+	struct gmap_rmap *rmap;
+	struct vm_area_struct *vma;
+	struct page *page;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	current->thread.gmap_addr = address;
+	mm = gmap->mm;
+	/* Walk the gmap address space page table */
+	table = gmap->table + ((address >> 53) & 0x7ff);
+	if (unlikely(*table & _REGION_ENTRY_INV))
+		return -EFAULT;
+	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	table = table + ((address >> 42) & 0x7ff);
+	if (unlikely(*table & _REGION_ENTRY_INV))
+		return -EFAULT;
+	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	table = table + ((address >> 31) & 0x7ff);
+	if (unlikely(*table & _REGION_ENTRY_INV))
+		return -EFAULT;
+	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	table = table + ((address >> 20) & 0x7ff);
+
+	/* Convert the gmap address to an mm address. */
+	segment = *table;
+	if (likely(!(segment & _SEGMENT_ENTRY_INV))) {
+		page = pfn_to_page(segment >> PAGE_SHIFT);
+		mp = (struct gmap_pgtable *) page->index;
+		return mp->vmaddr | (address & ~PMD_MASK);
+	} else if (segment & _SEGMENT_ENTRY_RO) {
+		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
+		vma = find_vma(mm, vmaddr);
+		if (!vma || vma->vm_start > vmaddr)
+			return -EFAULT;
+
+		/* Walk the parent mm page table */
+		pgd = pgd_offset(mm, vmaddr);
+		pud = pud_alloc(mm, pgd, vmaddr);
+		if (!pud)
+			return -ENOMEM;
+		pmd = pmd_alloc(mm, pud, vmaddr);
+		if (!pmd)
+			return -ENOMEM;
+		if (!pmd_present(*pmd) &&
+		    __pte_alloc(mm, vma, pmd, vmaddr))
+			return -ENOMEM;
+		/* pmd now points to a valid segment table entry. */
+		rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
+		if (!rmap)
+			return -ENOMEM;
+		/* Link gmap segment table entry location to page table. */
+		page = pmd_page(*pmd);
+		mp = (struct gmap_pgtable *) page->index;
+		rmap->entry = table;
+		list_add(&rmap->list, &mp->mapper);
+		/* Set gmap segment table entry to page table. */
+		*table = pmd_val(*pmd) & PAGE_MASK;
+		return vmaddr | (address & ~PMD_MASK);
+	}
+	return -EFAULT;
+}
+EXPORT_SYMBOL_GPL(gmap_fault);
+
+void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
+{
+	struct gmap_rmap *rmap, *next;
+	struct gmap_pgtable *mp;
+	struct page *page;
+	int flush;
+
+	flush = 0;
+	spin_lock(&mm->page_table_lock);
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	mp = (struct gmap_pgtable *) page->index;
+	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
+		*rmap->entry =
+			_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+		list_del(&rmap->list);
+		kfree(rmap);
+		flush = 1;
+	}
+	spin_unlock(&mm->page_table_lock);
+	if (flush)
+		__tlb_flush_global();
+}
+
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
+						    unsigned long vmaddr)
 {
 	struct page *page;
 	unsigned long *table;
+	struct gmap_pgtable *mp;
 
 	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
 	if (!page)
 		return NULL;
+	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
+	if (!mp) {
+		__free_page(page);
+		return NULL;
+	}
 	pgtable_page_ctor(page);
+	mp->vmaddr = vmaddr & PMD_MASK;
+	INIT_LIST_HEAD(&mp->mapper);
+	page->index = (unsigned long) mp;
 	atomic_set(&page->_mapcount, 3);
 	table = (unsigned long *) page_to_phys(page);
 	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
@@ -167,24 +512,58 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
 static inline void page_table_free_pgste(unsigned long *table)
 {
 	struct page *page;
+	struct gmap_pgtable *mp;
 
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	mp = (struct gmap_pgtable *) page->index;
+	BUG_ON(!list_empty(&mp->mapper));
 	pgtable_page_ctor(page);
 	atomic_set(&page->_mapcount, -1);
+	kfree(mp);
 	__free_page(page);
 }
-#endif
 
-unsigned long *page_table_alloc(struct mm_struct *mm)
+#else /* CONFIG_PGSTE */
+
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
+						    unsigned long vmaddr)
+{
+	return NULL;
+}
+
+static inline void page_table_free_pgste(unsigned long *table)
+{
+}
+
+static inline void gmap_unmap_notifier(struct mm_struct *mm,
+					  unsigned long *table)
+{
+}
+
+#endif /* CONFIG_PGSTE */
+
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+	unsigned int old, new;
+
+	do {
+		old = atomic_read(v);
+		new = old ^ bits;
+	} while (atomic_cmpxchg(v, old, new) != old);
+	return new;
+}
+
+/*
+ * page table entry allocation/free routines.
+ */
+unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
 {
 	struct page *page;
 	unsigned long *table;
	unsigned int mask, bit;
 
-#ifdef CONFIG_PGSTE
 	if (mm_has_pgste(mm))
-		return page_table_alloc_pgste(mm);
-#endif
+		return page_table_alloc_pgste(mm, vmaddr);
 	/* Allocate fragments of a 4K page as 1K/2K page table */
 	spin_lock_bh(&mm->context.list_lock);
 	mask = FRAG_MASK;
@@ -222,10 +601,10 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	struct page *page;
 	unsigned int bit, mask;
 
-#ifdef CONFIG_PGSTE
-	if (mm_has_pgste(mm))
+	if (mm_has_pgste(mm)) {
+		gmap_unmap_notifier(mm, table);
 		return page_table_free_pgste(table);
-#endif
+	}
 	/* Free 1K/2K page table fragment of a 4K page */
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
@@ -249,10 +628,8 @@ static void __page_table_free_rcu(void *table, unsigned bit)
 {
 	struct page *page;
 
-#ifdef CONFIG_PGSTE
 	if (bit == FRAG_MASK)
 		return page_table_free_pgste(table);
-#endif
 	/* Free 1K/2K page table fragment of a 4K page */
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
@@ -269,13 +646,12 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
 	unsigned int bit, mask;
 
 	mm = tlb->mm;
-#ifdef CONFIG_PGSTE
 	if (mm_has_pgste(mm)) {
+		gmap_unmap_notifier(mm, table);
 		table = (unsigned long *) (__pa(table) | FRAG_MASK);
 		tlb_remove_table(tlb, table);
 		return;
 	}
-#endif
 	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	spin_lock_bh(&mm->context.list_lock);
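
The struct gmap, struct gmap_rmap and struct gmap_pgtable types used throughout the patch are not defined in pgtable.c; they come from arch/s390/include/asm/pgtable.h, which the same patch extends. The following sketch is inferred purely from the usage above; field order and any additional members are assumptions:

	/* Inferred from the code above; the authoritative definitions live in
	 * arch/s390/include/asm/pgtable.h, not in this file. */
	struct gmap {
		struct list_head list;		/* link in mm->context.gmap_list */
		struct mm_struct *mm;		/* parent address space */
		unsigned long *table;		/* region-1 table root of the guest */
		struct list_head crst_list;	/* crst pages owned by this gmap */
	};

	struct gmap_rmap {			/* reverse mapping entry */
		struct list_head list;		/* link in gmap_pgtable->mapper */
		unsigned long *entry;		/* gmap segment entry to invalidate */
	};

	struct gmap_pgtable {			/* hangs off page->index of a pgste page */
		unsigned long vmaddr;		/* parent address mapped by this page table */
		struct list_head mapper;	/* gmap_rmap list for this page table */
	};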
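The shifts in the table walks (>> 53, >> 42, >> 31, >> 20, each masked with 0x7ff) follow from the s390 region/segment table format: every crst table holds 2048 eight-byte entries (16K, hence the ALLOC_ORDER allocations above), so each level consumes an 11-bit index, and the remaining 20 bits address bytes within a 1 MB segment (PMD_SIZE). A stand-alone illustration, written as ordinary user-space C rather than kernel code:

	/* Illustrates the 4-level walk indices used by gmap_map_segment()
	 * and gmap_fault() above. */
	#include <stdio.h>

	int main(void)
	{
		unsigned long addr = 0x0000123456789abcUL;	/* arbitrary address */

		printf("region-1 index: %lu\n", (addr >> 53) & 0x7ff);	/* bits 63..53 */
		printf("region-2 index: %lu\n", (addr >> 42) & 0x7ff);	/* bits 52..42 */
		printf("region-3 index: %lu\n", (addr >> 31) & 0x7ff);	/* bits 41..31 */
		printf("segment index:  %lu\n", (addr >> 20) & 0x7ff);	/* bits 30..20 */
		printf("segment offset: 0x%lx\n", addr & (0x100000UL - 1));
		return 0;
	}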
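A gmap segment entry is in one of two states: valid, pointing at a page table of the parent mm so that guest and parent share the ptes below it, or invalid with _SEGMENT_ENTRY_RO set, caching the parent address that gmap_fault() later resolves. The exported interface is meant to be driven by a hypervisor such as KVM; the sequence below is a hypothetical caller (run_guest and all constants are illustrative, not part of this patch, and error handling is abbreviated):

	static int run_guest(struct mm_struct *mm)
	{
		struct gmap *gmap;
		unsigned long hva;
		int rc;

		gmap = gmap_alloc(mm);		/* fresh guest address space */
		if (!gmap)
			return -ENOMEM;
		/* Back guest real memory 0..256MB with the parent mapping at
		 * 1GB; all three values must be 1MB (PMD_SIZE) aligned. */
		rc = gmap_map_segment(gmap, 0x40000000UL, 0UL, 0x10000000UL);
		if (rc)
			goto out;
		gmap_enable(gmap);		/* CR1 now carries the gmap asce */
		/* ... enter SIE; on a host fault for a guest address ... */
		down_read(&mm->mmap_sem);	/* gmap_fault() walks the parent mm */
		hva = gmap_fault(0x12345UL, gmap);
		up_read(&mm->mmap_sem);
		if (IS_ERR_VALUE(hva))
			rc = (int) hva;		/* -EFAULT: not mapped into the gmap */
		gmap_disable(gmap);		/* back to the parent's user asce */
	out:
		gmap_free(gmap);
		return rc;
	}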