| field | value | date |
|---|---|---|
| author | Rodrigo Vivi <[email protected]> | 2018-07-23 09:13:12 -0700 |
| committer | Rodrigo Vivi <[email protected]> | 2018-07-23 09:13:12 -0700 |
| commit | c74a7469f97c0f40b46e82ee979f9fb1bb6e847c (patch) | |
| tree | f2690a1a916b73ef94657fbf0e0141ae57701825 /arch/powerpc/mm/tlb-radix.c | |
| parent | 6f15a7de86c8cf2dc09fc9e6d07047efa40ef809 (diff) | |
| parent | 500775074f88d9cf5416bed2ca19592812d62c41 (diff) | |
Merge drm/drm-next into drm-intel-next-queued
We need a backmerge to get DP_DPCD_REV_14 before we push other
i915 changes to dinq that could break compilation.
Signed-off-by: Rodrigo Vivi <[email protected]>
Diffstat (limited to 'arch/powerpc/mm/tlb-radix.c')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | arch/powerpc/mm/tlb-radix.c | 464 |

1 file changed, 415 insertions, 49 deletions
| diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index a5d7309c2d05..1135b43a597c 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -12,6 +12,8 @@  #include <linux/mm.h>  #include <linux/hugetlb.h>  #include <linux/memblock.h> +#include <linux/mmu_context.h> +#include <linux/sched/mm.h>  #include <asm/ppc-opcode.h>  #include <asm/tlb.h> @@ -118,6 +120,53 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)  	trace_tlbie(0, 0, rb, rs, ric, prs, r);  } +static inline void __tlbiel_lpid(unsigned long lpid, int set, +				unsigned long ric) +{ +	unsigned long rb,rs,prs,r; + +	rb = PPC_BIT(52); /* IS = 2 */ +	rb |= set << PPC_BITLSHIFT(51); +	rs = 0;  /* LPID comes from LPIDR */ +	prs = 0; /* partition scoped */ +	r = 1;   /* radix format */ + +	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) +		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); +	trace_tlbie(lpid, 1, rb, rs, ric, prs, r); +} + +static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) +{ +	unsigned long rb,rs,prs,r; + +	rb = PPC_BIT(52); /* IS = 2 */ +	rs = lpid; +	prs = 0; /* partition scoped */ +	r = 1;   /* radix format */ + +	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) +		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); +	trace_tlbie(lpid, 0, rb, rs, ric, prs, r); +} + +static inline void __tlbiel_lpid_guest(unsigned long lpid, int set, +				unsigned long ric) +{ +	unsigned long rb,rs,prs,r; + +	rb = PPC_BIT(52); /* IS = 2 */ +	rb |= set << PPC_BITLSHIFT(51); +	rs = 0;  /* LPID comes from LPIDR */ +	prs = 1; /* process scoped */ +	r = 1;   /* radix format */ + +	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) +		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); +	trace_tlbie(lpid, 1, rb, rs, ric, prs, r); +} + +  static inline void __tlbiel_va(unsigned long va, unsigned long pid,  			       unsigned long ap, unsigned long ric)  { @@ -150,6 +199,22 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,  	trace_tlbie(0, 0, rb, rs, ric, prs, r);  } +static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, +			      unsigned long ap, unsigned long ric) +{ +	unsigned long rb,rs,prs,r; + +	rb = va & ~(PPC_BITMASK(52, 63)); +	rb |= ap << PPC_BITLSHIFT(58); +	rs = lpid; +	prs = 0; /* partition scoped */ +	r = 1;   /* radix format */ + +	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) +		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); +	trace_tlbie(lpid, 0, rb, rs, ric, prs, r); +} +  static inline void fixup_tlbie(void)  {  	unsigned long pid = 0; @@ -161,6 +226,16 @@ static inline void fixup_tlbie(void)  	}  } +static inline void fixup_tlbie_lpid(unsigned long lpid) +{ +	unsigned long va = ((1UL << 52) - 1); + +	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { +		asm volatile("ptesync": : :"memory"); +		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); +	} +} +  /*   * We use 128 set in radix mode and 256 set in hpt mode.   */ @@ -214,6 +289,86 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)  	asm volatile("eieio; tlbsync; ptesync": : :"memory");  } +static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric) +{ +	int set; + +	VM_BUG_ON(mfspr(SPRN_LPID) != lpid); + +	asm volatile("ptesync": : :"memory"); + +	/* +	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL, +	 * also flush the entire Page Walk Cache. 
+	 */ +	__tlbiel_lpid(lpid, 0, ric); + +	/* For PWC, only one flush is needed */ +	if (ric == RIC_FLUSH_PWC) { +		asm volatile("ptesync": : :"memory"); +		return; +	} + +	/* For the remaining sets, just flush the TLB */ +	for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) +		__tlbiel_lpid(lpid, set, RIC_FLUSH_TLB); + +	asm volatile("ptesync": : :"memory"); +	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); +} + +static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric) +{ +	asm volatile("ptesync": : :"memory"); + +	/* +	 * Workaround the fact that the "ric" argument to __tlbie_pid +	 * must be a compile-time contraint to match the "i" constraint +	 * in the asm statement. +	 */ +	switch (ric) { +	case RIC_FLUSH_TLB: +		__tlbie_lpid(lpid, RIC_FLUSH_TLB); +		break; +	case RIC_FLUSH_PWC: +		__tlbie_lpid(lpid, RIC_FLUSH_PWC); +		break; +	case RIC_FLUSH_ALL: +	default: +		__tlbie_lpid(lpid, RIC_FLUSH_ALL); +	} +	fixup_tlbie_lpid(lpid); +	asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} + +static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric) +{ +	int set; + +	VM_BUG_ON(mfspr(SPRN_LPID) != lpid); + +	asm volatile("ptesync": : :"memory"); + +	/* +	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL, +	 * also flush the entire Page Walk Cache. +	 */ +	__tlbiel_lpid_guest(lpid, 0, ric); + +	/* For PWC, only one flush is needed */ +	if (ric == RIC_FLUSH_PWC) { +		asm volatile("ptesync": : :"memory"); +		return; +	} + +	/* For the remaining sets, just flush the TLB */ +	for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) +		__tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB); + +	asm volatile("ptesync": : :"memory"); +} + +  static inline void __tlbiel_va_range(unsigned long start, unsigned long end,  				    unsigned long pid, unsigned long page_size,  				    unsigned long psize) @@ -268,6 +423,17 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,  	asm volatile("eieio; tlbsync; ptesync": : :"memory");  } +static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid, +			      unsigned long psize, unsigned long ric) +{ +	unsigned long ap = mmu_get_ap(psize); + +	asm volatile("ptesync": : :"memory"); +	__tlbie_lpid_va(va, lpid, ap, ric); +	fixup_tlbie_lpid(lpid); +	asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} +  static inline void _tlbie_va_range(unsigned long start, unsigned long end,  				    unsigned long pid, unsigned long page_size,  				    unsigned long psize, bool also_pwc) @@ -340,6 +506,15 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd  }  EXPORT_SYMBOL(radix__local_flush_tlb_page); +static bool mm_is_singlethreaded(struct mm_struct *mm) +{ +	if (atomic_read(&mm->context.copros) > 0) +		return false; +	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) +		return true; +	return false; +} +  static bool mm_needs_flush_escalation(struct mm_struct *mm)  {  	/* @@ -347,10 +522,47 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm)  	 * caching PTEs and not flushing them properly when  	 * RIC = 0 for a PID/LPID invalidate  	 */ -	return atomic_read(&mm->context.copros) != 0; +	if (atomic_read(&mm->context.copros) > 0) +		return true; +	return false;  }  #ifdef CONFIG_SMP +static void do_exit_flush_lazy_tlb(void *arg) +{ +	struct mm_struct *mm = arg; +	unsigned long pid = mm->context.id; + +	if (current->mm == mm) +		return; /* Local CPU */ + +	if (current->active_mm == mm) { +		/* +		 * Must be a kernel thread because sender is 
single-threaded. +		 */ +		BUG_ON(current->mm); +		mmgrab(&init_mm); +		switch_mm(mm, &init_mm, current); +		current->active_mm = &init_mm; +		mmdrop(mm); +	} +	_tlbiel_pid(pid, RIC_FLUSH_ALL); +} + +static void exit_flush_lazy_tlbs(struct mm_struct *mm) +{ +	/* +	 * Would be nice if this was async so it could be run in +	 * parallel with our local flush, but generic code does not +	 * give a good API for it. Could extend the generic code or +	 * make a special powerpc IPI for flushing TLBs. +	 * For now it's not too performance critical. +	 */ +	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, +				(void *)mm, 1); +	mm_reset_thread_local(mm); +} +  void radix__flush_tlb_mm(struct mm_struct *mm)  {  	unsigned long pid; @@ -360,18 +572,30 @@ void radix__flush_tlb_mm(struct mm_struct *mm)  		return;  	preempt_disable(); +	/* +	 * Order loads of mm_cpumask vs previous stores to clear ptes before +	 * the invalidate. See barrier in switch_mm_irqs_off +	 */ +	smp_mb();  	if (!mm_is_thread_local(mm)) { +		if (unlikely(mm_is_singlethreaded(mm))) { +			exit_flush_lazy_tlbs(mm); +			goto local; +		} +  		if (mm_needs_flush_escalation(mm))  			_tlbie_pid(pid, RIC_FLUSH_ALL);  		else  			_tlbie_pid(pid, RIC_FLUSH_TLB); -	} else +	} else { +local:  		_tlbiel_pid(pid, RIC_FLUSH_TLB); +	}  	preempt_enable();  }  EXPORT_SYMBOL(radix__flush_tlb_mm); -void radix__flush_all_mm(struct mm_struct *mm) +static void __flush_all_mm(struct mm_struct *mm, bool fullmm)  {  	unsigned long pid; @@ -380,12 +604,25 @@ void radix__flush_all_mm(struct mm_struct *mm)  		return;  	preempt_disable(); -	if (!mm_is_thread_local(mm)) +	smp_mb(); /* see radix__flush_tlb_mm */ +	if (!mm_is_thread_local(mm)) { +		if (unlikely(mm_is_singlethreaded(mm))) { +			if (!fullmm) { +				exit_flush_lazy_tlbs(mm); +				goto local; +			} +		}  		_tlbie_pid(pid, RIC_FLUSH_ALL); -	else +	} else { +local:  		_tlbiel_pid(pid, RIC_FLUSH_ALL); +	}  	preempt_enable();  } +void radix__flush_all_mm(struct mm_struct *mm) +{ +	__flush_all_mm(mm, false); +}  EXPORT_SYMBOL(radix__flush_all_mm);  void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) @@ -404,10 +641,17 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,  		return;  	preempt_disable(); -	if (!mm_is_thread_local(mm)) +	smp_mb(); /* see radix__flush_tlb_mm */ +	if (!mm_is_thread_local(mm)) { +		if (unlikely(mm_is_singlethreaded(mm))) { +			exit_flush_lazy_tlbs(mm); +			goto local; +		}  		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); -	else +	} else { +local:  		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); +	}  	preempt_enable();  } @@ -445,35 +689,38 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range);  static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;  static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; -void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, -		     unsigned long end) +static inline void __radix__flush_tlb_range(struct mm_struct *mm, +					unsigned long start, unsigned long end, +					bool flush_all_sizes)  { -	struct mm_struct *mm = vma->vm_mm;  	unsigned long pid;  	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;  	unsigned long page_size = 1UL << page_shift;  	unsigned long nr_pages = (end - start) >> page_shift;  	bool local, full; -#ifdef CONFIG_HUGETLB_PAGE -	if (is_vm_hugetlb_page(vma)) -		return radix__flush_hugetlb_tlb_range(vma, start, end); -#endif -  	pid = mm->context.id;  	if (unlikely(pid == 
MMU_NO_CONTEXT))  		return;  	preempt_disable(); -	if (mm_is_thread_local(mm)) { -		local = true; -		full = (end == TLB_FLUSH_ALL || -				nr_pages > tlb_local_single_page_flush_ceiling); -	} else { +	smp_mb(); /* see radix__flush_tlb_mm */ +	if (!mm_is_thread_local(mm)) { +		if (unlikely(mm_is_singlethreaded(mm))) { +			if (end != TLB_FLUSH_ALL) { +				exit_flush_lazy_tlbs(mm); +				goto is_local; +			} +		}  		local = false;  		full = (end == TLB_FLUSH_ALL ||  				nr_pages > tlb_single_page_flush_ceiling); +	} else { +is_local: +		local = true; +		full = (end == TLB_FLUSH_ALL || +				nr_pages > tlb_local_single_page_flush_ceiling);  	}  	if (full) { @@ -486,37 +733,64 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,  				_tlbie_pid(pid, RIC_FLUSH_TLB);  		}  	} else { -		bool hflush = false; +		bool hflush = flush_all_sizes; +		bool gflush = flush_all_sizes;  		unsigned long hstart, hend; +		unsigned long gstart, gend; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -		hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT; -		hend = end >> HPAGE_PMD_SHIFT; -		if (hstart < hend) { -			hstart <<= HPAGE_PMD_SHIFT; -			hend <<= HPAGE_PMD_SHIFT; +		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))  			hflush = true; + +		if (hflush) { +			hstart = (start + PMD_SIZE - 1) & PMD_MASK; +			hend = end & PMD_MASK; +			if (hstart == hend) +				hflush = false; +		} + +		if (gflush) { +			gstart = (start + PUD_SIZE - 1) & PUD_MASK; +			gend = end & PUD_MASK; +			if (gstart == gend) +				gflush = false;  		} -#endif  		asm volatile("ptesync": : :"memory");  		if (local) {  			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);  			if (hflush)  				__tlbiel_va_range(hstart, hend, pid, -						HPAGE_PMD_SIZE, MMU_PAGE_2M); +						PMD_SIZE, MMU_PAGE_2M); +			if (gflush) +				__tlbiel_va_range(gstart, gend, pid, +						PUD_SIZE, MMU_PAGE_1G);  			asm volatile("ptesync": : :"memory");  		} else {  			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);  			if (hflush)  				__tlbie_va_range(hstart, hend, pid, -						HPAGE_PMD_SIZE, MMU_PAGE_2M); +						PMD_SIZE, MMU_PAGE_2M); +			if (gflush) +				__tlbie_va_range(gstart, gend, pid, +						PUD_SIZE, MMU_PAGE_1G);  			fixup_tlbie();  			asm volatile("eieio; tlbsync; ptesync": : :"memory");  		}  	}  	preempt_enable();  } + +void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, +		     unsigned long end) + +{ +#ifdef CONFIG_HUGETLB_PAGE +	if (is_vm_hugetlb_page(vma)) +		return radix__flush_hugetlb_tlb_range(vma, start, end); +#endif + +	__radix__flush_tlb_range(vma->vm_mm, start, end, false); +}  EXPORT_SYMBOL(radix__flush_tlb_range);  static int radix_get_mmu_psize(int page_size) @@ -534,6 +808,49 @@ static int radix_get_mmu_psize(int page_size)  	return psize;  } +/* + * Flush partition scoped LPID address translation for all CPUs. + */ +void radix__flush_tlb_lpid_page(unsigned int lpid, +					unsigned long addr, +					unsigned long page_size) +{ +	int psize = radix_get_mmu_psize(page_size); + +	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB); +} +EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page); + +/* + * Flush partition scoped PWC from LPID for all CPUs. 
+ */ +void radix__flush_pwc_lpid(unsigned int lpid) +{ +	_tlbie_lpid(lpid, RIC_FLUSH_PWC); +} +EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid); + +/* + * Flush partition scoped translations from LPID (=LPIDR) + */ +void radix__local_flush_tlb_lpid(unsigned int lpid) +{ +	_tlbiel_lpid(lpid, RIC_FLUSH_ALL); +} +EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid); + +/* + * Flush process scoped translations from LPID (=LPIDR). + * Important difference, the guest normally manages its own translations, + * but some cases e.g., vCPU CPU migration require KVM to flush. + */ +void radix__local_flush_tlb_lpid_guest(unsigned int lpid) +{ +	_tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL); +} +EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest); + +  static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,  				  unsigned long end, int psize); @@ -542,6 +859,8 @@ void radix__tlb_flush(struct mmu_gather *tlb)  	int psize = 0;  	struct mm_struct *mm = tlb->mm;  	int page_size = tlb->page_size; +	unsigned long start = tlb->start; +	unsigned long end = tlb->end;  	/*  	 * if page size is not something we understand, do a full mm flush @@ -551,16 +870,46 @@ void radix__tlb_flush(struct mmu_gather *tlb)  	 * See the comment for radix in arch_exit_mmap().  	 */  	if (tlb->fullmm) { -		radix__flush_all_mm(mm); +		__flush_all_mm(mm, true); +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) +	} else if (mm_tlb_flush_nested(mm)) { +		/* +		 * If there is a concurrent invalidation that is clearing ptes, +		 * then it's possible this invalidation will miss one of those +		 * cleared ptes and miss flushing the TLB. If this invalidate +		 * returns before the other one flushes TLBs, that can result +		 * in it returning while there are still valid TLBs inside the +		 * range to be invalidated. +		 * +		 * See mm/memory.c:tlb_finish_mmu() for more details. +		 * +		 * The solution to this is ensure the entire range is always +		 * flushed here. The problem for powerpc is that the flushes +		 * are page size specific, so this "forced flush" would not +		 * do the right thing if there are a mix of page sizes in +		 * the range to be invalidated. So use __flush_tlb_range +		 * which invalidates all possible page sizes in the range. +		 * +		 * PWC flush probably is not be required because the core code +		 * shouldn't free page tables in this path, but accounting +		 * for the possibility makes us a bit more robust. +		 * +		 * need_flush_all is an uncommon case because page table +		 * teardown should be done with exclusive locks held (but +		 * after locks are dropped another invalidate could come +		 * in), it could be optimized further if necessary. 
+		 */ +		if (!tlb->need_flush_all) +			__radix__flush_tlb_range(mm, start, end, true); +		else +			radix__flush_all_mm(mm); +#endif  	} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {  		if (!tlb->need_flush_all)  			radix__flush_tlb_mm(mm);  		else  			radix__flush_all_mm(mm);  	} else { -		unsigned long start = tlb->start; -		unsigned long end = tlb->end; -  		if (!tlb->need_flush_all)  			radix__flush_tlb_range_psize(mm, start, end, psize);  		else @@ -584,24 +933,33 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,  		return;  	preempt_disable(); -	if (mm_is_thread_local(mm)) { -		local = true; -		full = (end == TLB_FLUSH_ALL || -				nr_pages > tlb_local_single_page_flush_ceiling); -	} else { +	smp_mb(); /* see radix__flush_tlb_mm */ +	if (!mm_is_thread_local(mm)) { +		if (unlikely(mm_is_singlethreaded(mm))) { +			if (end != TLB_FLUSH_ALL) { +				exit_flush_lazy_tlbs(mm); +				goto is_local; +			} +		}  		local = false;  		full = (end == TLB_FLUSH_ALL ||  				nr_pages > tlb_single_page_flush_ceiling); +	} else { +is_local: +		local = true; +		full = (end == TLB_FLUSH_ALL || +				nr_pages > tlb_local_single_page_flush_ceiling);  	}  	if (full) { -		if (!local && mm_needs_flush_escalation(mm)) -			also_pwc = true; - -		if (local) +		if (local) {  			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); -		else -			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL: RIC_FLUSH_TLB); +		} else { +			if (mm_needs_flush_escalation(mm)) +				also_pwc = true; + +			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); +		}  	} else {  		if (local)  			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc); @@ -642,11 +1000,17 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)  	/* Otherwise first do the PWC, then iterate the pages. */  	preempt_disable(); - -	if (mm_is_thread_local(mm)) { -		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); -	} else { +	smp_mb(); /* see radix__flush_tlb_mm */ +	if (!mm_is_thread_local(mm)) { +		if (unlikely(mm_is_singlethreaded(mm))) { +			exit_flush_lazy_tlbs(mm); +			goto local; +		}  		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); +		goto local; +	} else { +local: +		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);  	}  	preempt_enable(); @@ -733,6 +1097,8 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)  		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {  			if (sib == cpu)  				continue; +			if (!cpu_possible(sib)) +				continue;  			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)  				flush = true;  		} |
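As an aside, here is a minimal standalone sketch (not kernel code) of the pattern the patch uses in `_tlbie_lpid()`: the `"i"` constraint in the `tlbie` inline asm only accepts values the compiler can prove are compile-time constants, so a runtime `ric` value is dispatched through a `switch` whose arms pass literal constants. `DO_FLUSH()` and `flush_ric()` below are hypothetical stand-ins, and an empty asm statement stands in for `PPC_TLBIE_5`.

```c
/*
 * Sketch of the compile-time-constant dispatch used by _tlbie_lpid()
 * above. Not kernel code: DO_FLUSH() and flush_ric() are made-up
 * stand-ins for __tlbie_lpid()-style helpers.
 */
#include <stdio.h>

enum { RIC_FLUSH_TLB = 0, RIC_FLUSH_PWC = 1, RIC_FLUSH_ALL = 2 };

/*
 * Stand-in for __tlbie_lpid(): the empty asm with an "i" constraint
 * only compiles when 'ric' is a compile-time constant, mirroring the
 * real PPC_TLBIE_5 asm statement.
 */
#define DO_FLUSH(ric)						\
	do {							\
		asm volatile("" : : "i"(ric));			\
		printf("flush, ric=%d\n", (ric));		\
	} while (0)

static void flush_ric(int ric)
{
	/* 'ric' is a runtime value here, so each arm passes a literal. */
	switch (ric) {
	case RIC_FLUSH_TLB:
		DO_FLUSH(RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		DO_FLUSH(RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		DO_FLUSH(RIC_FLUSH_ALL);
	}
}

int main(void)
{
	flush_ric(RIC_FLUSH_PWC);
	return 0;
}
```

Passing the runtime `ric` straight to `DO_FLUSH()` would normally fail with an "impossible constraint" error (unless the compiler happens to propagate a constant), which is why the kernel helpers take this switch-on-constant shape.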