| Field     | Value |
|-----------|-------|
| author    | Mark Brown <[email protected]>, 2015-10-12 18:09:27 +0100 |
| committer | Mark Brown <[email protected]>, 2015-10-12 18:09:27 +0100 |
| commit    | 79828b4fa835f73cdaf4bffa48696abdcbea9d02 |
| tree      | 5e0fa7156acb75ba603022bc807df8f2fedb97a8 /arch/x86/kvm/mmu.c |
| parent    | 721b51fcf91898299d96f4b72cb9434cda29dce6 |
| parent    | 8c1a9d6323abf0fb1e5dad96cf3f1c783505ea5a |
Merge remote-tracking branch 'asoc/fix/rt5645' into asoc-fix-rt5645
Diffstat (limited to 'arch/x86/kvm/mmu.c')
| Mode       | File               | Lines changed |
|------------|--------------------|---------------|
| -rw-r--r-- | arch/x86/kvm/mmu.c | 296           |

1 file changed, 181 insertions(+), 115 deletions(-)
```diff
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f807496b62c2..69088a1ba509 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -357,12 +357,6 @@ static u64 __get_spte_lockless(u64 *sptep)
 {
 	return ACCESS_ONCE(*sptep);
 }
-
-static bool __check_direct_spte_mmio_pf(u64 spte)
-{
-	/* It is valid if the spte is zapped. */
-	return spte == 0ull;
-}
 #else
 union split_spte {
 	struct {
@@ -478,23 +472,6 @@ retry:

 	return spte.spte;
 }
-
-static bool __check_direct_spte_mmio_pf(u64 spte)
-{
-	union split_spte sspte = (union split_spte)spte;
-	u32 high_mmio_mask = shadow_mmio_mask >> 32;
-
-	/* It is valid if the spte is zapped. */
-	if (spte == 0ull)
-		return true;
-
-	/* It is valid if the spte is being zapped. */
-	if (sspte.spte_low == 0ull &&
-	    (sspte.spte_high & high_mmio_mask) == high_mmio_mask)
-		return true;
-
-	return false;
-}
 #endif

 static bool spte_is_locklessly_modifiable(u64 spte)
@@ -2479,6 +2456,14 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 	return 0;
 }

+static bool kvm_is_mmio_pfn(pfn_t pfn)
+{
+	if (pfn_valid(pfn))
+		return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
+
+	return true;
+}
+
 static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    unsigned pte_access, int level,
 		    gfn_t gfn, pfn_t pfn, bool speculative,
@@ -2506,7 +2491,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= PT_PAGE_SIZE_MASK;
 	if (tdp_enabled)
 		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
-			kvm_is_reserved_pfn(pfn));
+			kvm_is_mmio_pfn(pfn));

 	if (host_writable)
 		spte |= SPTE_HOST_WRITEABLE;
@@ -3283,54 +3268,90 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
 	return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
 }

-static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+static bool
+__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level)
 {
-	if (direct)
-		return vcpu_match_mmio_gpa(vcpu, addr);
+	int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f;

-	return vcpu_match_mmio_gva(vcpu, addr);
+	return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) |
+		((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0);
 }

+static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
+{
+	return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level);
+}

-/*
- * On direct hosts, the last spte is only allows two states
- * for mmio page fault:
- *   - It is the mmio spte
- *   - It is zapped or it is being zapped.
- *
- * This function completely checks the spte when the last spte
- * is not the mmio spte.
- */
-static bool check_direct_spte_mmio_pf(u64 spte)
+static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
 {
-	return __check_direct_spte_mmio_pf(spte);
+	return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level);
 }

-static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
+static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+{
+	if (direct)
+		return vcpu_match_mmio_gpa(vcpu, addr);
+
+	return vcpu_match_mmio_gva(vcpu, addr);
+}
+
+/* return true if reserved bit is detected on spte. */
+static bool
+walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
 	struct kvm_shadow_walk_iterator iterator;
-	u64 spte = 0ull;
+	u64 sptes[PT64_ROOT_LEVEL], spte = 0ull;
+	int root, leaf;
+	bool reserved = false;

 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		return spte;
+		goto exit;

 	walk_shadow_page_lockless_begin(vcpu);
-	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
+
+	for (shadow_walk_init(&iterator, vcpu, addr),
+		 leaf = root = iterator.level;
+	     shadow_walk_okay(&iterator);
+	     __shadow_walk_next(&iterator, spte)) {
+		spte = mmu_spte_get_lockless(iterator.sptep);
+
+		sptes[leaf - 1] = spte;
+		leaf--;
+
 		if (!is_shadow_present_pte(spte))
 			break;
+
+		reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
+						    leaf);
+	}
+
 	walk_shadow_page_lockless_end(vcpu);

-	return spte;
+	if (reserved) {
+		pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
+		       __func__, addr);
+		while (root > leaf) {
+			pr_err("------ spte 0x%llx level %d.\n",
+			       sptes[root - 1], root);
+			root--;
+		}
+	}
+exit:
+	*sptep = spte;
+	return reserved;
 }

 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
 	u64 spte;
+	bool reserved;

 	if (quickly_check_mmio_pf(vcpu, addr, direct))
 		return RET_MMIO_PF_EMULATE;

-	spte = walk_shadow_page_get_mmio_spte(vcpu, addr);
+	reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
+	if (unlikely(reserved))
+		return RET_MMIO_PF_BUG;

 	if (is_mmio_spte(spte)) {
 		gfn_t gfn = get_mmio_spte_gfn(spte);
@@ -3348,13 +3369,6 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 	}

 	/*
-	 * It's ok if the gva is remapped by other cpus on shadow guest,
-	 * it's a BUG if the gfn is not a mmio page.
-	 */
-	if (direct && !check_direct_spte_mmio_pf(spte))
-		return RET_MMIO_PF_BUG;
-
-	/*
 	 * If the page table is zapped by other cpus, let CPU fault again on
 	 * the address.
 	 */
@@ -3596,19 +3610,21 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp
 #include "paging_tmpl.h"
 #undef PTTYPE

-static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
-				  struct kvm_mmu *context)
+static void
+__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+			struct rsvd_bits_validate *rsvd_check,
+			int maxphyaddr, int level, bool nx, bool gbpages,
+			bool pse)
 {
-	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 	u64 exb_bit_rsvd = 0;
 	u64 gbpages_bit_rsvd = 0;
 	u64 nonleaf_bit8_rsvd = 0;

-	context->bad_mt_xwr = 0;
+	rsvd_check->bad_mt_xwr = 0;

-	if (!context->nx)
+	if (!nx)
 		exb_bit_rsvd = rsvd_bits(63, 63);
-	if (!guest_cpuid_has_gbpages(vcpu))
+	if (!gbpages)
 		gbpages_bit_rsvd = rsvd_bits(7, 7);

 	/*
@@ -3618,80 +3634,95 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	if (guest_cpuid_is_amd(vcpu))
 		nonleaf_bit8_rsvd = rsvd_bits(8, 8);

-	switch (context->root_level) {
+	switch (level) {
 	case PT32_ROOT_LEVEL:
 		/* no rsvd bits for 2 level 4K page table entries */
-		context->rsvd_bits_mask[0][1] = 0;
-		context->rsvd_bits_mask[0][0] = 0;
-		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		rsvd_check->rsvd_bits_mask[0][1] = 0;
+		rsvd_check->rsvd_bits_mask[0][0] = 0;
+		rsvd_check->rsvd_bits_mask[1][0] =
+			rsvd_check->rsvd_bits_mask[0][0];

-		if (!is_pse(vcpu)) {
-			context->rsvd_bits_mask[1][1] = 0;
+		if (!pse) {
+			rsvd_check->rsvd_bits_mask[1][1] = 0;
 			break;
 		}

 		if (is_cpuid_PSE36())
 			/* 36bits PSE 4MB page */
-			context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+			rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
 		else
 			/* 32 bits PSE 4MB page */
-			context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+			rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
 		break;
 	case PT32E_ROOT_LEVEL:
-		context->rsvd_bits_mask[0][2] =
+		rsvd_check->rsvd_bits_mask[0][2] =
 			rsvd_bits(maxphyaddr, 63) |
 			rsvd_bits(5, 8) | rsvd_bits(1, 2);	/* PDPTE */
-		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62);	/* PDE */
-		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62); 	/* PTE */
-		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62) |
 			rsvd_bits(13, 20);		/* large page */
-		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		rsvd_check->rsvd_bits_mask[1][0] =
+			rsvd_check->rsvd_bits_mask[0][0];
 		break;
 	case PT64_ROOT_LEVEL:
-		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
-			nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
-			nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+			nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
 			rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+			nonleaf_bit8_rsvd | gbpages_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-		context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51);
+		rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51);
+		rsvd_check->rsvd_bits_mask[1][3] =
+			rsvd_check->rsvd_bits_mask[0][3];
+		rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd |
 			gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) |
 			rsvd_bits(13, 29);
-		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51) |
 			rsvd_bits(13, 20);		/* large page */
-		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		rsvd_check->rsvd_bits_mask[1][0] =
+			rsvd_check->rsvd_bits_mask[0][0];
 		break;
 	}
 }

-static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
-		struct kvm_mmu *context, bool execonly)
+static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu *context)
+{
+	__reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
+				cpuid_maxphyaddr(vcpu), context->root_level,
+				context->nx, guest_cpuid_has_gbpages(vcpu),
+				is_pse(vcpu));
+}
+
+static void
+__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
+			    int maxphyaddr, bool execonly)
 {
-	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 	int pte;

-	context->rsvd_bits_mask[0][3] =
+	rsvd_check->rsvd_bits_mask[0][3] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
-	context->rsvd_bits_mask[0][2] =
+	rsvd_check->rsvd_bits_mask[0][2] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
-	context->rsvd_bits_mask[0][1] =
+	rsvd_check->rsvd_bits_mask[0][1] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
-	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+	rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);

 	/* large page */
-	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-	context->rsvd_bits_mask[1][2] =
+	rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
+	rsvd_check->rsvd_bits_mask[1][2] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
-	context->rsvd_bits_mask[1][1] =
+	rsvd_check->rsvd_bits_mask[1][1] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
-	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+	rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];

 	for (pte = 0; pte < 64; pte++) {
 		int rwx_bits = pte & 7;
@@ -3699,10 +3730,64 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
 		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
 				rwx_bits == 0x2 || rwx_bits == 0x6 ||
 				(rwx_bits == 0x4 && !execonly))
-			context->bad_mt_xwr |= (1ull << pte);
+			rsvd_check->bad_mt_xwr |= (1ull << pte);
 	}
 }

+static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
+		struct kvm_mmu *context, bool execonly)
+{
+	__reset_rsvds_bits_mask_ept(&context->guest_rsvd_check,
+				    cpuid_maxphyaddr(vcpu), execonly);
+}
+
+/*
+ * the page table on host is the shadow page table for the page
+ * table in guest or amd nested guest, its mmu features completely
+ * follow the features in guest.
+ */
+void
+reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+{
+	__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+				boot_cpu_data.x86_phys_bits,
+				context->shadow_root_level, context->nx,
+				guest_cpuid_has_gbpages(vcpu), is_pse(vcpu));
+}
+EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
+
+/*
+ * the direct page table on host, use as much mmu features as
+ * possible, however, kvm currently does not do execution-protection.
+ */
+static void
+reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+				struct kvm_mmu *context)
+{
+	if (guest_cpuid_is_amd(vcpu))
+		__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+					boot_cpu_data.x86_phys_bits,
+					context->shadow_root_level, false,
+					cpu_has_gbpages, true);
+	else
+		__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+					    boot_cpu_data.x86_phys_bits,
+					    false);
+
+}
+
+/*
+ * as the comments in reset_shadow_zero_bits_mask() except it
+ * is the shadow page table for intel nested guest.
+ */
+static void
+reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+				struct kvm_mmu *context, bool execonly)
+{
+	__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+				    boot_cpu_data.x86_phys_bits, execonly);
+}
+
 static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 				      struct kvm_mmu *mmu, bool ept)
 {
@@ -3881,6 +3966,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)

 	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
+	reset_tdp_shadow_zero_bits_mask(vcpu, context);
 }

 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
@@ -3908,6 +3994,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 	context->base_role.smap_andnot_wp
 		= smap && !is_write_protection(vcpu);
 	context->base_role.smm = is_smm(vcpu);
+	reset_shadow_zero_bits_mask(vcpu, context);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);

@@ -3931,6 +4018,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)

 	update_permission_bitmask(vcpu, context, true);
 	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
+	reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);

@@ -4852,28 +4940,6 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 	return nr_mmu_pages;
 }

-int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
-{
-	struct kvm_shadow_walk_iterator iterator;
-	u64 spte;
-	int nr_sptes = 0;
-
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		return nr_sptes;
-
-	walk_shadow_page_lockless_begin(vcpu);
-	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
-		sptes[iterator.level-1] = spte;
-		nr_sptes++;
-		if (!is_shadow_present_pte(spte))
-			break;
-	}
-	walk_shadow_page_lockless_end(vcpu);
-
-	return nr_sptes;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
-
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_unload(vcpu);
```
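The common thread of the changes above is the `rsvd_bits_validate` check: a PTE or SPTE is rejected when it sets a bit in `rsvd_bits_mask[bit7][level - 1]`, or when its low six bits (memory type plus X/W/R for EPT) hit a combination flagged in `bad_mt_xwr`. The following user-space sketch illustrates that check in isolation; it is not kernel code, and the `maxphyaddr` value and sample PTE values are made up for the demonstration.

```c
/*
 * Standalone sketch (not kernel code) of the reserved-bits check used by
 * the patch above.  rsvd_bits(), rsvd_bits_mask and bad_mt_xwr mirror the
 * kernel names; main() and the sample values are illustrative only.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct rsvd_bits_validate {
	uint64_t rsvd_bits_mask[2][4];	/* indexed by [bit 7 of pte][level - 1] */
	uint64_t bad_mt_xwr;		/* bit n set => low 6 bits equal to n are illegal */
};

/* mask with bits s..e (inclusive) set, like the kernel's rsvd_bits() helper */
static uint64_t rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;
}

static bool is_rsvd_bits_set(const struct rsvd_bits_validate *c,
			     uint64_t pte, int level)
{
	int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f;

	return (pte & c->rsvd_bits_mask[bit7][level - 1]) ||
	       (c->bad_mt_xwr & (1ULL << low6));
}

int main(void)
{
	struct rsvd_bits_validate chk = { { { 0 } }, 0 };
	int maxphyaddr = 40;	/* assumed guest MAXPHYADDR, for the demo only */

	/* e.g. a level-1 PTE must not set physical-address bits above MAXPHYADDR */
	chk.rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
	chk.rsvd_bits_mask[1][0] = chk.rsvd_bits_mask[0][0];

	uint64_t good = 0x0000000012345067ULL;	/* all address bits below bit 40 */
	uint64_t bad  = good | (1ULL << 45);	/* sets a reserved bit */

	printf("good pte -> %d\n", is_rsvd_bits_set(&chk, good, 1));	/* 0 */
	printf("bad  pte -> %d\n", is_rsvd_bits_set(&chk, bad, 1));	/* 1 */
	return 0;
}
```

Keeping two instances of this structure per MMU context, `guest_rsvd_check` for the guest's own page tables and `shadow_zero_check` for the SPTEs KVM itself writes, is what lets walk_shadow_page_get_mmio_spte() report a corrupted shadow entry as RET_MMIO_PF_BUG instead of silently treating it as an MMIO fault.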