Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--	arch/x86/kvm/x86.c	390
1 file changed, 260 insertions, 130 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 91602d310a3f..0ed07d8d2caa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -360,7 +360,8 @@ EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 asmlinkage __visible void kvm_spurious_fault(void)
 {
 	/* Fault while not rebooting.  We want the trace. */
-	BUG();
+	if (!kvm_rebooting)
+		BUG();
 }
 EXPORT_SYMBOL_GPL(kvm_spurious_fault);
 
@@ -674,8 +675,14 @@ static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
 				       data, offset, len, access);
 }
 
+static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
+{
+	return rsvd_bits(cpuid_maxphyaddr(vcpu), 63) | rsvd_bits(5, 8) |
+	       rsvd_bits(1, 2);
+}
+
 /*
- * Load the pae pdptrs.  Return true is they are all valid.
+ * Load the pae pdptrs.  Return 1 if they are all valid, 0 otherwise.
  */
 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 {
@@ -694,8 +701,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 	}
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
 		if ((pdpte[i] & PT_PRESENT_MASK) &&
-		    (pdpte[i] &
-		     vcpu->arch.mmu->guest_rsvd_check.rsvd_bits_mask[0][2])) {
+		    (pdpte[i] & pdptr_rsvd_bits(vcpu))) {
 			ret = 0;
 			goto out;
 		}
@@ -1140,6 +1146,44 @@ static u32 msrs_to_save[] = {
 	MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
 	MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
 	MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
+	MSR_IA32_UMWAIT_CONTROL,
+
+	MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
+	MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
+	MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
+	MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+	MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
+	MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
+	MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
+	MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
+	MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
+	MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
+	MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
+	MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
+	MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
+	MSR_ARCH_PERFMON_PERFCTR0 + 18, MSR_ARCH_PERFMON_PERFCTR0 + 19,
+	MSR_ARCH_PERFMON_PERFCTR0 + 20, MSR_ARCH_PERFMON_PERFCTR0 + 21,
+	MSR_ARCH_PERFMON_PERFCTR0 + 22, MSR_ARCH_PERFMON_PERFCTR0 + 23,
+	MSR_ARCH_PERFMON_PERFCTR0 + 24, MSR_ARCH_PERFMON_PERFCTR0 + 25,
+	MSR_ARCH_PERFMON_PERFCTR0 + 26, MSR_ARCH_PERFMON_PERFCTR0 + 27,
+	MSR_ARCH_PERFMON_PERFCTR0 + 28, MSR_ARCH_PERFMON_PERFCTR0 + 29,
+	MSR_ARCH_PERFMON_PERFCTR0 + 30, MSR_ARCH_PERFMON_PERFCTR0 + 31,
+	MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 18, MSR_ARCH_PERFMON_EVENTSEL0 + 19,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 20, MSR_ARCH_PERFMON_EVENTSEL0 + 21,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 22, MSR_ARCH_PERFMON_EVENTSEL0 + 23,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 24, MSR_ARCH_PERFMON_EVENTSEL0 + 25,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 26, MSR_ARCH_PERFMON_EVENTSEL0 + 27,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 28, MSR_ARCH_PERFMON_EVENTSEL0 + 29,
+	MSR_ARCH_PERFMON_EVENTSEL0 + 30, MSR_ARCH_PERFMON_EVENTSEL0 + 31,
 };
 
 static unsigned num_msrs_to_save;
@@ -1254,6 +1298,13 @@ static u64 kvm_get_arch_capabilities(void)
 	if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
 		data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
 
+	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+		data |= ARCH_CAP_RDCL_NO;
+	if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+		data |= ARCH_CAP_SSB_NO;
+	if (!boot_cpu_has_bug(X86_BUG_MDS))
+		data |= ARCH_CAP_MDS_NO;
+
 	return data;
 }
 
@@ -1351,19 +1402,23 @@ void kvm_enable_efer_bits(u64 mask)
 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
 
 /*
- * Writes msr value into into the appropriate "register".
+ * Write @data into the MSR specified by @index.  Select MSR specific fault
+ * checks are bypassed if @host_initiated is %true.
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
  */
-int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
+static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
+			 bool host_initiated)
 {
-	switch (msr->index) {
+	struct msr_data msr;
+
+	switch (index) {
 	case MSR_FS_BASE:
 	case MSR_GS_BASE:
 	case MSR_KERNEL_GS_BASE:
 	case MSR_CSTAR:
 	case MSR_LSTAR:
-		if (is_noncanonical_address(msr->data, vcpu))
+		if (is_noncanonical_address(data, vcpu))
 			return 1;
 		break;
 	case MSR_IA32_SYSENTER_EIP:
@@ -1380,38 +1435,95 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		 * value, and that something deterministic happens if the guest
 		 * invokes 64-bit SYSENTER.
 		 */
-		msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu));
+		data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
 	}
-	return kvm_x86_ops->set_msr(vcpu, msr);
+
+	msr.data = data;
+	msr.index = index;
+	msr.host_initiated = host_initiated;
+
+	return kvm_x86_ops->set_msr(vcpu, &msr);
 }
-EXPORT_SYMBOL_GPL(kvm_set_msr);
 
 /*
- * Adapt set_msr() to msr_io()'s calling convention
+ * Read the MSR specified by @index into @data.  Select MSR specific fault
+ * checks are bypassed if @host_initiated is %true.
+ * Returns 0 on success, non-0 otherwise.
+ * Assumes vcpu_load() was already called.
  */
-static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
+			 bool host_initiated)
 {
 	struct msr_data msr;
-	int r;
+	int ret;
 
 	msr.index = index;
-	msr.host_initiated = true;
-	r = kvm_get_msr(vcpu, &msr);
-	if (r)
-		return r;
+	msr.host_initiated = host_initiated;
 
-	*data = msr.data;
-	return 0;
+	ret = kvm_x86_ops->get_msr(vcpu, &msr);
+	if (!ret)
+		*data = msr.data;
+	return ret;
 }
 
-static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
 {
-	struct msr_data msr;
+	return __kvm_get_msr(vcpu, index, data, false);
+}
+EXPORT_SYMBOL_GPL(kvm_get_msr);
 
-	msr.data = *data;
-	msr.index = index;
-	msr.host_initiated = true;
-	return kvm_set_msr(vcpu, &msr);
+int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
+{
+	return __kvm_set_msr(vcpu, index, data, false);
+}
+EXPORT_SYMBOL_GPL(kvm_set_msr);
+
+int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
+{
+	u32 ecx = kvm_rcx_read(vcpu);
+	u64 data;
+
+	if (kvm_get_msr(vcpu, ecx, &data)) {
+		trace_kvm_msr_read_ex(ecx);
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
+	trace_kvm_msr_read(ecx, data);
+
+	kvm_rax_write(vcpu, data & -1u);
+	kvm_rdx_write(vcpu, (data >> 32) & -1u);
+	return kvm_skip_emulated_instruction(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
+
+int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
+{
+	u32 ecx = kvm_rcx_read(vcpu);
+	u64 data = kvm_read_edx_eax(vcpu);
+
+	if (kvm_set_msr(vcpu, ecx, data)) {
+		trace_kvm_msr_write_ex(ecx, data);
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
+	trace_kvm_msr_write(ecx, data);
+	return kvm_skip_emulated_instruction(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
+
+/*
+ * Adapt set_msr() to msr_io()'s calling convention
+ */
+static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+{
+	return __kvm_get_msr(vcpu, index, data, true);
+}
+
+static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+{
+	return __kvm_set_msr(vcpu, index, *data, true);
 }
 
 #ifdef CONFIG_X86_64
@@ -2452,6 +2564,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 	 * Doing a TLB flush here, on the guest's behalf, can avoid
 	 * expensive IPIs.
 	 */
+	trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
+		vcpu->arch.st.steal.preempted & KVM_VCPU_FLUSH_TLB);
 	if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
 		kvm_vcpu_flush_tlb(vcpu, false);
 
@@ -2748,18 +2862,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 }
 EXPORT_SYMBOL_GPL(kvm_set_msr_common);
 
-
-/*
- * Reads an msr value (of 'msr_index') into 'pdata'.
- * Returns 0 on success, non-0 otherwise.
- * Assumes vcpu_load() was already called.
- */
-int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
-{
-	return kvm_x86_ops->get_msr(vcpu, msr);
-}
-EXPORT_SYMBOL_GPL(kvm_get_msr);
-
 static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
 {
 	u64 data;
@@ -3106,7 +3208,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_HYPERV_EVENTFD:
 	case KVM_CAP_HYPERV_TLBFLUSH:
 	case KVM_CAP_HYPERV_SEND_IPI:
-	case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
 	case KVM_CAP_HYPERV_CPUID:
 	case KVM_CAP_PCI_SEGMENT:
 	case KVM_CAP_DEBUGREGS:
@@ -3183,6 +3284,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = kvm_x86_ops->get_nested_state ?
 			kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
 		break;
+	case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
+		r = kvm_x86_ops->enable_direct_tlbflush != NULL;
+		break;
+	case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
+		r = kvm_x86_ops->nested_enable_evmcs != NULL;
+		break;
 	default:
 		break;
 	}
@@ -3506,8 +3613,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
 	for (bank = 0; bank < bank_num; bank++)
 		vcpu->arch.mce_banks[bank*4] = ~(u64)0;
 
-	if (kvm_x86_ops->setup_mce)
-		kvm_x86_ops->setup_mce(vcpu);
+	kvm_x86_ops->setup_mce(vcpu);
 out:
 	return r;
 }
@@ -3957,6 +4063,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 				r = -EFAULT;
 		}
 		return r;
+	case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
+		if (!kvm_x86_ops->enable_direct_tlbflush)
+			return -ENOTTY;
+
+		return kvm_x86_ops->enable_direct_tlbflush(vcpu);
 
 	default:
 		return -EINVAL;
@@ -4989,6 +5100,11 @@ static void kvm_init_msr_list(void)
 	u32 dummy[2];
 	unsigned i, j;
 
+	BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
+			 "Please update the fixed PMCs in msrs_to_save[]");
+	BUILD_BUG_ON_MSG(INTEL_PMC_MAX_GENERIC != 32,
+			 "Please update the generic perfctr/eventsel MSRs in msrs_to_save[]");
+
 	for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
 		if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
 			continue;
@@ -5327,7 +5443,6 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
 
 int handle_ud(struct kvm_vcpu *vcpu)
 {
 	int emul_type = EMULTYPE_TRAP_UD;
-	enum emulation_result er;
 	char sig[5]; /* ud2; .ascii "kvm" */
 	struct x86_exception e;
@@ -5336,15 +5451,10 @@ int handle_ud(struct kvm_vcpu *vcpu)
 				sig, sizeof(sig), &e) == 0 &&
 	    memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
 		kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
-		emul_type = 0;
+		emul_type = EMULTYPE_TRAP_UD_FORCED;
 	}
 
-	er = kvm_emulate_instruction(vcpu, emul_type);
-	if (er == EMULATE_USER_EXIT)
-		return 0;
-	if (er != EMULATE_DONE)
-		kvm_queue_exception(vcpu, UD_VECTOR);
-	return 1;
+	return kvm_emulate_instruction(vcpu, emul_type);
 }
 EXPORT_SYMBOL_GPL(handle_ud);
 
@@ -5377,7 +5487,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 	 */
 	if (vcpu_match_mmio_gva(vcpu, gva)
 	    && !permission_fault(vcpu, vcpu->arch.walk_mmu,
-				 vcpu->arch.access, 0, access)) {
+				 vcpu->arch.mmio_access, 0, access)) {
 		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
 					(gva & (PAGE_SIZE - 1));
 		trace_vcpu_match_mmio(gva, *gpa, write, false);
@@ -5971,28 +6081,13 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
 static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
 			    u32 msr_index, u64 *pdata)
 {
-	struct msr_data msr;
-	int r;
-
-	msr.index = msr_index;
-	msr.host_initiated = false;
-	r = kvm_get_msr(emul_to_vcpu(ctxt), &msr);
-	if (r)
-		return r;
-
-	*pdata = msr.data;
-	return 0;
+	return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
 }
 
 static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
 			    u32 msr_index, u64 data)
 {
-	struct msr_data msr;
-
-	msr.data = data;
-	msr.index = msr_index;
-	msr.host_initiated = false;
-	return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
+	return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
 }
 
 static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
@@ -6075,6 +6170,11 @@ static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
 	kvm_smm_changed(emul_to_vcpu(ctxt));
 }
 
+static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr)
+{
+	return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr);
+}
+
 static const struct x86_emulate_ops emulate_ops = {
 	.read_gpr            = emulator_read_gpr,
 	.write_gpr           = emulator_write_gpr,
@@ -6116,6 +6216,7 @@ static const struct x86_emulate_ops emulate_ops = {
 	.set_hflags          = emulator_set_hflags,
 	.pre_leave_smm       = emulator_pre_leave_smm,
 	.post_leave_smm      = emulator_post_leave_smm,
+	.set_xcr             = emulator_set_xcr,
 };
 
 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -6175,7 +6276,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
 
-int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
+void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 {
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	int ret;
@@ -6187,37 +6288,43 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 	ctxt->_eip = ctxt->eip + inc_eip;
 	ret = emulate_int_real(ctxt, irq);
 
-	if (ret != X86EMUL_CONTINUE)
-		return EMULATE_FAIL;
-
-	ctxt->eip = ctxt->_eip;
-	kvm_rip_write(vcpu, ctxt->eip);
-	kvm_set_rflags(vcpu, ctxt->eflags);
-
-	return EMULATE_DONE;
+	if (ret != X86EMUL_CONTINUE) {
+		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+	} else {
+		ctxt->eip = ctxt->_eip;
+		kvm_rip_write(vcpu, ctxt->eip);
+		kvm_set_rflags(vcpu, ctxt->eflags);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
 
 static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
 {
-	int r = EMULATE_DONE;
-
 	++vcpu->stat.insn_emulation_fail;
 	trace_kvm_emulate_insn_failed(vcpu);
 
-	if (emulation_type & EMULTYPE_NO_UD_ON_FAIL)
-		return EMULATE_FAIL;
+	if (emulation_type & EMULTYPE_VMWARE_GP) {
+		kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+		return 1;
+	}
 
-	if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
+	if (emulation_type & EMULTYPE_SKIP) {
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
 		vcpu->run->internal.ndata = 0;
-		r = EMULATE_USER_EXIT;
+		return 0;
 	}
 
 	kvm_queue_exception(vcpu, UD_VECTOR);
 
-	return r;
+	if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
+		return 0;
+	}
+
+	return 1;
 }
 
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
@@ -6372,7 +6479,7 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
 	return dr6;
 }
 
-static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
+static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *kvm_run = vcpu->run;
 
@@ -6381,18 +6488,20 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
 		kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
 		kvm_run->debug.arch.exception = DB_VECTOR;
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
-		*r = EMULATE_USER_EXIT;
-	} else {
-		kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
+		return 0;
 	}
+	kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
+	return 1;
 }
 
 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
 	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
-	int r = EMULATE_DONE;
+	int r;
 
-	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	r = kvm_x86_ops->skip_emulated_instruction(vcpu);
+	if (unlikely(!r))
+		return 0;
 
 	/*
 	 * rflags is the old, "raw" value of the flags.  The new value has
@@ -6403,8 +6512,8 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	 * that sets the TF flag".
 	 */
 	if (unlikely(rflags & X86_EFLAGS_TF))
-		kvm_vcpu_do_singlestep(vcpu, &r);
-	return r == EMULATE_DONE;
+		r = kvm_vcpu_do_singlestep(vcpu);
+	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
 
@@ -6423,7 +6532,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
 			kvm_run->debug.arch.pc = eip;
 			kvm_run->debug.arch.exception = DB_VECTOR;
 			kvm_run->exit_reason = KVM_EXIT_DEBUG;
-			*r = EMULATE_USER_EXIT;
+			*r = 0;
 			return true;
 		}
 	}
@@ -6439,7 +6548,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
 			vcpu->arch.dr6 &= ~DR_TRAP_BITS;
 			vcpu->arch.dr6 |= dr6 | DR6_RTM;
 			kvm_queue_exception(vcpu, DB_VECTOR);
-			*r = EMULATE_DONE;
+			*r = 1;
 			return true;
 		}
 	}
@@ -6523,32 +6632,48 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		trace_kvm_emulate_insn_start(vcpu);
 		++vcpu->stat.insn_emulation;
 		if (r != EMULATION_OK)  {
-			if (emulation_type & EMULTYPE_TRAP_UD)
-				return EMULATE_FAIL;
+			if ((emulation_type & EMULTYPE_TRAP_UD) ||
+			    (emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
+				kvm_queue_exception(vcpu, UD_VECTOR);
+				return 1;
+			}
 			if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
 						emulation_type))
-				return EMULATE_DONE;
-			if (ctxt->have_exception && inject_emulated_exception(vcpu))
-				return EMULATE_DONE;
-			if (emulation_type & EMULTYPE_SKIP)
-				return EMULATE_FAIL;
+				return 1;
+			if (ctxt->have_exception) {
+				/*
+				 * #UD should result in just EMULATION_FAILED, and trap-like
+				 * exception should not be encountered during decode.
+				 */
+				WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
					     exception_type(ctxt->exception.vector) == EXCPT_TRAP);
+				inject_emulated_exception(vcpu);
+				return 1;
+			}
 			return handle_emulation_failure(vcpu, emulation_type);
 		}
 	}
 
-	if ((emulation_type & EMULTYPE_VMWARE) &&
-	    !is_vmware_backdoor_opcode(ctxt))
-		return EMULATE_FAIL;
+	if ((emulation_type & EMULTYPE_VMWARE_GP) &&
+	    !is_vmware_backdoor_opcode(ctxt)) {
+		kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+		return 1;
+	}
 
+	/*
+	 * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks
+	 * for kvm_skip_emulated_instruction().  The caller is responsible for
+	 * updating interruptibility state and injecting single-step #DBs.
+	 */
 	if (emulation_type & EMULTYPE_SKIP) {
 		kvm_rip_write(vcpu, ctxt->_eip);
 		if (ctxt->eflags & X86_EFLAGS_RF)
 			kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
-		return EMULATE_DONE;
+		return 1;
 	}
 
 	if (retry_instruction(ctxt, cr2, emulation_type))
-		return EMULATE_DONE;
+		return 1;
 
 	/* this is needed for vmware backdoor interface to work since it
 	   changes registers values  during IO operation */
@@ -6564,18 +6689,18 @@ restart:
 	r = x86_emulate_insn(ctxt);
 
 	if (r == EMULATION_INTERCEPTED)
-		return EMULATE_DONE;
+		return 1;
 
 	if (r == EMULATION_FAILED) {
 		if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
 					emulation_type))
-			return EMULATE_DONE;
+			return 1;
 
 		return handle_emulation_failure(vcpu, emulation_type);
 	}
 
 	if (ctxt->have_exception) {
-		r = EMULATE_DONE;
+		r = 1;
 		if (inject_emulated_exception(vcpu))
 			return r;
 	} else if (vcpu->arch.pio.count) {
@@ -6586,16 +6711,18 @@ restart:
 			writeback = false;
 			vcpu->arch.complete_userspace_io = complete_emulated_pio;
 		}
-		r = EMULATE_USER_EXIT;
+		r = 0;
 	} else if (vcpu->mmio_needed) {
+		++vcpu->stat.mmio_exits;
+
 		if (!vcpu->mmio_is_write)
 			writeback = false;
-		r = EMULATE_USER_EXIT;
+		r = 0;
 		vcpu->arch.complete_userspace_io = complete_emulated_mmio;
 	} else if (r == EMULATION_RESTART)
 		goto restart;
 	else
-		r = EMULATE_DONE;
+		r = 1;
 
 	if (writeback) {
 		unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
@@ -6604,8 +6731,8 @@ restart:
 		if (!ctxt->have_exception ||
 		    exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
 			kvm_rip_write(vcpu, ctxt->eip);
-			if (r == EMULATE_DONE && ctxt->tf)
-				kvm_vcpu_do_singlestep(vcpu, &r);
+			if (r && ctxt->tf)
+				r = kvm_vcpu_do_singlestep(vcpu);
 			__kvm_set_rflags(vcpu, ctxt->eflags);
 		}
 
@@ -8199,12 +8326,11 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
 {
 	int r;
+
 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 	r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-	if (r != EMULATE_DONE)
-		return 0;
-	return 1;
+	return r;
 }
 
 static int complete_emulated_pio(struct kvm_vcpu *vcpu)
@@ -8572,14 +8698,17 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 
 	ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
 				   has_error_code, error_code);
-
-	if (ret)
-		return EMULATE_FAIL;
+	if (ret) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
+		return 0;
+	}
 
 	kvm_rip_write(vcpu, ctxt->eip);
 	kvm_set_rflags(vcpu, ctxt->eflags);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
-	return EMULATE_DONE;
+	return 1;
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
@@ -9297,6 +9426,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 	atomic_set(&kvm->arch.noncoherent_dma_count, 0);
 
@@ -9322,10 +9452,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm_page_track_init(kvm);
 	kvm_mmu_init_vm(kvm);
 
-	if (kvm_x86_ops->vm_init)
-		return kvm_x86_ops->vm_init(kvm);
-
-	return 0;
+	return kvm_x86_ops->vm_init(kvm);
 }
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
@@ -9629,8 +9756,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	 * Scan sptes if dirty logging has been stopped, dropping those
 	 * which can be collapsed into a single large-page spte.  Later
 	 * page faults will create the large-page sptes.
+	 *
+	 * There is no need to do this in any of the following cases:
+	 * CREATE:	No dirty mappings will already exist.
+	 * MOVE/DELETE:	The old mappings will already have been cleaned up by
+	 *		kvm_arch_flush_shadow_memslot()
 	 */
-	if ((change != KVM_MR_DELETE) &&
+	if (change == KVM_MR_FLAGS_ONLY &&
 		(old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
 		!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
 		kvm_mmu_zap_collapsible_sptes(kvm, new);
@@ -10017,7 +10149,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
 bool kvm_arch_has_irq_bypass(void)
 {
-	return kvm_x86_ops->update_pi_irte != NULL;
+	return true;
 }
 
 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
@@ -10057,9 +10189,6 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
 int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
 				   uint32_t guest_irq, bool set)
 {
-	if (!kvm_x86_ops->update_pi_irte)
-		return -EINVAL;
-
 	return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
 }
 
@@ -10086,11 +10215,12 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window_update);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
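For readers of the MSR rework above, a minimal illustrative sketch of the new calling convention follows. kvm_get_msr()/kvm_set_msr() now take the MSR index and a raw u64 instead of a struct msr_data, and kvm_emulate_rdmsr()/kvm_emulate_wrmsr() bundle register decode, tracing, #GP injection and the RIP advance. The two wrapper functions below are hypothetical examples, not part of this patch.

/* Illustrative sketch only -- not part of the diff above. */
static int example_handle_wrmsr_exit(struct kvm_vcpu *vcpu)
{
	/* ECX/EDX:EAX decode, tracing, #GP injection and the RIP advance
	 * all happen inside the common helper now. */
	return kvm_emulate_wrmsr(vcpu);
}

static int example_read_one_msr(struct kvm_vcpu *vcpu, u32 index, u64 *val)
{
	/* Guest-initiated read: MSR-specific fault checks still apply;
	 * host-initiated accesses go through msr_io()/do_get_msr() instead. */
	return kvm_get_msr(vcpu, index, val);
}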