Diffstat (limited to 'arch/x86/kvm/svm/nested.c')
-rw-r--r--   arch/x86/kvm/svm/nested.c | 271
1 file changed, 170 insertions, 101 deletions
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index f8b7bc04b3e7..1218b5a342fc 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -58,8 +58,9 @@ static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_excep
 	struct vcpu_svm *svm = to_svm(vcpu);
 	WARN_ON(!is_guest_mode(vcpu));
 
-	if (vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
-	   !svm->nested.nested_run_pending) {
+	if (vmcb12_is_intercept(&svm->nested.ctl,
+				INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
+	    !svm->nested.nested_run_pending) {
 		svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
 		svm->vmcb->control.exit_code_hi = 0;
 		svm->vmcb->control.exit_info_1 = fault->error_code;
@@ -121,7 +122,8 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
 
 void recalc_intercepts(struct vcpu_svm *svm)
 {
-	struct vmcb_control_area *c, *h, *g;
+	struct vmcb_control_area *c, *h;
+	struct vmcb_ctrl_area_cached *g;
 	unsigned int i;
 
 	vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
@@ -163,37 +165,6 @@ void recalc_intercepts(struct vcpu_svm *svm)
 	vmcb_set_intercept(c, INTERCEPT_VMSAVE);
 }
 
-static void copy_vmcb_control_area(struct vmcb_control_area *dst,
-				   struct vmcb_control_area *from)
-{
-	unsigned int i;
-
-	for (i = 0; i < MAX_INTERCEPT; i++)
-		dst->intercepts[i] = from->intercepts[i];
-
-	dst->iopm_base_pa         = from->iopm_base_pa;
-	dst->msrpm_base_pa        = from->msrpm_base_pa;
-	dst->tsc_offset           = from->tsc_offset;
-	/* asid not copied, it is handled manually for svm->vmcb.  */
-	dst->tlb_ctl              = from->tlb_ctl;
-	dst->int_ctl              = from->int_ctl;
-	dst->int_vector           = from->int_vector;
-	dst->int_state            = from->int_state;
-	dst->exit_code            = from->exit_code;
-	dst->exit_code_hi         = from->exit_code_hi;
-	dst->exit_info_1          = from->exit_info_1;
-	dst->exit_info_2          = from->exit_info_2;
-	dst->exit_int_info        = from->exit_int_info;
-	dst->exit_int_info_err    = from->exit_int_info_err;
-	dst->nested_ctl           = from->nested_ctl;
-	dst->event_inj            = from->event_inj;
-	dst->event_inj_err        = from->event_inj_err;
-	dst->nested_cr3           = from->nested_cr3;
-	dst->virt_ext              = from->virt_ext;
-	dst->pause_filter_count   = from->pause_filter_count;
-	dst->pause_filter_thresh  = from->pause_filter_thresh;
-}
-
 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 {
 	/*
@@ -203,7 +174,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 	 */
 	int i;
 
-	if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
+	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
 		return true;
 
 	for (i = 0; i < MSRPM_OFFSETS; i++) {
@@ -250,10 +221,10 @@ static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
 	}
 }
 
-static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
-				       struct vmcb_control_area *control)
+static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
+					 struct vmcb_ctrl_area_cached *control)
 {
-	if (CC(!vmcb_is_intercept(control, INTERCEPT_VMRUN)))
+	if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
 		return false;
 
 	if (CC(control->asid == 0))
@@ -275,9 +246,20 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu,
-				      struct vmcb_save_area *save)
+/* Common checks that apply to both L1 and L2 state.  */
+static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
+				     struct vmcb_save_area_cached *save)
 {
+	if (CC(!(save->efer & EFER_SVME)))
+		return false;
+
+	if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
+	    CC(save->cr0 & ~0xffffffffULL))
+		return false;
+
+	if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
+		return false;
+
 	/*
 	 * These checks are also performed by KVM_SET_SREGS,
 	 * except that EFER.LMA is not checked by SVM against
@@ -293,48 +275,90 @@ static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu,
 	if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
 		return false;
 
+	if (CC(!kvm_valid_efer(vcpu, save->efer)))
+		return false;
+
 	return true;
 }
 
-/* Common checks that apply to both L1 and L2 state.  */
-static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
-				    struct vmcb_save_area *save)
+static bool nested_vmcb_check_save(struct kvm_vcpu *vcpu)
 {
-	/*
-	 * FIXME: these should be done after copying the fields,
-	 * to avoid TOC/TOU races.  For these save area checks
-	 * the possible damage is limited since kvm_set_cr0 and
-	 * kvm_set_cr4 handle failure; EFER_SVME is an exception
-	 * so it is force-set later in nested_prepare_vmcb_save.
-	 */
-	if (CC(!(save->efer & EFER_SVME)))
-		return false;
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb_save_area_cached *save = &svm->nested.save;
 
-	if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
-	    CC(save->cr0 & ~0xffffffffULL))
-		return false;
+	return __nested_vmcb_check_save(vcpu, save);
+}
 
-	if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
-		return false;
+static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl;
 
-	if (!nested_vmcb_check_cr3_cr4(vcpu, save))
-		return false;
+	return __nested_vmcb_check_controls(vcpu, ctl);
+}
 
-	if (CC(!kvm_valid_efer(vcpu, save->efer)))
-		return false;
+static
+void __nested_copy_vmcb_control_to_cache(struct vmcb_ctrl_area_cached *to,
					 struct vmcb_control_area *from)
+{
+	unsigned int i;
 
-	return true;
+	for (i = 0; i < MAX_INTERCEPT; i++)
+		to->intercepts[i] = from->intercepts[i];
+
+	to->iopm_base_pa        = from->iopm_base_pa;
+	to->msrpm_base_pa       = from->msrpm_base_pa;
+	to->tsc_offset          = from->tsc_offset;
+	to->tlb_ctl             = from->tlb_ctl;
+	to->int_ctl             = from->int_ctl;
+	to->int_vector          = from->int_vector;
+	to->int_state           = from->int_state;
+	to->exit_code           = from->exit_code;
+	to->exit_code_hi        = from->exit_code_hi;
+	to->exit_info_1         = from->exit_info_1;
+	to->exit_info_2         = from->exit_info_2;
+	to->exit_int_info       = from->exit_int_info;
+	to->exit_int_info_err   = from->exit_int_info_err;
+	to->nested_ctl          = from->nested_ctl;
+	to->event_inj           = from->event_inj;
+	to->event_inj_err       = from->event_inj_err;
+	to->nested_cr3          = from->nested_cr3;
+	to->virt_ext            = from->virt_ext;
+	to->pause_filter_count  = from->pause_filter_count;
+	to->pause_filter_thresh = from->pause_filter_thresh;
+
+	/* Copy asid here because nested_vmcb_check_controls will check it.  */
+	to->asid           = from->asid;
+	to->msrpm_base_pa &= ~0x0fffULL;
+	to->iopm_base_pa  &= ~0x0fffULL;
 }
 
-void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
-				     struct vmcb_control_area *control)
+void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
+				       struct vmcb_control_area *control)
 {
-	copy_vmcb_control_area(&svm->nested.ctl, control);
+	__nested_copy_vmcb_control_to_cache(&svm->nested.ctl, control);
+}
 
-	/* Copy it here because nested_svm_check_controls will check it.  */
-	svm->nested.ctl.asid           = control->asid;
-	svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL;
-	svm->nested.ctl.iopm_base_pa  &= ~0x0fffULL;
+static void __nested_copy_vmcb_save_to_cache(struct vmcb_save_area_cached *to,
					     struct vmcb_save_area *from)
+{
+	/*
+	 * Copy only fields that are validated, as we need them
+	 * to avoid TOC/TOU races.
+	 */
+	to->efer = from->efer;
+	to->cr0 = from->cr0;
+	to->cr3 = from->cr3;
+	to->cr4 = from->cr4;
+
+	to->dr6 = from->dr6;
+	to->dr7 = from->dr7;
+}
+
+void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
				    struct vmcb_save_area *save)
+{
+	__nested_copy_vmcb_save_to_cache(&svm->nested.save, save);
 }
 
 /*
@@ -437,14 +461,13 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 		return -EINVAL;
 
 	if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
-	    CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
+	    CC(!load_pdptrs(vcpu, cr3)))
 		return -EINVAL;
 
 	if (!nested_npt)
 		kvm_mmu_new_pgd(vcpu, cr3);
 
 	vcpu->arch.cr3 = cr3;
-	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
 	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
 	kvm_init_mmu(vcpu);
@@ -490,15 +513,10 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 
 	kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
 
-	/*
-	 * Force-set EFER_SVME even though it is checked earlier on the
-	 * VMCB12, because the guest can flip the bit between the check
-	 * and now.  Clearing EFER_SVME would call svm_free_nested.
-	 */
-	svm_set_efer(&svm->vcpu, vmcb12->save.efer | EFER_SVME);
+	svm_set_efer(&svm->vcpu, svm->nested.save.efer);
 
-	svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
-	svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
+	svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
+	svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);
 
 	svm->vcpu.arch.cr2 = vmcb12->save.cr2;
 
@@ -513,8 +531,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 
 	/* These bits will be set properly on the first execution when new_vmc12 is true */
 	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
-		svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
-		svm->vcpu.arch.dr6  = vmcb12->save.dr6 | DR6_ACTIVE_LOW;
+		svm->vmcb->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
+		svm->vcpu.arch.dr6  = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
 		vmcb_mark_dirty(svm->vmcb, VMCB_DR);
 	}
 }
@@ -628,7 +646,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
 	nested_vmcb02_prepare_control(svm);
 	nested_vmcb02_prepare_save(svm, vmcb12);
 
-	ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
+	ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
 				  nested_npt_enabled(svm), from_vmrun);
 	if (ret)
 		return ret;
@@ -678,10 +696,11 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 	if (WARN_ON_ONCE(!svm->nested.initialized))
 		return -EINVAL;
 
-	nested_load_control_from_vmcb12(svm, &vmcb12->control);
+	nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
+	nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
 
-	if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
-	    !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) {
+	if (!nested_vmcb_check_save(vcpu) ||
+	    !nested_vmcb_check_controls(vcpu)) {
 		vmcb12->control.exit_code    = SVM_EXIT_ERR;
 		vmcb12->control.exit_code_hi = 0;
 		vmcb12->control.exit_info_1  = 0;
@@ -964,9 +983,9 @@ void svm_free_nested(struct vcpu_svm *svm)
 /*
  * Forcibly leave nested mode in order to be able to reset the VCPU later on.
  */
-void svm_leave_nested(struct vcpu_svm *svm)
+void svm_leave_nested(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu = &svm->vcpu;
+	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (is_guest_mode(vcpu)) {
 		svm->nested.nested_run_pending = 0;
@@ -988,7 +1007,7 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
 	u32 offset, msr, value;
 	int write, mask;
 
-	if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
+	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
 		return NESTED_EXIT_HOST;
 
 	msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
@@ -1015,7 +1034,7 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
 	u8 start_bit;
 	u64 gpa;
 
-	if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
+	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
 		return NESTED_EXIT_HOST;
 
 	port = svm->vmcb->control.exit_info_1 >> 16;
@@ -1046,12 +1065,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
 		vmexit = nested_svm_intercept_ioio(svm);
 		break;
 	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
-		if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
+		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
 			vmexit = NESTED_EXIT_DONE;
 		break;
 	}
 	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
-		if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
+		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
 			vmexit = NESTED_EXIT_DONE;
 		break;
 	}
@@ -1069,7 +1088,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
 		break;
 	}
 	default: {
-		if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
+		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
 			vmexit = NESTED_EXIT_DONE;
 	}
 	}
@@ -1147,7 +1166,7 @@ static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
 
 static inline bool nested_exit_on_init(struct vcpu_svm *svm)
 {
-	return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
+	return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
 }
 
 static int svm_check_nested_events(struct kvm_vcpu *vcpu)
@@ -1251,11 +1270,47 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
 	svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
 }
 
+/* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
+static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
					      struct vmcb_ctrl_area_cached *from)
+{
+	unsigned int i;
+
+	memset(dst, 0, sizeof(struct vmcb_control_area));
+
+	for (i = 0; i < MAX_INTERCEPT; i++)
+		dst->intercepts[i] = from->intercepts[i];
+
+	dst->iopm_base_pa         = from->iopm_base_pa;
+	dst->msrpm_base_pa        = from->msrpm_base_pa;
+	dst->tsc_offset           = from->tsc_offset;
+	dst->asid                 = from->asid;
+	dst->tlb_ctl              = from->tlb_ctl;
+	dst->int_ctl              = from->int_ctl;
+	dst->int_vector           = from->int_vector;
+	dst->int_state            = from->int_state;
+	dst->exit_code            = from->exit_code;
+	dst->exit_code_hi         = from->exit_code_hi;
+	dst->exit_info_1          = from->exit_info_1;
+	dst->exit_info_2          = from->exit_info_2;
+	dst->exit_int_info        = from->exit_int_info;
+	dst->exit_int_info_err    = from->exit_int_info_err;
+	dst->nested_ctl           = from->nested_ctl;
+	dst->event_inj            = from->event_inj;
+	dst->event_inj_err        = from->event_inj_err;
+	dst->nested_cr3           = from->nested_cr3;
+	dst->virt_ext              = from->virt_ext;
+	dst->pause_filter_count   = from->pause_filter_count;
+	dst->pause_filter_thresh  = from->pause_filter_thresh;
+}
+
 static int svm_get_nested_state(struct kvm_vcpu *vcpu,
 				struct kvm_nested_state __user *user_kvm_nested_state,
 				u32 user_data_size)
 {
 	struct vcpu_svm *svm;
+	struct vmcb_control_area *ctl;
+	unsigned long r;
 	struct kvm_nested_state kvm_state = {
 		.flags = 0,
 		.format = KVM_STATE_NESTED_FORMAT_SVM,
@@ -1297,9 +1352,18 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu,
 	 */
 	if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
 		return -EFAULT;
-	if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
-			 sizeof(user_vmcb->control)))
+
+	ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
+	if (!ctl)
+		return -ENOMEM;
+
+	nested_copy_vmcb_cache_to_control(ctl, &svm->nested.ctl);
+	r = copy_to_user(&user_vmcb->control, ctl,
			 sizeof(user_vmcb->control));
+	kfree(ctl);
+	if (r)
 		return -EFAULT;
+
 	if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
 			 sizeof(user_vmcb->save)))
 		return -EFAULT;
@@ -1316,6 +1380,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 		&user_kvm_nested_state->data.svm[0];
 	struct vmcb_control_area *ctl;
 	struct vmcb_save_area *save;
+	struct vmcb_save_area_cached save_cached;
+	struct vmcb_ctrl_area_cached ctl_cached;
 	unsigned long cr0;
 	int ret;
@@ -1345,7 +1411,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 	if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
-		svm_leave_nested(svm);
+		svm_leave_nested(vcpu);
 		svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
 		return 0;
 	}
@@ -1368,7 +1434,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 		goto out_free;
 
 	ret = -EINVAL;
-	if (!nested_vmcb_check_controls(vcpu, ctl))
+	__nested_copy_vmcb_control_to_cache(&ctl_cached, ctl);
+	if (!__nested_vmcb_check_controls(vcpu, &ctl_cached))
 		goto out_free;
 
 	/*
@@ -1383,10 +1450,11 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 	 * Validate host state saved from before VMRUN (see
 	 * nested_svm_check_permissions).
 	 */
+	__nested_copy_vmcb_save_to_cache(&save_cached, save);
 	if (!(save->cr0 & X86_CR0_PG) ||
 	    !(save->cr0 & X86_CR0_PE) ||
 	    (save->rflags & X86_EFLAGS_VM) ||
-	    !nested_vmcb_valid_sregs(vcpu, save))
+	    !__nested_vmcb_check_save(vcpu, &save_cached))
 		goto out_free;
 
 	/*
@@ -1410,7 +1478,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 	 */
 
 	if (is_guest_mode(vcpu))
-		svm_leave_nested(svm);
+		svm_leave_nested(vcpu);
 	else
 		svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
 
@@ -1422,7 +1490,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 
 	svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
 	svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
-	nested_load_control_from_vmcb12(svm, ctl);
+	nested_copy_vmcb_control_to_cache(svm, ctl);
 
 	svm_switch_vmcb(svm, &svm->nested.vmcb02);
 	nested_vmcb02_prepare_control(svm);
@@ -1449,7 +1517,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
 		 * the guest CR3 might be restored prior to setting the nested
 		 * state which can lead to a load of wrong PDPTRs.
 		 */
-		if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
+		if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
 			return false;
 
 	if (!nested_svm_vmrun_msrpm(svm)) {
@@ -1464,6 +1532,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
 }
 
 struct kvm_x86_nested_ops svm_nested_ops = {
+	.leave_nested = svm_leave_nested,
 	.check_events = svm_check_nested_events,
 	.triple_fault = nested_svm_triple_fault,
 	.get_nested_state_pages = svm_get_nested_state_pages,
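
Note on the pattern adopted here: nested_svm_vmrun() now snapshots the guest-writable vmcb12 control and save areas into svm->nested.ctl and svm->nested.save (nested_copy_vmcb_control_to_cache() / nested_copy_vmcb_save_to_cache()), runs nested_vmcb_check_save() and nested_vmcb_check_controls() against those snapshots, and from then on consumes only the cached values (e.g. svm->nested.save.efer/cr0/cr4 in nested_vmcb02_prepare_save() and svm->nested.save.cr3 in enter_svm_guest_mode()). Because the guest can no longer flip a field between the check and its use, the old FIXME about TOC/TOU races and the force-setting of EFER_SVME both go away. Below is a minimal, standalone sketch of the idea; the types and helpers are simplified stand-ins for illustration, not the kernel's definitions.

/* sketch.c - copy-then-validate to close the TOC/TOU window */
#include <stdbool.h>
#include <stdint.h>

struct guest_save_area {        /* guest-writable memory (think vmcb12->save) */
	uint64_t efer, cr0, cr3, cr4, dr6, dr7;
};

struct save_area_cached {       /* host-private snapshot (think svm->nested.save) */
	uint64_t efer, cr0, cr3, cr4, dr6, dr7;
};

#define EFER_SVME (1ULL << 12)

/* Snapshot only the fields that will be validated and later consumed. */
static void copy_save_to_cache(struct save_area_cached *to,
			       const struct guest_save_area *from)
{
	to->efer = from->efer;
	to->cr0  = from->cr0;
	to->cr3  = from->cr3;
	to->cr4  = from->cr4;
	to->dr6  = from->dr6;
	to->dr7  = from->dr7;
}

/* Checks run on the snapshot, so the guest cannot change a value between
 * the check and its later use. */
static bool check_save_cached(const struct save_area_cached *save)
{
	if (!(save->efer & EFER_SVME))
		return false;
	if (save->cr0 & ~0xffffffffULL)
		return false;
	return true;
}

int emulate_vmrun(struct guest_save_area *vmcb12_save,
		  struct save_area_cached *cache)
{
	copy_save_to_cache(cache, vmcb12_save);   /* 1. copy once */
	if (!check_save_cached(cache))            /* 2. validate the copy */
		return -1;
	/* 3. from here on, consume only *cache, never vmcb12_save */
	return 0;
}

The userspace-facing paths use the same caches in the other direction: svm_get_nested_state() rebuilds a full vmcb_control_area from the cache via nested_copy_vmcb_cache_to_control() before copying it out, and svm_set_nested_state() validates incoming state by first caching it with __nested_copy_vmcb_control_to_cache() / __nested_copy_vmcb_save_to_cache().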
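The vmcb_is_intercept() to vmcb12_is_intercept() renames throughout the file follow from the type change of svm->nested.ctl to struct vmcb_ctrl_area_cached. The helper itself is not part of this diff (it would live in svm.h alongside the new cached types); presumably it mirrors vmcb_is_intercept() but takes the cached type, along these lines (an assumption for illustration, not code from this patch):

static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control,
				       u32 bit)
{
	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
	return test_bit(bit, (unsigned long *)&control->intercepts);
}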