Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--  arch/x86/kvm/Kconfig          |   2
-rw-r--r--  arch/x86/kvm/Makefile         |   1
-rw-r--r--  arch/x86/kvm/cpuid.h          |   1
-rw-r--r--  arch/x86/kvm/emulate.c        |  17
-rw-r--r--  arch/x86/kvm/i8254.h          |   1
-rw-r--r--  arch/x86/kvm/ioapic.h         |   1
-rw-r--r--  arch/x86/kvm/kvm_cache_regs.h |   1
-rw-r--r--  arch/x86/kvm/lapic.c          |   5
-rw-r--r--  arch/x86/kvm/lapic.h          |   1
-rw-r--r--  arch/x86/kvm/mmu.c            |  17
-rw-r--r--  arch/x86/kvm/mmu.h            |   1
-rw-r--r--  arch/x86/kvm/mmutrace.h       |   1
-rw-r--r--  arch/x86/kvm/paging_tmpl.h    |   3
-rw-r--r--  arch/x86/kvm/pmu.h            |   1
-rw-r--r--  arch/x86/kvm/trace.h          |   1
-rw-r--r--  arch/x86/kvm/tss.h            |   1
-rw-r--r--  arch/x86/kvm/vmx.c            | 253
-rw-r--r--  arch/x86/kvm/x86.c            |   2
-rw-r--r--  arch/x86/kvm/x86.h            |   1
19 files changed, 166 insertions(+), 145 deletions(-)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3ea624452f93..3df51c287844 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # KVM configuration
 #
@@ -23,6 +24,7 @@ config KVM
 	depends on HIGH_RES_TIMERS
 	# for TASKSTATS/TASK_DELAY_ACCT:
 	depends on NET && MULTIUSER
+	depends on X86_LOCAL_APIC
 	select PREEMPT_NOTIFIERS
 	select MMU_NOTIFIER
 	select ANON_INODES
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 09d4b17be022..dc4f2fdf5e57 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ccflags-y += -Iarch/x86/kvm
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 0bc5c1315708..cdc70a3a6583 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef ARCH_X86_KVM_CPUID_H
 #define ARCH_X86_KVM_CPUID_H
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 16bf6655aa85..d90cdc77e077 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -425,8 +425,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
 	#op " %al \n\t" \
 	FOP_RET
 
-asm(".global kvm_fastop_exception \n"
-    "kvm_fastop_exception: xor %esi, %esi; ret");
+asm(".pushsection .fixup, \"ax\"\n"
+    ".global kvm_fastop_exception \n"
+    "kvm_fastop_exception: xor %esi, %esi; ret\n"
+    ".popsection");
 
 FOP_START(setcc)
 FOP_SETCC(seto)
@@ -4102,10 +4104,12 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
 		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
 		if (efer & EFER_LMA) {
 			u64 maxphyaddr;
-			u32 eax = 0x80000008;
+			u32 eax, ebx, ecx, edx;
 
-			if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL,
-						 NULL, false))
+			eax = 0x80000008;
+			ecx = 0;
+			if (ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx,
+						 &edx, false))
 				maxphyaddr = eax & 0xff;
 			else
 				maxphyaddr = 36;
@@ -5296,7 +5300,6 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 
 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 {
-	register void *__sp asm(_ASM_SP);
 	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
 
 	if (!(ctxt->d & ByteOp))
@@ -5304,7 +5307,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 
 	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
 	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
-	      [fastop]"+S"(fop), "+r"(__sp)
+	      [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
 	    : "c"(ctxt->src2.val));
 
 	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 600bee9dcbbd..394d9527da7e 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __I8254_H
 #define __I8254_H
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 29ce19732ccf..ea1a4e0297da 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __KVM_IO_APIC_H
 #define __KVM_IO_APIC_H
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 9add410f195f..f500293dad8d 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef ASM_KVM_CACHE_REGS_H
 #define ASM_KVM_CACHE_REGS_H
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 69c5612be786..36c90d631096 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1992,6 +1992,11 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 				vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
 	vcpu->arch.pv_eoi.msr_val = 0;
 	apic_update_ppr(apic);
+	if (vcpu->arch.apicv_active) {
+		kvm_x86_ops->apicv_post_state_restore(vcpu);
+		kvm_x86_ops->hwapic_irr_update(vcpu, -1);
+		kvm_x86_ops->hwapic_isr_update(vcpu, -1);
+	}
 
 	vcpu->arch.apic_arb_prio = 0;
 	vcpu->arch.apic_attention = 0;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 215721e1426a..4b9935a38347 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __KVM_X86_LAPIC_H
 #define __KVM_X86_LAPIC_H
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eca30c1eb1d9..7a69cf053711 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3837,7 +3837,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
 	case KVM_PV_REASON_PAGE_NOT_PRESENT:
 		vcpu->arch.apf.host_apf_reason = 0;
 		local_irq_disable();
-		kvm_async_pf_task_wait(fault_address);
+		kvm_async_pf_task_wait(fault_address, 0);
 		local_irq_enable();
 		break;
 	case KVM_PV_REASON_PAGE_READY:
@@ -3974,19 +3974,19 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu,
 				unsigned level, unsigned gpte)
 {
 	/*
-	 * PT_PAGE_TABLE_LEVEL always terminates.  The RHS has bit 7 set
-	 * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means
-	 * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then.
-	 */
-	gpte |= level - PT_PAGE_TABLE_LEVEL - 1;
-
-	/*
 	 * The RHS has bit 7 set iff level < mmu->last_nonleaf_level.
 	 * If it is clear, there are no large pages at this level, so clear
 	 * PT_PAGE_SIZE_MASK in gpte if that is the case.
 	 */
 	gpte &= level - mmu->last_nonleaf_level;
 
+	/*
+	 * PT_PAGE_TABLE_LEVEL always terminates.  The RHS has bit 7 set
+	 * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means
+	 * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then.
+	 */
+	gpte |= level - PT_PAGE_TABLE_LEVEL - 1;
+
 	return gpte & PT_PAGE_SIZE_MASK;
 }
@@ -4555,6 +4555,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 
 	update_permission_bitmask(vcpu, context, true);
 	update_pkru_bitmask(vcpu, context, true);
+	update_last_nonleaf_level(vcpu, context);
 	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
 	reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
 }
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 64a2dbd2b1af..efc857615d8e 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __KVM_X86_MMU_H
 #define __KVM_X86_MMU_H
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 8b97a6cba8d1..c73bf4e4988c 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #if !defined(_TRACE_KVMMMU_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_KVMMMU_H
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 86b68dc5a649..f18d1f8d332b 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -334,10 +334,11 @@ retry_walk:
 		--walker->level;
 
 		index = PT_INDEX(addr, walker->level);
-
 		table_gfn = gpte_to_gfn(pte);
 		offset    = index * sizeof(pt_element_t);
 		pte_gpa   = gfn_to_gpa(table_gfn) + offset;
+
+		BUG_ON(walker->level < 1);
 		walker->table_gfn[walker->level - 1] = table_gfn;
 		walker->pte_gpa[walker->level - 1] = pte_gpa;
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index f96e1f962587..a9a62b9a73e2 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __KVM_X86_PMU_H
 #define __KVM_X86_PMU_H
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 8a202c49e2a0..9807c314c478 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_KVM_H
diff --git a/arch/x86/kvm/tss.h b/arch/x86/kvm/tss.h
index 622aa10f692f..3f9150125e70 100644
--- a/arch/x86/kvm/tss.h
+++ b/arch/x86/kvm/tss.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __TSS_SEGMENT_H
 #define __TSS_SEGMENT_H
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06c0c6d0541e..a6f4f095f8f4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -200,6 +200,8 @@ struct loaded_vmcs {
 	int cpu;
 	bool launched;
 	bool nmi_known_unmasked;
+	unsigned long vmcs_host_cr3;	/* May not match real cr3 */
+	unsigned long vmcs_host_cr4;	/* May not match real cr4 */
 	struct list_head loaded_vmcss_on_cpu_link;
 };
 
@@ -600,8 +602,6 @@ struct vcpu_vmx {
 		int           gs_ldt_reload_needed;
 		int           fs_reload_needed;
 		u64           msr_host_bndcfgs;
-		unsigned long vmcs_host_cr3;	/* May not match real cr3 */
-		unsigned long vmcs_host_cr4;	/* May not match real cr4 */
 	} host_state;
 	struct {
 		int vm86_active;
@@ -2202,46 +2202,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 	struct pi_desc old, new;
 	unsigned int dest;
 
-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
-		!kvm_vcpu_apicv_active(vcpu))
+	/*
+	 * In case of hot-plug or hot-unplug, we may have to undo
+	 * vmx_vcpu_pi_put even if there is no assigned device.  And we
+	 * always keep PI.NDST up to date for simplicity: it makes the
+	 * code easier, and CPU migration is not a fast path.
+	 */
+	if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
+		return;
+
+	/*
+	 * First handle the simple case where no cmpxchg is necessary; just
+	 * allow posting non-urgent interrupts.
+	 *
+	 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+	 * PI.NDST: pi_post_block will do it for us and the wakeup_handler
+	 * expects the VCPU to be on the blocked_vcpu_list that matches
+	 * PI.NDST.
+	 */
+	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
+	    vcpu->cpu == cpu) {
+		pi_clear_sn(pi_desc);
 		return;
+	}
+
+	/* The full case.  */
 	do {
 		old.control = new.control = pi_desc->control;
 
-		/*
-		 * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
-		 * are two possible cases:
-		 * 1. After running 'pre_block', context switch
-		 *    happened. For this case, 'sn' was set in
-		 *    vmx_vcpu_put(), so we need to clear it here.
-		 * 2. After running 'pre_block', we were blocked,
-		 *    and woken up by some other guy. For this case,
-		 *    we don't need to do anything, 'pi_post_block'
-		 *    will do everything for us. However, we cannot
-		 *    check whether it is case #1 or case #2 here
-		 *    (maybe, not needed), so we also clear sn here,
-		 *    I think it is not a big deal.
-		 */
-		if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
-			if (vcpu->cpu != cpu) {
-				dest = cpu_physical_id(cpu);
-
-				if (x2apic_enabled())
-					new.ndst = dest;
-				else
-					new.ndst = (dest << 8) & 0xFF00;
-			}
+		dest = cpu_physical_id(cpu);
 
-			/* set 'NV' to 'notification vector' */
-			new.nv = POSTED_INTR_VECTOR;
-		}
+		if (x2apic_enabled())
+			new.ndst = dest;
+		else
+			new.ndst = (dest << 8) & 0xFF00;
 
-		/* Allow posting non-urgent interrupts */
 		new.sn = 0;
-	} while (cmpxchg(&pi_desc->control, old.control,
-			new.control) != old.control);
+	} while (cmpxchg64(&pi_desc->control, old.control,
+			   new.control) != old.control);
 }
 
 static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
@@ -5077,21 +5075,30 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
 	int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
 
 	if (vcpu->mode == IN_GUEST_MODE) {
-		struct vcpu_vmx *vmx = to_vmx(vcpu);
-
 		/*
-		 * Currently, we don't support urgent interrupt,
-		 * all interrupts are recognized as non-urgent
-		 * interrupt, so we cannot post interrupts when
-		 * 'SN' is set.
+		 * The vector of interrupt to be delivered to vcpu had
+		 * been set in PIR before this function.
+		 *
+		 * Following cases will be reached in this block, and
+		 * we always send a notification event in all cases as
+		 * explained below.
+		 *
+		 * Case 1: vcpu keeps in non-root mode. Sending a
+		 * notification event posts the interrupt to vcpu.
 		 *
-		 * If the vcpu is in guest mode, it means it is
-		 * running instead of being scheduled out and
-		 * waiting in the run queue, and that's the only
-		 * case when 'SN' is set currently, warning if
-		 * 'SN' is set.
+		 * Case 2: vcpu exits to root mode and is still
+		 * runnable. PIR will be synced to vIRR before the
+		 * next vcpu entry. Sending a notification event in
+		 * this case has no effect, as vcpu is not in root
+		 * mode.
+		 *
+		 * Case 3: vcpu exits to root mode and is blocked.
+		 * vcpu_block() has already synced PIR to vIRR and
+		 * never blocks vcpu if vIRR is not cleared. Therefore,
+		 * a blocked vcpu here does not wait for any requested
+		 * interrupts in PIR, and sending a notification event
+		 * which has no effect is safe here.
 		 */
-		WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
 
 		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
 		return true;
@@ -5169,12 +5176,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 	 */
 	cr3 = __read_cr3();
 	vmcs_writel(HOST_CR3, cr3);		/* 22.2.3  FIXME: shadow tables */
-	vmx->host_state.vmcs_host_cr3 = cr3;
+	vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
 
 	/* Save the most likely value for this task's CR4 in the VMCS. */
 	cr4 = cr4_read_shadow();
 	vmcs_writel(HOST_CR4, cr4);			/* 22.2.3, 22.2.5 */
-	vmx->host_state.vmcs_host_cr4 = cr4;
+	vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
 
 	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
 #ifdef CONFIG_X86_64
@@ -5612,9 +5619,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 
-	if (kvm_vcpu_apicv_active(vcpu))
-		memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
-
 	if (vmx->vpid != 0)
 		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
@@ -9036,7 +9040,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 {
 	u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-	register void *__sp asm(_ASM_SP);
 
 	if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
 			== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
@@ -9065,7 +9068,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_X86_64
 			[sp]"=&r"(tmp),
 #endif
-			"+r"(__sp)
+			ASM_CALL_CONSTRAINT
 			:
 			[entry]"r"(entry),
 			[ss]"i"(__KERNEL_DS),
@@ -9265,15 +9268,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
 	cr3 = __get_current_cr3_fast();
-	if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
+	if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
 		vmcs_writel(HOST_CR3, cr3);
-		vmx->host_state.vmcs_host_cr3 = cr3;
+		vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
 	}
 
 	cr4 = cr4_read_shadow();
-	if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+	if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
-		vmx->host_state.vmcs_host_cr4 = cr4;
+		vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
 	}
 
 	/* When single-stepping over STI and MOV SS, we must clear the
@@ -9583,6 +9586,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
 
+	/*
+	 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
+	 * or POSTED_INTR_WAKEUP_VECTOR.
+	 */
+	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+	vmx->pi_desc.sn = 1;
+
 	return &vmx->vcpu;
 
 free_vmcs:
@@ -9831,7 +9841,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 
 	WARN_ON(!is_guest_mode(vcpu));
 
-	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) {
+	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
+		!to_vmx(vcpu)->nested.nested_run_pending) {
 		vmcs12->vm_exit_intr_error_code = fault->error_code;
 		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
 				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
@@ -11283,7 +11294,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	/* Same as above - no reason to call set_cr4_guest_host_mask().  */
 	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
-	kvm_set_cr4(vcpu, vmcs12->host_cr4);
+	vmx_set_cr4(vcpu, vmcs12->host_cr4);
 
 	nested_ept_uninit_mmu_context(vcpu);
@@ -11696,6 +11707,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
 	kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+static void __pi_post_block(struct kvm_vcpu *vcpu)
+{
+	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+	struct pi_desc old, new;
+	unsigned int dest;
+
+	do {
+		old.control = new.control = pi_desc->control;
+		WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+		     "Wakeup handler not enabled while the VCPU is blocked\n");
+
+		dest = cpu_physical_id(vcpu->cpu);
+
+		if (x2apic_enabled())
+			new.ndst = dest;
+		else
+			new.ndst = (dest << 8) & 0xFF00;
+
+		/* set 'NV' to 'notification vector' */
+		new.nv = POSTED_INTR_VECTOR;
+	} while (cmpxchg64(&pi_desc->control, old.control,
+			   new.control) != old.control);
+
+	if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+		list_del(&vcpu->blocked_vcpu_list);
+		spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+		vcpu->pre_pcpu = -1;
+	}
+}
+
 /*
  * This routine does the following things for vCPU which is going
  * to be blocked if VT-d PI is enabled.
@@ -11711,7 +11753,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
  */
 static int pi_pre_block(struct kvm_vcpu *vcpu)
 {
-	unsigned long flags;
 	unsigned int dest;
 	struct pi_desc old, new;
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -11721,34 +11762,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
 		!kvm_vcpu_apicv_active(vcpu))
 		return 0;
 
-	vcpu->pre_pcpu = vcpu->cpu;
-	spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-			  vcpu->pre_pcpu), flags);
-	list_add_tail(&vcpu->blocked_vcpu_list,
-		      &per_cpu(blocked_vcpu_on_cpu,
-		      vcpu->pre_pcpu));
-	spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
-			       vcpu->pre_pcpu), flags);
+	WARN_ON(irqs_disabled());
+	local_irq_disable();
+	if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+		vcpu->pre_pcpu = vcpu->cpu;
+		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+		list_add_tail(&vcpu->blocked_vcpu_list,
+			      &per_cpu(blocked_vcpu_on_cpu,
+				       vcpu->pre_pcpu));
+		spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+	}
 
 	do {
 		old.control = new.control = pi_desc->control;
 
-		/*
-		 * We should not block the vCPU if
-		 * an interrupt is posted for it.
-		 */
-		if (pi_test_on(pi_desc) == 1) {
-			spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-					  vcpu->pre_pcpu), flags);
-			list_del(&vcpu->blocked_vcpu_list);
-			spin_unlock_irqrestore(
-					&per_cpu(blocked_vcpu_on_cpu_lock,
-					vcpu->pre_pcpu), flags);
-			vcpu->pre_pcpu = -1;
-
-			return 1;
-		}
-
 		WARN((pi_desc->sn == 1),
 		     "Warning: SN field of posted-interrupts "
 		     "is set before blocking\n");
@@ -11770,10 +11797,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
 
 		/* set 'NV' to 'wakeup vector' */
 		new.nv = POSTED_INTR_WAKEUP_VECTOR;
-	} while (cmpxchg(&pi_desc->control, old.control,
-			new.control) != old.control);
+	} while (cmpxchg64(&pi_desc->control, old.control,
+			   new.control) != old.control);
 
-	return 0;
+	/* We should not block the vCPU if an interrupt is posted for it.  */
+	if (pi_test_on(pi_desc) == 1)
+		__pi_post_block(vcpu);
+
+	local_irq_enable();
+	return (vcpu->pre_pcpu == -1);
 }
 
 static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -11789,44 +11821,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
 
 static void pi_post_block(struct kvm_vcpu *vcpu)
 {
-	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-	struct pi_desc old, new;
-	unsigned int dest;
-	unsigned long flags;
-
-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
-		!kvm_vcpu_apicv_active(vcpu))
+	if (vcpu->pre_pcpu == -1)
 		return;
 
-	do {
-		old.control = new.control = pi_desc->control;
-
-		dest = cpu_physical_id(vcpu->cpu);
-
-		if (x2apic_enabled())
-			new.ndst = dest;
-		else
-			new.ndst = (dest << 8) & 0xFF00;
-
-		/* Allow posting non-urgent interrupts */
-		new.sn = 0;
-
-		/* set 'NV' to 'notification vector' */
-		new.nv = POSTED_INTR_VECTOR;
-	} while (cmpxchg(&pi_desc->control, old.control,
-			new.control) != old.control);
-
-	if(vcpu->pre_pcpu != -1) {
-		spin_lock_irqsave(
-			&per_cpu(blocked_vcpu_on_cpu_lock,
-			vcpu->pre_pcpu), flags);
-		list_del(&vcpu->blocked_vcpu_list);
-		spin_unlock_irqrestore(
-			&per_cpu(blocked_vcpu_on_cpu_lock,
-			vcpu->pre_pcpu), flags);
-		vcpu->pre_pcpu = -1;
-	}
+	WARN_ON(irqs_disabled());
+	local_irq_disable();
+	__pi_post_block(vcpu);
+	local_irq_enable();
 }
 
 static void vmx_post_block(struct kvm_vcpu *vcpu)
@@ -11911,12 +11912,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 
 		if (set)
 			ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
-		else {
-			/* suppress notification event before unposting */
-			pi_set_sn(vcpu_to_pi_desc(vcpu));
+		else
 			ret = irq_set_vcpu_affinity(host_irq, NULL);
-			pi_clear_sn(vcpu_to_pi_desc(vcpu));
-		}
 
 		if (ret < 0) {
 			printk(KERN_INFO "%s: failed to update PI IRTE\n",
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cd17b7d9a107..03869eb7fcd6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7225,7 +7225,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	int r;
 	sigset_t sigsaved;
 
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 51e349cf5f45..d0b95b7a90b4 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef ARCH_X86_KVM_X86_H
 #define ARCH_X86_KVM_X86_H
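The posted-interrupt changes above all use the same lock-free pattern: read the 64-bit pi_desc control word, build a modified copy, and retry with cmpxchg64 until no concurrent writer has intervened. The following user-space sketch illustrates that pattern with C11 atomics; the union layout, field names, and vector value are illustrative stand-ins, not the kernel's real struct pi_desc.

/*
 * Illustrative sketch of the cmpxchg64 update loop applied to a
 * posted-interrupt-style descriptor.  The layout below is a stand-in,
 * NOT the kernel's struct pi_desc.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

union pi_control {
	uint64_t control;
	struct {
		uint16_t ndst;	/* notification destination (illustrative) */
		uint16_t nv;	/* notification vector (illustrative) */
		uint32_t sn;	/* suppress notification (illustrative) */
	};
};

static _Atomic uint64_t pi_control_word;

/* Point the descriptor at a new CPU and allow posting again. */
static void pi_set_running(uint16_t dest, uint16_t notification_vector)
{
	union pi_control old, new;

	do {
		/* Snapshot the whole 64-bit word; rebuild the copy on each retry. */
		old.control = atomic_load(&pi_control_word);
		new = old;
		new.ndst = dest;		/* migrate the notification target */
		new.nv = notification_vector;	/* back to the running vector */
		new.sn = 0;			/* allow non-urgent posting */
		/* Publish only if nobody changed the word in the meantime. */
	} while (!atomic_compare_exchange_strong(&pi_control_word,
						 &old.control, new.control));
}

int main(void)
{
	pi_set_running(3, 0xf2);	/* 0xf2: hypothetical notification vector */
	printf("control = %#llx\n",
	       (unsigned long long)atomic_load(&pi_control_word));
	return 0;
}

Compared with taking a lock, the retry loop keeps the update safe against concurrent writers (the wakeup handler, an IRTE update) while remaining usable from contexts where sleeping is not allowed; a failed compare simply restarts with a fresh snapshot.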