diff options
44 files changed, 1314 insertions, 946 deletions
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index 83afe65d4966..22ff659bc0fb 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt @@ -43,6 +43,10 @@ KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by || || writing to msr 0x4b564d02 ------------------------------------------------------------------------------ +KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit + || || before enabling paravirtualized + || || spinlock support. +------------------------------------------------------------------------------ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side || || per-cpu warps are expected in || || kvmclock. diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt index ea113b5d87a4..022198e389d7 100644 --- a/Documentation/virtual/kvm/hypercalls.txt +++ b/Documentation/virtual/kvm/hypercalls.txt @@ -64,3 +64,17 @@ Purpose: To enable communication between the hypervisor and guest there is a shared page that contains parts of supervisor visible register state. The guest can map this shared page to access its supervisor register through memory using this hypercall. + +5. KVM_HC_KICK_CPU +------------------------ +Architecture: x86 +Status: active +Purpose: Hypercall used to wakeup a vcpu from HLT state +Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest +kernel mode for an event to occur (ex: a spinlock to become available) can +execute HLT instruction once it has busy-waited for more than a threshold +time-interval. Execution of HLT instruction would cause the hypervisor to put +the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the +same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall, +specifying APIC ID (a1) of the vcpu to be woken up. 
An additional argument (a0) +is used in the hypercall for future use. diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h index 3ed37b4d93da..e072bb2ba1b1 100644 --- a/arch/arm/include/asm/dma-contiguous.h +++ b/arch/arm/include/asm/dma-contiguous.h @@ -2,7 +2,7 @@ #define ASMARM_DMA_CONTIGUOUS_H #ifdef __KERNEL__ -#ifdef CONFIG_CMA +#ifdef CONFIG_DMA_CMA #include <linux/types.h> #include <asm-generic/dma-contiguous.h> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7f9b1798c6cf..dbddc07a3bbd 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -358,7 +358,7 @@ static int __init atomic_pool_init(void) if (!pages) goto no_pages; - if (IS_ENABLED(CONFIG_CMA)) + if (IS_ENABLED(CONFIG_DMA_CMA)) ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page, atomic_pool_init); else @@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, addr = __alloc_simple_buffer(dev, size, gfp, &page); else if (!(gfp & __GFP_WAIT)) addr = __alloc_from_pool(size, &page); - else if (!IS_ENABLED(CONFIG_CMA)) + else if (!IS_ENABLED(CONFIG_DMA_CMA)) addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); else addr = __alloc_from_contiguous(dev, size, prot, &page, caller); @@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, __dma_free_buffer(page, size); } else if (__free_from_pool(cpu_addr, size)) { return; - } else if (!IS_ENABLED(CONFIG_CMA)) { + } else if (!IS_ENABLED(CONFIG_DMA_CMA)) { __dma_free_remap(cpu_addr, size); __dma_free_buffer(page, size); } else { diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/kvm_locore.S index dca2aa665993..bbace092ad0a 100644 --- a/arch/mips/kvm/kvm_locore.S +++ b/arch/mips/kvm/kvm_locore.S @@ -1,13 +1,13 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. 
See the file "COPYING" in the main directory of this archive -* for more details. -* -* Main entry point for the guest, exception handling. -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Main entry point for the guest, exception handling. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ #include <asm/asm.h> #include <asm/asmmacro.h> @@ -55,195 +55,193 @@ * a0: run * a1: vcpu */ + .set noreorder + .set noat FEXPORT(__kvm_mips_vcpu_run) - .set push - .set noreorder - .set noat - - /* k0/k1 not being used in host kernel context */ - addiu k1,sp, -PT_SIZE - LONG_S $0, PT_R0(k1) - LONG_S $1, PT_R1(k1) - LONG_S $2, PT_R2(k1) - LONG_S $3, PT_R3(k1) - - LONG_S $4, PT_R4(k1) - LONG_S $5, PT_R5(k1) - LONG_S $6, PT_R6(k1) - LONG_S $7, PT_R7(k1) - - LONG_S $8, PT_R8(k1) - LONG_S $9, PT_R9(k1) - LONG_S $10, PT_R10(k1) - LONG_S $11, PT_R11(k1) - LONG_S $12, PT_R12(k1) - LONG_S $13, PT_R13(k1) - LONG_S $14, PT_R14(k1) - LONG_S $15, PT_R15(k1) - LONG_S $16, PT_R16(k1) - LONG_S $17, PT_R17(k1) - - LONG_S $18, PT_R18(k1) - LONG_S $19, PT_R19(k1) - LONG_S $20, PT_R20(k1) - LONG_S $21, PT_R21(k1) - LONG_S $22, PT_R22(k1) - LONG_S $23, PT_R23(k1) - LONG_S $24, PT_R24(k1) - LONG_S $25, PT_R25(k1) + /* k0/k1 not being used in host kernel context */ + INT_ADDIU k1, sp, -PT_SIZE + LONG_S $0, PT_R0(k1) + LONG_S $1, PT_R1(k1) + LONG_S $2, PT_R2(k1) + LONG_S $3, PT_R3(k1) + + LONG_S $4, PT_R4(k1) + LONG_S $5, PT_R5(k1) + LONG_S $6, PT_R6(k1) + LONG_S $7, PT_R7(k1) + + LONG_S $8, PT_R8(k1) + LONG_S $9, PT_R9(k1) + LONG_S $10, PT_R10(k1) + LONG_S $11, PT_R11(k1) + LONG_S $12, PT_R12(k1) + LONG_S $13, PT_R13(k1) + LONG_S $14, PT_R14(k1) + LONG_S $15, PT_R15(k1) + LONG_S 
$16, PT_R16(k1) + LONG_S $17, PT_R17(k1) + + LONG_S $18, PT_R18(k1) + LONG_S $19, PT_R19(k1) + LONG_S $20, PT_R20(k1) + LONG_S $21, PT_R21(k1) + LONG_S $22, PT_R22(k1) + LONG_S $23, PT_R23(k1) + LONG_S $24, PT_R24(k1) + LONG_S $25, PT_R25(k1) /* XXXKYMA k0/k1 not saved, not being used if we got here through an ioctl() */ - LONG_S $28, PT_R28(k1) - LONG_S $29, PT_R29(k1) - LONG_S $30, PT_R30(k1) - LONG_S $31, PT_R31(k1) + LONG_S $28, PT_R28(k1) + LONG_S $29, PT_R29(k1) + LONG_S $30, PT_R30(k1) + LONG_S $31, PT_R31(k1) - /* Save hi/lo */ - mflo v0 - LONG_S v0, PT_LO(k1) - mfhi v1 - LONG_S v1, PT_HI(k1) + /* Save hi/lo */ + mflo v0 + LONG_S v0, PT_LO(k1) + mfhi v1 + LONG_S v1, PT_HI(k1) /* Save host status */ - mfc0 v0, CP0_STATUS - LONG_S v0, PT_STATUS(k1) + mfc0 v0, CP0_STATUS + LONG_S v0, PT_STATUS(k1) /* Save host ASID, shove it into the BVADDR location */ - mfc0 v1,CP0_ENTRYHI - andi v1, 0xff - LONG_S v1, PT_HOST_ASID(k1) + mfc0 v1, CP0_ENTRYHI + andi v1, 0xff + LONG_S v1, PT_HOST_ASID(k1) - /* Save DDATA_LO, will be used to store pointer to vcpu */ - mfc0 v1, CP0_DDATA_LO - LONG_S v1, PT_HOST_USERLOCAL(k1) + /* Save DDATA_LO, will be used to store pointer to vcpu */ + mfc0 v1, CP0_DDATA_LO + LONG_S v1, PT_HOST_USERLOCAL(k1) - /* DDATA_LO has pointer to vcpu */ - mtc0 a1,CP0_DDATA_LO + /* DDATA_LO has pointer to vcpu */ + mtc0 a1, CP0_DDATA_LO - /* Offset into vcpu->arch */ - addiu k1, a1, VCPU_HOST_ARCH + /* Offset into vcpu->arch */ + INT_ADDIU k1, a1, VCPU_HOST_ARCH - /* Save the host stack to VCPU, used for exception processing when we exit from the Guest */ - LONG_S sp, VCPU_HOST_STACK(k1) + /* + * Save the host stack to VCPU, used for exception processing + * when we exit from the Guest + */ + LONG_S sp, VCPU_HOST_STACK(k1) - /* Save the kernel gp as well */ - LONG_S gp, VCPU_HOST_GP(k1) + /* Save the kernel gp as well */ + LONG_S gp, VCPU_HOST_GP(k1) /* Setup status register for running the guest in UM, interrupts are disabled */ - li k0,(ST0_EXL | 
KSU_USER| ST0_BEV) - mtc0 k0,CP0_STATUS - ehb - - /* load up the new EBASE */ - LONG_L k0, VCPU_GUEST_EBASE(k1) - mtc0 k0,CP0_EBASE - - /* Now that the new EBASE has been loaded, unset BEV, set interrupt mask as it was - * but make sure that timer interrupts are enabled - */ - li k0,(ST0_EXL | KSU_USER | ST0_IE) - andi v0, v0, ST0_IM - or k0, k0, v0 - mtc0 k0,CP0_STATUS - ehb + li k0, (ST0_EXL | KSU_USER | ST0_BEV) + mtc0 k0, CP0_STATUS + ehb + + /* load up the new EBASE */ + LONG_L k0, VCPU_GUEST_EBASE(k1) + mtc0 k0, CP0_EBASE + + /* + * Now that the new EBASE has been loaded, unset BEV, set + * interrupt mask as it was but make sure that timer interrupts + * are enabled + */ + li k0, (ST0_EXL | KSU_USER | ST0_IE) + andi v0, v0, ST0_IM + or k0, k0, v0 + mtc0 k0, CP0_STATUS + ehb /* Set Guest EPC */ - LONG_L t0, VCPU_PC(k1) - mtc0 t0, CP0_EPC + LONG_L t0, VCPU_PC(k1) + mtc0 t0, CP0_EPC FEXPORT(__kvm_mips_load_asid) - /* Set the ASID for the Guest Kernel */ - sll t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If kernel */ - addiu t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ - addiu t1, k1, VCPU_GUEST_USER_ASID /* else user */ + /* Set the ASID for the Guest Kernel */ + INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ + /* addresses shift to 0x80000000 */ + bltz t0, 1f /* If kernel */ + INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ + INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: - /* t1: contains the base of the ASID array, need to get the cpu id */ - LONG_L t2, TI_CPU($28) /* smp_processor_id */ - sll t2, t2, 2 /* x4 */ - addu t3, t1, t2 - LONG_L k0, (t3) - andi k0, k0, 0xff - mtc0 k0,CP0_ENTRYHI - ehb - - /* Disable RDHWR access */ - mtc0 zero, CP0_HWRENA - - /* Now load up the Guest Context from VCPU */ - LONG_L $1, VCPU_R1(k1) - LONG_L $2, VCPU_R2(k1) - LONG_L $3, VCPU_R3(k1) - - LONG_L $4, VCPU_R4(k1) - LONG_L $5, VCPU_R5(k1) - LONG_L $6, VCPU_R6(k1) - LONG_L $7, VCPU_R7(k1) - - LONG_L 
$8, VCPU_R8(k1) - LONG_L $9, VCPU_R9(k1) - LONG_L $10, VCPU_R10(k1) - LONG_L $11, VCPU_R11(k1) - LONG_L $12, VCPU_R12(k1) - LONG_L $13, VCPU_R13(k1) - LONG_L $14, VCPU_R14(k1) - LONG_L $15, VCPU_R15(k1) - LONG_L $16, VCPU_R16(k1) - LONG_L $17, VCPU_R17(k1) - LONG_L $18, VCPU_R18(k1) - LONG_L $19, VCPU_R19(k1) - LONG_L $20, VCPU_R20(k1) - LONG_L $21, VCPU_R21(k1) - LONG_L $22, VCPU_R22(k1) - LONG_L $23, VCPU_R23(k1) - LONG_L $24, VCPU_R24(k1) - LONG_L $25, VCPU_R25(k1) - - /* k0/k1 loaded up later */ - - LONG_L $28, VCPU_R28(k1) - LONG_L $29, VCPU_R29(k1) - LONG_L $30, VCPU_R30(k1) - LONG_L $31, VCPU_R31(k1) - - /* Restore hi/lo */ - LONG_L k0, VCPU_LO(k1) - mtlo k0 - - LONG_L k0, VCPU_HI(k1) - mthi k0 + /* t1: contains the base of the ASID array, need to get the cpu id */ + LONG_L t2, TI_CPU($28) /* smp_processor_id */ + INT_SLL t2, t2, 2 /* x4 */ + REG_ADDU t3, t1, t2 + LONG_L k0, (t3) + andi k0, k0, 0xff + mtc0 k0, CP0_ENTRYHI + ehb + + /* Disable RDHWR access */ + mtc0 zero, CP0_HWRENA + + /* Now load up the Guest Context from VCPU */ + LONG_L $1, VCPU_R1(k1) + LONG_L $2, VCPU_R2(k1) + LONG_L $3, VCPU_R3(k1) + + LONG_L $4, VCPU_R4(k1) + LONG_L $5, VCPU_R5(k1) + LONG_L $6, VCPU_R6(k1) + LONG_L $7, VCPU_R7(k1) + + LONG_L $8, VCPU_R8(k1) + LONG_L $9, VCPU_R9(k1) + LONG_L $10, VCPU_R10(k1) + LONG_L $11, VCPU_R11(k1) + LONG_L $12, VCPU_R12(k1) + LONG_L $13, VCPU_R13(k1) + LONG_L $14, VCPU_R14(k1) + LONG_L $15, VCPU_R15(k1) + LONG_L $16, VCPU_R16(k1) + LONG_L $17, VCPU_R17(k1) + LONG_L $18, VCPU_R18(k1) + LONG_L $19, VCPU_R19(k1) + LONG_L $20, VCPU_R20(k1) + LONG_L $21, VCPU_R21(k1) + LONG_L $22, VCPU_R22(k1) + LONG_L $23, VCPU_R23(k1) + LONG_L $24, VCPU_R24(k1) + LONG_L $25, VCPU_R25(k1) + + /* k0/k1 loaded up later */ + + LONG_L $28, VCPU_R28(k1) + LONG_L $29, VCPU_R29(k1) + LONG_L $30, VCPU_R30(k1) + LONG_L $31, VCPU_R31(k1) + + /* Restore hi/lo */ + LONG_L k0, VCPU_LO(k1) + mtlo k0 + + LONG_L k0, VCPU_HI(k1) + mthi k0 FEXPORT(__kvm_mips_load_k0k1) /* Restore the 
guest's k0/k1 registers */ - LONG_L k0, VCPU_R26(k1) - LONG_L k1, VCPU_R27(k1) + LONG_L k0, VCPU_R26(k1) + LONG_L k1, VCPU_R27(k1) - /* Jump to guest */ + /* Jump to guest */ eret - .set pop VECTOR(MIPSX(exception), unknown) /* * Find out what mode we came from and jump to the proper handler. */ - .set push - .set noat - .set noreorder - mtc0 k0, CP0_ERROREPC #01: Save guest k0 - ehb #02: - - mfc0 k0, CP0_EBASE #02: Get EBASE - srl k0, k0, 10 #03: Get rid of CPUNum - sll k0, k0, 10 #04 - LONG_S k1, 0x3000(k0) #05: Save k1 @ offset 0x3000 - addiu k0, k0, 0x2000 #06: Exception handler is installed @ offset 0x2000 - j k0 #07: jump to the function - nop #08: branch delay slot - .set push + mtc0 k0, CP0_ERROREPC #01: Save guest k0 + ehb #02: + + mfc0 k0, CP0_EBASE #02: Get EBASE + INT_SRL k0, k0, 10 #03: Get rid of CPUNum + INT_SLL k0, k0, 10 #04 + LONG_S k1, 0x3000(k0) #05: Save k1 @ offset 0x3000 + INT_ADDIU k0, k0, 0x2000 #06: Exception handler is installed @ offset 0x2000 + j k0 #07: jump to the function + nop #08: branch delay slot VECTOR_END(MIPSX(exceptionEnd)) .end MIPSX(exception) @@ -253,329 +251,327 @@ VECTOR_END(MIPSX(exceptionEnd)) * */ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) - .set push - .set noat - .set noreorder - - /* Get the VCPU pointer from DDTATA_LO */ - mfc0 k1, CP0_DDATA_LO - addiu k1, k1, VCPU_HOST_ARCH - - /* Start saving Guest context to VCPU */ - LONG_S $0, VCPU_R0(k1) - LONG_S $1, VCPU_R1(k1) - LONG_S $2, VCPU_R2(k1) - LONG_S $3, VCPU_R3(k1) - LONG_S $4, VCPU_R4(k1) - LONG_S $5, VCPU_R5(k1) - LONG_S $6, VCPU_R6(k1) - LONG_S $7, VCPU_R7(k1) - LONG_S $8, VCPU_R8(k1) - LONG_S $9, VCPU_R9(k1) - LONG_S $10, VCPU_R10(k1) - LONG_S $11, VCPU_R11(k1) - LONG_S $12, VCPU_R12(k1) - LONG_S $13, VCPU_R13(k1) - LONG_S $14, VCPU_R14(k1) - LONG_S $15, VCPU_R15(k1) - LONG_S $16, VCPU_R16(k1) - LONG_S $17,VCPU_R17(k1) - LONG_S $18, VCPU_R18(k1) - LONG_S $19, VCPU_R19(k1) - LONG_S $20, VCPU_R20(k1) - LONG_S $21, VCPU_R21(k1) - LONG_S $22, VCPU_R22(k1) 
- LONG_S $23, VCPU_R23(k1) - LONG_S $24, VCPU_R24(k1) - LONG_S $25, VCPU_R25(k1) - - /* Guest k0/k1 saved later */ - - LONG_S $28, VCPU_R28(k1) - LONG_S $29, VCPU_R29(k1) - LONG_S $30, VCPU_R30(k1) - LONG_S $31, VCPU_R31(k1) - - /* We need to save hi/lo and restore them on - * the way out - */ - mfhi t0 - LONG_S t0, VCPU_HI(k1) - - mflo t0 - LONG_S t0, VCPU_LO(k1) - - /* Finally save guest k0/k1 to VCPU */ - mfc0 t0, CP0_ERROREPC - LONG_S t0, VCPU_R26(k1) - - /* Get GUEST k1 and save it in VCPU */ - la t1, ~0x2ff - mfc0 t0, CP0_EBASE - and t0, t0, t1 - LONG_L t0, 0x3000(t0) - LONG_S t0, VCPU_R27(k1) - - /* Now that context has been saved, we can use other registers */ - - /* Restore vcpu */ - mfc0 a1, CP0_DDATA_LO - move s1, a1 - - /* Restore run (vcpu->run) */ - LONG_L a0, VCPU_RUN(a1) - /* Save pointer to run in s0, will be saved by the compiler */ - move s0, a0 - - - /* Save Host level EPC, BadVaddr and Cause to VCPU, useful to process the exception */ - mfc0 k0,CP0_EPC - LONG_S k0, VCPU_PC(k1) - - mfc0 k0, CP0_BADVADDR - LONG_S k0, VCPU_HOST_CP0_BADVADDR(k1) - - mfc0 k0, CP0_CAUSE - LONG_S k0, VCPU_HOST_CP0_CAUSE(k1) - - mfc0 k0, CP0_ENTRYHI - LONG_S k0, VCPU_HOST_ENTRYHI(k1) - - /* Now restore the host state just enough to run the handlers */ - - /* Swtich EBASE to the one used by Linux */ - /* load up the host EBASE */ - mfc0 v0, CP0_STATUS - - .set at - or k0, v0, ST0_BEV - .set noat - - mtc0 k0, CP0_STATUS - ehb - - LONG_L k0, VCPU_HOST_EBASE(k1) - mtc0 k0,CP0_EBASE - - - /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ - .set at - and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) - or v0, v0, ST0_CU0 - .set noat - mtc0 v0, CP0_STATUS - ehb - - /* Load up host GP */ - LONG_L gp, VCPU_HOST_GP(k1) - - /* Need a stack before we can jump to "C" */ - LONG_L sp, VCPU_HOST_STACK(k1) - - /* Saved host state */ - addiu sp,sp, -PT_SIZE + /* Get the VCPU pointer from DDTATA_LO */ + mfc0 k1, CP0_DDATA_LO + INT_ADDIU k1, k1, VCPU_HOST_ARCH + + /* Start 
saving Guest context to VCPU */ + LONG_S $0, VCPU_R0(k1) + LONG_S $1, VCPU_R1(k1) + LONG_S $2, VCPU_R2(k1) + LONG_S $3, VCPU_R3(k1) + LONG_S $4, VCPU_R4(k1) + LONG_S $5, VCPU_R5(k1) + LONG_S $6, VCPU_R6(k1) + LONG_S $7, VCPU_R7(k1) + LONG_S $8, VCPU_R8(k1) + LONG_S $9, VCPU_R9(k1) + LONG_S $10, VCPU_R10(k1) + LONG_S $11, VCPU_R11(k1) + LONG_S $12, VCPU_R12(k1) + LONG_S $13, VCPU_R13(k1) + LONG_S $14, VCPU_R14(k1) + LONG_S $15, VCPU_R15(k1) + LONG_S $16, VCPU_R16(k1) + LONG_S $17, VCPU_R17(k1) + LONG_S $18, VCPU_R18(k1) + LONG_S $19, VCPU_R19(k1) + LONG_S $20, VCPU_R20(k1) + LONG_S $21, VCPU_R21(k1) + LONG_S $22, VCPU_R22(k1) + LONG_S $23, VCPU_R23(k1) + LONG_S $24, VCPU_R24(k1) + LONG_S $25, VCPU_R25(k1) + + /* Guest k0/k1 saved later */ + + LONG_S $28, VCPU_R28(k1) + LONG_S $29, VCPU_R29(k1) + LONG_S $30, VCPU_R30(k1) + LONG_S $31, VCPU_R31(k1) + + /* We need to save hi/lo and restore them on + * the way out + */ + mfhi t0 + LONG_S t0, VCPU_HI(k1) + + mflo t0 + LONG_S t0, VCPU_LO(k1) + + /* Finally save guest k0/k1 to VCPU */ + mfc0 t0, CP0_ERROREPC + LONG_S t0, VCPU_R26(k1) + + /* Get GUEST k1 and save it in VCPU */ + PTR_LI t1, ~0x2ff + mfc0 t0, CP0_EBASE + and t0, t0, t1 + LONG_L t0, 0x3000(t0) + LONG_S t0, VCPU_R27(k1) + + /* Now that context has been saved, we can use other registers */ + + /* Restore vcpu */ + mfc0 a1, CP0_DDATA_LO + move s1, a1 + + /* Restore run (vcpu->run) */ + LONG_L a0, VCPU_RUN(a1) + /* Save pointer to run in s0, will be saved by the compiler */ + move s0, a0 + + /* Save Host level EPC, BadVaddr and Cause to VCPU, useful to + * process the exception */ + mfc0 k0,CP0_EPC + LONG_S k0, VCPU_PC(k1) + + mfc0 k0, CP0_BADVADDR + LONG_S k0, VCPU_HOST_CP0_BADVADDR(k1) + + mfc0 k0, CP0_CAUSE + LONG_S k0, VCPU_HOST_CP0_CAUSE(k1) + + mfc0 k0, CP0_ENTRYHI + LONG_S k0, VCPU_HOST_ENTRYHI(k1) + + /* Now restore the host state just enough to run the handlers */ + + /* Swtich EBASE to the one used by Linux */ + /* load up the host EBASE */ + mfc0 v0, 
CP0_STATUS + + .set at + or k0, v0, ST0_BEV + .set noat + + mtc0 k0, CP0_STATUS + ehb + + LONG_L k0, VCPU_HOST_EBASE(k1) + mtc0 k0,CP0_EBASE + - /* XXXKYMA do we need to load the host ASID, maybe not because the - * kernel entries are marked GLOBAL, need to verify - */ + /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ + .set at + and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) + or v0, v0, ST0_CU0 + .set noat + mtc0 v0, CP0_STATUS + ehb + + /* Load up host GP */ + LONG_L gp, VCPU_HOST_GP(k1) + + /* Need a stack before we can jump to "C" */ + LONG_L sp, VCPU_HOST_STACK(k1) + + /* Saved host state */ + INT_ADDIU sp, sp, -PT_SIZE - /* Restore host DDATA_LO */ - LONG_L k0, PT_HOST_USERLOCAL(sp) - mtc0 k0, CP0_DDATA_LO + /* XXXKYMA do we need to load the host ASID, maybe not because the + * kernel entries are marked GLOBAL, need to verify + */ - /* Restore RDHWR access */ - la k0, 0x2000000F - mtc0 k0, CP0_HWRENA + /* Restore host DDATA_LO */ + LONG_L k0, PT_HOST_USERLOCAL(sp) + mtc0 k0, CP0_DDATA_LO - /* Jump to handler */ + /* Restore RDHWR access */ + PTR_LI k0, 0x2000000F + mtc0 k0, CP0_HWRENA + + /* Jump to handler */ FEXPORT(__kvm_mips_jump_to_handler) - /* XXXKYMA: not sure if this is safe, how large is the stack?? */ - /* Now jump to the kvm_mips_handle_exit() to see if we can deal with this in the kernel */ - la t9,kvm_mips_handle_exit - jalr.hb t9 - addiu sp,sp, -CALLFRAME_SIZ /* BD Slot */ - - /* Return from handler Make sure interrupts are disabled */ - di - ehb - - /* XXXKYMA: k0/k1 could have been blown away if we processed an exception - * while we were handling the exception from the guest, reload k1 - */ - move k1, s1 - addiu k1, k1, VCPU_HOST_ARCH - - /* Check return value, should tell us if we are returning to the host (handle I/O etc) - * or resuming the guest - */ - andi t0, v0, RESUME_HOST - bnez t0, __kvm_mips_return_to_host - nop + /* XXXKYMA: not sure if this is safe, how large is the stack?? 
+ * Now jump to the kvm_mips_handle_exit() to see if we can deal + * with this in the kernel */ + PTR_LA t9, kvm_mips_handle_exit + jalr.hb t9 + INT_ADDIU sp, sp, -CALLFRAME_SIZ /* BD Slot */ + + /* Return from handler Make sure interrupts are disabled */ + di + ehb + + /* XXXKYMA: k0/k1 could have been blown away if we processed + * an exception while we were handling the exception from the + * guest, reload k1 + */ + + move k1, s1 + INT_ADDIU k1, k1, VCPU_HOST_ARCH + + /* Check return value, should tell us if we are returning to the + * host (handle I/O etc)or resuming the guest + */ + andi t0, v0, RESUME_HOST + bnez t0, __kvm_mips_return_to_host + nop __kvm_mips_return_to_guest: - /* Put the saved pointer to vcpu (s1) back into the DDATA_LO Register */ - mtc0 s1, CP0_DDATA_LO - - /* Load up the Guest EBASE to minimize the window where BEV is set */ - LONG_L t0, VCPU_GUEST_EBASE(k1) - - /* Switch EBASE back to the one used by KVM */ - mfc0 v1, CP0_STATUS - .set at - or k0, v1, ST0_BEV - .set noat - mtc0 k0, CP0_STATUS - ehb - mtc0 t0,CP0_EBASE - - /* Setup status register for running guest in UM */ - .set at - or v1, v1, (ST0_EXL | KSU_USER | ST0_IE) - and v1, v1, ~ST0_CU0 - .set noat - mtc0 v1, CP0_STATUS - ehb + /* Put the saved pointer to vcpu (s1) back into the DDATA_LO Register */ + mtc0 s1, CP0_DDATA_LO + /* Load up the Guest EBASE to minimize the window where BEV is set */ + LONG_L t0, VCPU_GUEST_EBASE(k1) + + /* Switch EBASE back to the one used by KVM */ + mfc0 v1, CP0_STATUS + .set at + or k0, v1, ST0_BEV + .set noat + mtc0 k0, CP0_STATUS + ehb + mtc0 t0, CP0_EBASE + + /* Setup status register for running guest in UM */ + .set at + or v1, v1, (ST0_EXL | KSU_USER | ST0_IE) + and v1, v1, ~ST0_CU0 + .set noat + mtc0 v1, CP0_STATUS + ehb /* Set Guest EPC */ - LONG_L t0, VCPU_PC(k1) - mtc0 t0, CP0_EPC - - /* Set the ASID for the Guest Kernel */ - sll t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If 
kernel */ - addiu t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ - addiu t1, k1, VCPU_GUEST_USER_ASID /* else user */ + LONG_L t0, VCPU_PC(k1) + mtc0 t0, CP0_EPC + + /* Set the ASID for the Guest Kernel */ + INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ + /* addresses shift to 0x80000000 */ + bltz t0, 1f /* If kernel */ + INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ + INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: - /* t1: contains the base of the ASID array, need to get the cpu id */ - LONG_L t2, TI_CPU($28) /* smp_processor_id */ - sll t2, t2, 2 /* x4 */ - addu t3, t1, t2 - LONG_L k0, (t3) - andi k0, k0, 0xff - mtc0 k0,CP0_ENTRYHI - ehb - - /* Disable RDHWR access */ - mtc0 zero, CP0_HWRENA - - /* load the guest context from VCPU and return */ - LONG_L $0, VCPU_R0(k1) - LONG_L $1, VCPU_R1(k1) - LONG_L $2, VCPU_R2(k1) - LONG_L $3, VCPU_R3(k1) - LONG_L $4, VCPU_R4(k1) - LONG_L $5, VCPU_R5(k1) - LONG_L $6, VCPU_R6(k1) - LONG_L $7, VCPU_R7(k1) - LONG_L $8, VCPU_R8(k1) - LONG_L $9, VCPU_R9(k1) - LONG_L $10, VCPU_R10(k1) - LONG_L $11, VCPU_R11(k1) - LONG_L $12, VCPU_R12(k1) - LONG_L $13, VCPU_R13(k1) - LONG_L $14, VCPU_R14(k1) - LONG_L $15, VCPU_R15(k1) - LONG_L $16, VCPU_R16(k1) - LONG_L $17, VCPU_R17(k1) - LONG_L $18, VCPU_R18(k1) - LONG_L $19, VCPU_R19(k1) - LONG_L $20, VCPU_R20(k1) - LONG_L $21, VCPU_R21(k1) - LONG_L $22, VCPU_R22(k1) - LONG_L $23, VCPU_R23(k1) - LONG_L $24, VCPU_R24(k1) - LONG_L $25, VCPU_R25(k1) - - /* $/k1 loaded later */ - LONG_L $28, VCPU_R28(k1) - LONG_L $29, VCPU_R29(k1) - LONG_L $30, VCPU_R30(k1) - LONG_L $31, VCPU_R31(k1) + /* t1: contains the base of the ASID array, need to get the cpu id */ + LONG_L t2, TI_CPU($28) /* smp_processor_id */ + INT_SLL t2, t2, 2 /* x4 */ + REG_ADDU t3, t1, t2 + LONG_L k0, (t3) + andi k0, k0, 0xff + mtc0 k0,CP0_ENTRYHI + ehb + + /* Disable RDHWR access */ + mtc0 zero, CP0_HWRENA + + /* load the guest context from VCPU and return */ + LONG_L $0, VCPU_R0(k1) + LONG_L $1, VCPU_R1(k1) + 
LONG_L $2, VCPU_R2(k1) + LONG_L $3, VCPU_R3(k1) + LONG_L $4, VCPU_R4(k1) + LONG_L $5, VCPU_R5(k1) + LONG_L $6, VCPU_R6(k1) + LONG_L $7, VCPU_R7(k1) + LONG_L $8, VCPU_R8(k1) + LONG_L $9, VCPU_R9(k1) + LONG_L $10, VCPU_R10(k1) + LONG_L $11, VCPU_R11(k1) + LONG_L $12, VCPU_R12(k1) + LONG_L $13, VCPU_R13(k1) + LONG_L $14, VCPU_R14(k1) + LONG_L $15, VCPU_R15(k1) + LONG_L $16, VCPU_R16(k1) + LONG_L $17, VCPU_R17(k1) + LONG_L $18, VCPU_R18(k1) + LONG_L $19, VCPU_R19(k1) + LONG_L $20, VCPU_R20(k1) + LONG_L $21, VCPU_R21(k1) + LONG_L $22, VCPU_R22(k1) + LONG_L $23, VCPU_R23(k1) + LONG_L $24, VCPU_R24(k1) + LONG_L $25, VCPU_R25(k1) + + /* $/k1 loaded later */ + LONG_L $28, VCPU_R28(k1) + LONG_L $29, VCPU_R29(k1) + LONG_L $30, VCPU_R30(k1) + LONG_L $31, VCPU_R31(k1) FEXPORT(__kvm_mips_skip_guest_restore) - LONG_L k0, VCPU_HI(k1) - mthi k0 + LONG_L k0, VCPU_HI(k1) + mthi k0 - LONG_L k0, VCPU_LO(k1) - mtlo k0 + LONG_L k0, VCPU_LO(k1) + mtlo k0 - LONG_L k0, VCPU_R26(k1) - LONG_L k1, VCPU_R27(k1) + LONG_L k0, VCPU_R26(k1) + LONG_L k1, VCPU_R27(k1) - eret + eret __kvm_mips_return_to_host: - /* EBASE is already pointing to Linux */ - LONG_L k1, VCPU_HOST_STACK(k1) - addiu k1,k1, -PT_SIZE - - /* Restore host DDATA_LO */ - LONG_L k0, PT_HOST_USERLOCAL(k1) - mtc0 k0, CP0_DDATA_LO - - /* Restore host ASID */ - LONG_L k0, PT_HOST_ASID(sp) - andi k0, 0xff - mtc0 k0,CP0_ENTRYHI - ehb - - /* Load context saved on the host stack */ - LONG_L $0, PT_R0(k1) - LONG_L $1, PT_R1(k1) - - /* r2/v0 is the return code, shift it down by 2 (arithmetic) to recover the err code */ - sra k0, v0, 2 - move $2, k0 - - LONG_L $3, PT_R3(k1) - LONG_L $4, PT_R4(k1) - LONG_L $5, PT_R5(k1) - LONG_L $6, PT_R6(k1) - LONG_L $7, PT_R7(k1) - LONG_L $8, PT_R8(k1) - LONG_L $9, PT_R9(k1) - LONG_L $10, PT_R10(k1) - LONG_L $11, PT_R11(k1) - LONG_L $12, PT_R12(k1) - LONG_L $13, PT_R13(k1) - LONG_L $14, PT_R14(k1) - LONG_L $15, PT_R15(k1) - LONG_L $16, PT_R16(k1) - LONG_L $17, PT_R17(k1) - LONG_L $18, PT_R18(k1) - LONG_L $19, 
PT_R19(k1) - LONG_L $20, PT_R20(k1) - LONG_L $21, PT_R21(k1) - LONG_L $22, PT_R22(k1) - LONG_L $23, PT_R23(k1) - LONG_L $24, PT_R24(k1) - LONG_L $25, PT_R25(k1) - - /* Host k0/k1 were not saved */ - - LONG_L $28, PT_R28(k1) - LONG_L $29, PT_R29(k1) - LONG_L $30, PT_R30(k1) - - LONG_L k0, PT_HI(k1) - mthi k0 - - LONG_L k0, PT_LO(k1) - mtlo k0 - - /* Restore RDHWR access */ - la k0, 0x2000000F - mtc0 k0, CP0_HWRENA - - - /* Restore RA, which is the address we will return to */ - LONG_L ra, PT_R31(k1) - j ra - nop - - .set pop + /* EBASE is already pointing to Linux */ + LONG_L k1, VCPU_HOST_STACK(k1) + INT_ADDIU k1,k1, -PT_SIZE + + /* Restore host DDATA_LO */ + LONG_L k0, PT_HOST_USERLOCAL(k1) + mtc0 k0, CP0_DDATA_LO + + /* Restore host ASID */ + LONG_L k0, PT_HOST_ASID(sp) + andi k0, 0xff + mtc0 k0,CP0_ENTRYHI + ehb + + /* Load context saved on the host stack */ + LONG_L $0, PT_R0(k1) + LONG_L $1, PT_R1(k1) + + /* r2/v0 is the return code, shift it down by 2 (arithmetic) + * to recover the err code */ + INT_SRA k0, v0, 2 + move $2, k0 + + LONG_L $3, PT_R3(k1) + LONG_L $4, PT_R4(k1) + LONG_L $5, PT_R5(k1) + LONG_L $6, PT_R6(k1) + LONG_L $7, PT_R7(k1) + LONG_L $8, PT_R8(k1) + LONG_L $9, PT_R9(k1) + LONG_L $10, PT_R10(k1) + LONG_L $11, PT_R11(k1) + LONG_L $12, PT_R12(k1) + LONG_L $13, PT_R13(k1) + LONG_L $14, PT_R14(k1) + LONG_L $15, PT_R15(k1) + LONG_L $16, PT_R16(k1) + LONG_L $17, PT_R17(k1) + LONG_L $18, PT_R18(k1) + LONG_L $19, PT_R19(k1) + LONG_L $20, PT_R20(k1) + LONG_L $21, PT_R21(k1) + LONG_L $22, PT_R22(k1) + LONG_L $23, PT_R23(k1) + LONG_L $24, PT_R24(k1) + LONG_L $25, PT_R25(k1) + + /* Host k0/k1 were not saved */ + + LONG_L $28, PT_R28(k1) + LONG_L $29, PT_R29(k1) + LONG_L $30, PT_R30(k1) + + LONG_L k0, PT_HI(k1) + mthi k0 + + LONG_L k0, PT_LO(k1) + mtlo k0 + + /* Restore RDHWR access */ + PTR_LI k0, 0x2000000F + mtc0 k0, CP0_HWRENA + + + /* Restore RA, which is the address we will return to */ + LONG_L ra, PT_R31(k1) + j ra + nop + 
VECTOR_END(MIPSX(GuestExceptionEnd)) .end MIPSX(GuestException) @@ -627,24 +623,23 @@ MIPSX(exceptions): #define HW_SYNCI_Step $1 LEAF(MIPSX(SyncICache)) - .set push + .set push .set mips32r2 - beq a1, zero, 20f - nop - addu a1, a0, a1 - rdhwr v0, HW_SYNCI_Step - beq v0, zero, 20f - nop - + beq a1, zero, 20f + nop + REG_ADDU a1, a0, a1 + rdhwr v0, HW_SYNCI_Step + beq v0, zero, 20f + nop 10: - synci 0(a0) - addu a0, a0, v0 - sltu v1, a0, a1 - bne v1, zero, 10b - nop - sync + synci 0(a0) + REG_ADDU a0, a0, v0 + sltu v1, a0, a1 + bne v1, zero, 10b + nop + sync 20: - jr.hb ra - nop - .set pop + jr.hb ra + nop + .set pop END(MIPSX(SyncICache)) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 08891d07aeb6..fa19e2f1a874 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -334,6 +334,27 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) return r; } +/* + * Like kvmppc_get_last_inst(), but for fetching a sc instruction. + * Because the sc instruction sets SRR0 to point to the following + * instruction, we have to fetch from pc - 4. + */ +static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) +{ + ulong pc = kvmppc_get_pc(vcpu) - 4; + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + u32 r; + + /* Load the instruction manually if it failed to do so in the + * exit path */ + if (svcpu->last_inst == KVM_INST_FETCH_FAILED) + kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); + + r = svcpu->last_inst; + svcpu_put(svcpu); + return r; +} + static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); @@ -446,6 +467,23 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) return vcpu->arch.last_inst; } +/* + * Like kvmppc_get_last_inst(), but for fetching a sc instruction. 
+ * Because the sc instruction sets SRR0 to point to the following + * instruction, we have to fetch from pc - 4. + */ +static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) +{ + ulong pc = kvmppc_get_pc(vcpu) - 4; + + /* Load the instruction manually if it failed to do so in the + * exit path */ + if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) + kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false); + + return vcpu->arch.last_inst; +} + static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dar; diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index a1ecb14e4442..86d638a3b359 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -37,7 +37,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #ifdef CONFIG_KVM_BOOK3S_64_HV #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ -extern int kvm_hpt_order; /* order of preallocated HPTs */ +extern unsigned long kvm_rma_pages; #endif #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ @@ -100,7 +100,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, /* (masks depend on page size) */ rb |= 0x1000; /* page encoding in LP field */ rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */ - rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */ + rb |= ((va_low << 4) & 0xf0); /* AVAL field (P7 doesn't seem to care) */ } } else { /* 4kB page */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index af326cde7cb6..33283532e9d8 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -183,13 +183,9 @@ struct kvmppc_spapr_tce_table { struct page *pages[0]; }; -struct kvmppc_linear_info { - void *base_virt; - unsigned long base_pfn; - unsigned long npages; - struct list_head list; - atomic_t use_count; - int type; +struct 
kvm_rma_info { + atomic_t use_count; + unsigned long base_pfn; }; /* XICS components, defined in book3s_xics.c */ @@ -246,7 +242,7 @@ struct kvm_arch { int tlbie_lock; unsigned long lpcr; unsigned long rmor; - struct kvmppc_linear_info *rma; + struct kvm_rma_info *rma; unsigned long vrma_slb_v; int rma_setup_done; int using_mmu_notifiers; @@ -259,7 +255,7 @@ struct kvm_arch { spinlock_t slot_phys_lock; cpumask_t need_tlb_flush; struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; - struct kvmppc_linear_info *hpt_li; + int hpt_cma_alloc; #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a5287fe03d77..b15554a26c20 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -137,10 +137,10 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce); extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *rma); -extern struct kvmppc_linear_info *kvm_alloc_rma(void); -extern void kvm_release_rma(struct kvmppc_linear_info *ri); -extern struct kvmppc_linear_info *kvm_alloc_hpt(void); -extern void kvm_release_hpt(struct kvmppc_linear_info *li); +extern struct kvm_rma_info *kvm_alloc_rma(void); +extern void kvm_release_rma(struct kvm_rma_info *ri); +extern struct page *kvm_alloc_hpt(unsigned long nr_pages); +extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, @@ -261,6 +261,7 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); struct openpic; #ifdef CONFIG_KVM_BOOK3S_64_HV +extern void kvm_cma_reserve(void) __init; static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) { paca[cpu].kvm_hstate.xics_phys = addr; @@ -281,13 +282,12 @@ 
static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) } extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); -extern void kvm_linear_init(void); #else -static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) +static inline void __init kvm_cma_reserve(void) {} -static inline void kvm_linear_init(void) +static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) {} static inline u32 kvmppc_get_xics_latch(void) @@ -394,10 +394,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn) } } -/* Please call after prepare_to_enter. This function puts the lazy ee state - back to normal mode, without actually enabling interrupts. */ -static inline void kvmppc_lazy_ee_enable(void) +/* + * Please call after prepare_to_enter. This function puts the lazy ee and irq + * disabled tracking state back to normal mode, without actually enabling + * interrupts. + */ +static inline void kvmppc_fix_ee_before_entry(void) { + trace_hardirqs_on(); + #ifdef CONFIG_PPC64 /* Only need to enable IRQs by hard enabling them after this */ local_paca->irq_happened = 0; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c7e8afc2ead0..26098c20936d 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -451,6 +451,7 @@ int main(void) DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); #endif + DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3)); DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4)); DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5)); DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6)); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 389fb8077cc9..fe6a58c9f0b7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -229,6 +229,8 @@ void __init 
early_setup(unsigned long dt_ptr) /* Initialize the hash table or TLB handling */ early_init_mmu(); + kvm_cma_reserve(); + /* * Reserve any gigantic pages requested on the command line. * memblock needs to have been initialized by the time this is @@ -609,8 +611,6 @@ void __init setup_arch(char **cmdline_p) /* Initialize the MMU context management stuff */ mmu_context_init(); - kvm_linear_init(); - /* Interrupt code needs to be 64K-aligned */ if ((unsigned long)_stext & 0xffff) panic("Kernelbase not 64K-aligned (0x%lx)!\n", diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index eb643f862579..ffaef2cb101a 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -72,6 +72,7 @@ config KVM_BOOK3S_64_HV bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" depends on KVM_BOOK3S_64 select MMU_NOTIFIER + select CMA ---help--- Support running unmodified book3s_64 guest kernels in virtual machines on POWER7 and PPC970 processors that have diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 008cd856c5b5..6646c952c5e3 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -81,6 +81,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_64_vio_hv.o \ book3s_hv_ras.o \ book3s_hv_builtin.o \ + book3s_hv_cma.o \ $(kvm-book3s_64-builtin-xics-objs-y) kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 739bfbadb85e..7e345e00661a 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -182,10 +182,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, hva_t ptegp; u64 pteg[16]; u64 avpn = 0; + u64 v, r; + u64 v_val, v_mask; + u64 eaddr_mask; int i; - u8 key = 0; + u8 pp, key = 0; bool found = false; - int second = 0; + bool second = false; ulong mp_ea = vcpu->arch.magic_page_ea; /* Magic page override */ @@ -208,8 +211,16 @@ static int 
kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, goto no_seg_found; avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); + v_val = avpn & HPTE_V_AVPN; + if (slbe->tb) - avpn |= SLB_VSID_B_1T; + v_val |= SLB_VSID_B_1T; + if (slbe->large) + v_val |= HPTE_V_LARGE; + v_val |= HPTE_V_VALID; + + v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID | + HPTE_V_SECONDARY; do_second: ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); @@ -227,91 +238,74 @@ do_second: key = 4; for (i=0; i<16; i+=2) { - u64 v = pteg[i]; - u64 r = pteg[i+1]; - - /* Valid check */ - if (!(v & HPTE_V_VALID)) - continue; - /* Hash check */ - if ((v & HPTE_V_SECONDARY) != second) - continue; - - /* AVPN compare */ - if (HPTE_V_COMPARE(avpn, v)) { - u8 pp = (r & HPTE_R_PP) | key; - int eaddr_mask = 0xFFF; - - gpte->eaddr = eaddr; - gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, - eaddr, - data); - if (slbe->large) - eaddr_mask = 0xFFFFFF; - gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask); - gpte->may_execute = ((r & HPTE_R_N) ? 
false : true); - gpte->may_read = false; - gpte->may_write = false; - - switch (pp) { - case 0: - case 1: - case 2: - case 6: - gpte->may_write = true; - /* fall through */ - case 3: - case 5: - case 7: - gpte->may_read = true; - break; - } - - dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " - "-> 0x%lx\n", - eaddr, avpn, gpte->vpage, gpte->raddr); + /* Check all relevant fields of 1st dword */ + if ((pteg[i] & v_mask) == v_val) { found = true; break; } } - /* Update PTE R and C bits, so the guest's swapper knows we used the - * page */ - if (found) { - u32 oldr = pteg[i+1]; + if (!found) { + if (second) + goto no_page_found; + v_val |= HPTE_V_SECONDARY; + second = true; + goto do_second; + } - if (gpte->may_read) { - /* Set the accessed flag */ - pteg[i+1] |= HPTE_R_R; - } - if (gpte->may_write) { - /* Set the dirty flag */ - pteg[i+1] |= HPTE_R_C; - } else { - dprintk("KVM: Mapping read-only page!\n"); - } + v = pteg[i]; + r = pteg[i+1]; + pp = (r & HPTE_R_PP) | key; + eaddr_mask = 0xFFF; + + gpte->eaddr = eaddr; + gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); + if (slbe->large) + eaddr_mask = 0xFFFFFF; + gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask); + gpte->may_execute = ((r & HPTE_R_N) ? 
false : true); + gpte->may_read = false; + gpte->may_write = false; + + switch (pp) { + case 0: + case 1: + case 2: + case 6: + gpte->may_write = true; + /* fall through */ + case 3: + case 5: + case 7: + gpte->may_read = true; + break; + } - /* Write back into the PTEG */ - if (pteg[i+1] != oldr) - copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); + dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " + "-> 0x%lx\n", + eaddr, avpn, gpte->vpage, gpte->raddr); - if (!gpte->may_read) - return -EPERM; - return 0; - } else { - dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx " - "ptegp=0x%lx)\n", - eaddr, to_book3s(vcpu)->sdr1, ptegp); - for (i = 0; i < 16; i += 2) - dprintk(" %02d: 0x%llx - 0x%llx (0x%llx)\n", - i, pteg[i], pteg[i+1], avpn); - - if (!second) { - second = HPTE_V_SECONDARY; - goto do_second; - } + /* Update PTE R and C bits, so the guest's swapper knows we used the + * page */ + if (gpte->may_read) { + /* Set the accessed flag */ + r |= HPTE_R_R; + } + if (data && gpte->may_write) { + /* Set the dirty flag -- XXX even if not writing */ + r |= HPTE_R_C; + } + + /* Write back into the PTEG */ + if (pteg[i+1] != r) { + pteg[i+1] = r; + copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); } + if (!gpte->may_read) + return -EPERM; + return 0; + no_page_found: return -ENOENT; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 710d31317d81..043eec8461e7 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,6 +37,8 @@ #include <asm/ppc-opcode.h> #include <asm/cputable.h> +#include "book3s_hv_cma.h" + /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ #define MAX_LPID_970 63 @@ -52,8 +54,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) { unsigned long hpt; struct revmap_entry *rev; - struct kvmppc_linear_info *li; - long order = kvm_hpt_order; + struct page *page = NULL; + long order = KVM_DEFAULT_HPT_ORDER; if (htab_orderp) { order = 
*htab_orderp; @@ -61,26 +63,23 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) order = PPC_MIN_HPT_ORDER; } + kvm->arch.hpt_cma_alloc = 0; /* - * If the user wants a different size from default, * try first to allocate it from the kernel page allocator. + * We keep the CMA reserved for failed allocation. */ - hpt = 0; - if (order != kvm_hpt_order) { - hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| - __GFP_NOWARN, order - PAGE_SHIFT); - if (!hpt) - --order; - } + hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT | + __GFP_NOWARN, order - PAGE_SHIFT); /* Next try to allocate from the preallocated pool */ if (!hpt) { - li = kvm_alloc_hpt(); - if (li) { - hpt = (ulong)li->base_virt; - kvm->arch.hpt_li = li; - order = kvm_hpt_order; - } + VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER); + page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); + if (page) { + hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + kvm->arch.hpt_cma_alloc = 1; + } else + --order; } /* Lastly try successively smaller sizes from the page allocator */ @@ -118,8 +117,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) return 0; out_freehpt: - if (kvm->arch.hpt_li) - kvm_release_hpt(kvm->arch.hpt_li); + if (kvm->arch.hpt_cma_alloc) + kvm_release_hpt(page, 1 << (order - PAGE_SHIFT)); else free_pages(hpt, order - PAGE_SHIFT); return -ENOMEM; @@ -165,8 +164,9 @@ void kvmppc_free_hpt(struct kvm *kvm) { kvmppc_free_lpid(kvm->arch.lpid); vfree(kvm->arch.revmap); - if (kvm->arch.hpt_li) - kvm_release_hpt(kvm->arch.hpt_li); + if (kvm->arch.hpt_cma_alloc) + kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt), + 1 << (kvm->arch.hpt_order - PAGE_SHIFT)); else free_pages(kvm->arch.hpt_virt, kvm->arch.hpt_order - PAGE_SHIFT); @@ -1579,7 +1579,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) ctx->first_pass = 1; rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? 
O_WRONLY : O_RDONLY; - ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag); + ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC); if (ret < 0) { kvm_put_kvm(kvm); return ret; diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index b2d3f3b2de72..54cf9bc94dad 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -136,7 +136,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, mutex_unlock(&kvm->lock); return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, - stt, O_RDWR); + stt, O_RDWR | O_CLOEXEC); fail: if (stt) { diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 1f6344c4408d..360ce68c9809 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -458,6 +458,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_PMC4_GEKKO: case SPRN_WPAR_GEKKO: case SPRN_MSSSR0: + case SPRN_DABR: break; unprivileged: default: @@ -555,6 +556,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_PMC4_GEKKO: case SPRN_WPAR_GEKKO: case SPRN_MSSSR0: + case SPRN_DABR: *spr_val = 0; break; default: diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2efa9dde741a..b0ee3bc9ca76 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -680,13 +680,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) + struct kvm_sregs *sregs) { int i; - sregs->pvr = vcpu->arch.pvr; - memset(sregs, 0, sizeof(struct kvm_sregs)); + sregs->pvr = vcpu->arch.pvr; for (i = 0; i < vcpu->arch.slb_max; i++) { sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige; sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; @@ -696,7 +695,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, } int 
kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) + struct kvm_sregs *sregs) { int i, j; @@ -1511,10 +1510,10 @@ static inline int lpcr_rmls(unsigned long rma_size) static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - struct kvmppc_linear_info *ri = vma->vm_file->private_data; struct page *page; + struct kvm_rma_info *ri = vma->vm_file->private_data; - if (vmf->pgoff >= ri->npages) + if (vmf->pgoff >= kvm_rma_pages) return VM_FAULT_SIGBUS; page = pfn_to_page(ri->base_pfn + vmf->pgoff); @@ -1536,7 +1535,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) static int kvm_rma_release(struct inode *inode, struct file *filp) { - struct kvmppc_linear_info *ri = filp->private_data; + struct kvm_rma_info *ri = filp->private_data; kvm_release_rma(ri); return 0; @@ -1549,18 +1548,27 @@ static const struct file_operations kvm_rma_fops = { long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) { - struct kvmppc_linear_info *ri; long fd; + struct kvm_rma_info *ri; + /* + * Only do this on PPC970 in HV mode + */ + if (!cpu_has_feature(CPU_FTR_HVMODE) || + !cpu_has_feature(CPU_FTR_ARCH_201)) + return -EINVAL; + + if (!kvm_rma_pages) + return -EINVAL; ri = kvm_alloc_rma(); if (!ri) return -ENOMEM; - fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR); + fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC); if (fd < 0) kvm_release_rma(ri); - ret->rma_size = ri->npages << PAGE_SHIFT; + ret->rma_size = kvm_rma_pages << PAGE_SHIFT; return fd; } @@ -1725,7 +1733,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { int err = 0; struct kvm *kvm = vcpu->kvm; - struct kvmppc_linear_info *ri = NULL; + struct kvm_rma_info *ri = NULL; unsigned long hva; struct kvm_memory_slot *memslot; struct vm_area_struct *vma; @@ -1803,13 +1811,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) } else { /* Set up to use an RMO region */ - rma_size = 
ri->npages; + rma_size = kvm_rma_pages; if (rma_size > memslot->npages) rma_size = memslot->npages; rma_size <<= PAGE_SHIFT; rmls = lpcr_rmls(rma_size); err = -EINVAL; - if (rmls < 0) { + if ((long)rmls < 0) { pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); goto out_srcu; } @@ -1831,14 +1839,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) /* POWER7 */ lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); lpcr |= rmls << LPCR_RMLS_SH; - kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; + kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT; } kvm->arch.lpcr = lpcr; pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); /* Initialize phys addrs of pages in RMO */ - npages = ri->npages; + npages = kvm_rma_pages; porder = __ilog2(npages); physp = memslot->arch.slot_phys; if (physp) { @@ -1874,7 +1882,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) /* Allocate the guest's logical partition ID */ lpid = kvmppc_alloc_lpid(); - if (lpid < 0) + if ((long)lpid < 0) return -ENOMEM; kvm->arch.lpid = lpid; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index ec0a9e5de100..8cd0daebb82d 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -13,33 +13,34 @@ #include <linux/spinlock.h> #include <linux/bootmem.h> #include <linux/init.h> +#include <linux/memblock.h> +#include <linux/sizes.h> #include <asm/cputable.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#define KVM_LINEAR_RMA 0 -#define KVM_LINEAR_HPT 1 - -static void __init kvm_linear_init_one(ulong size, int count, int type); -static struct kvmppc_linear_info *kvm_alloc_linear(int type); -static void kvm_release_linear(struct kvmppc_linear_info *ri); - -int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER; -EXPORT_SYMBOL_GPL(kvm_hpt_order); - -/*************** RMA *************/ - +#include "book3s_hv_cma.h" +/* + * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) + * should be power 
of 2. + */ +#define HPT_ALIGN_PAGES ((1 << 18) >> PAGE_SHIFT) /* 256k */ +/* + * By default we reserve 5% of memory for hash pagetable allocation. + */ +static unsigned long kvm_cma_resv_ratio = 5; /* - * This maintains a list of RMAs (real mode areas) for KVM guests to use. + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area. * Each RMA has to be physically contiguous and of a size that the * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, * and other larger sizes. Since we are unlikely to be allocate that * much physically contiguous memory after the system is up and running, - * we preallocate a set of RMAs in early boot for KVM to use. + * we preallocate a set of RMAs in early boot using CMA. + * should be power of 2. */ -static unsigned long kvm_rma_size = 64 << 20; /* 64MB */ -static unsigned long kvm_rma_count; +unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ +EXPORT_SYMBOL_GPL(kvm_rma_pages); /* Work out RMLS (real mode limit selector) field value for a given RMA size. Assumes POWER7 or PPC970. 
*/ @@ -69,165 +70,114 @@ static inline int lpcr_rmls(unsigned long rma_size) static int __init early_parse_rma_size(char *p) { - if (!p) - return 1; + unsigned long kvm_rma_size; + pr_debug("%s(%s)\n", __func__, p); + if (!p) + return -EINVAL; kvm_rma_size = memparse(p, &p); - + /* + * Check that the requested size is one supported in hardware + */ + if (lpcr_rmls(kvm_rma_size) < 0) { + pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); + return -EINVAL; + } + kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT; return 0; } early_param("kvm_rma_size", early_parse_rma_size); -static int __init early_parse_rma_count(char *p) +struct kvm_rma_info *kvm_alloc_rma() { - if (!p) - return 1; - - kvm_rma_count = simple_strtoul(p, NULL, 0); - - return 0; -} -early_param("kvm_rma_count", early_parse_rma_count); - -struct kvmppc_linear_info *kvm_alloc_rma(void) -{ - return kvm_alloc_linear(KVM_LINEAR_RMA); + struct page *page; + struct kvm_rma_info *ri; + + ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); + if (!ri) + return NULL; + page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages); + if (!page) + goto err_out; + atomic_set(&ri->use_count, 1); + ri->base_pfn = page_to_pfn(page); + return ri; +err_out: + kfree(ri); + return NULL; } EXPORT_SYMBOL_GPL(kvm_alloc_rma); -void kvm_release_rma(struct kvmppc_linear_info *ri) +void kvm_release_rma(struct kvm_rma_info *ri) { - kvm_release_linear(ri); + if (atomic_dec_and_test(&ri->use_count)) { + kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages); + kfree(ri); + } } EXPORT_SYMBOL_GPL(kvm_release_rma); -/*************** HPT *************/ - -/* - * This maintains a list of big linear HPT tables that contain the GVA->HPA - * memory mappings. If we don't reserve those early on, we might not be able - * to get a big (usually 16MB) linear memory region from the kernel anymore. 
- */ - -static unsigned long kvm_hpt_count; - -static int __init early_parse_hpt_count(char *p) +static int __init early_parse_kvm_cma_resv(char *p) { + pr_debug("%s(%s)\n", __func__, p); if (!p) - return 1; - - kvm_hpt_count = simple_strtoul(p, NULL, 0); - - return 0; + return -EINVAL; + return kstrtoul(p, 0, &kvm_cma_resv_ratio); } -early_param("kvm_hpt_count", early_parse_hpt_count); +early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv); -struct kvmppc_linear_info *kvm_alloc_hpt(void) +struct page *kvm_alloc_hpt(unsigned long nr_pages) { - return kvm_alloc_linear(KVM_LINEAR_HPT); + unsigned long align_pages = HPT_ALIGN_PAGES; + + /* Old CPUs require HPT aligned on a multiple of its size */ + if (!cpu_has_feature(CPU_FTR_ARCH_206)) + align_pages = nr_pages; + return kvm_alloc_cma(nr_pages, align_pages); } EXPORT_SYMBOL_GPL(kvm_alloc_hpt); -void kvm_release_hpt(struct kvmppc_linear_info *li) +void kvm_release_hpt(struct page *page, unsigned long nr_pages) { - kvm_release_linear(li); + kvm_release_cma(page, nr_pages); } EXPORT_SYMBOL_GPL(kvm_release_hpt); -/*************** generic *************/ - -static LIST_HEAD(free_linears); -static DEFINE_SPINLOCK(linear_lock); - -static void __init kvm_linear_init_one(ulong size, int count, int type) -{ - unsigned long i; - unsigned long j, npages; - void *linear; - struct page *pg; - const char *typestr; - struct kvmppc_linear_info *linear_info; - - if (!count) - return; - - typestr = (type == KVM_LINEAR_RMA) ? 
"RMA" : "HPT"; - - npages = size >> PAGE_SHIFT; - linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); - for (i = 0; i < count; ++i) { - linear = alloc_bootmem_align(size, size); - pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, - size >> 20); - linear_info[i].base_virt = linear; - linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT; - linear_info[i].npages = npages; - linear_info[i].type = type; - list_add_tail(&linear_info[i].list, &free_linears); - atomic_set(&linear_info[i].use_count, 0); - - pg = pfn_to_page(linear_info[i].base_pfn); - for (j = 0; j < npages; ++j) { - atomic_inc(&pg->_count); - ++pg; - } - } -} - -static struct kvmppc_linear_info *kvm_alloc_linear(int type) -{ - struct kvmppc_linear_info *ri, *ret; - - ret = NULL; - spin_lock(&linear_lock); - list_for_each_entry(ri, &free_linears, list) { - if (ri->type != type) - continue; - - list_del(&ri->list); - atomic_inc(&ri->use_count); - memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT); - ret = ri; - break; - } - spin_unlock(&linear_lock); - return ret; -} - -static void kvm_release_linear(struct kvmppc_linear_info *ri) -{ - if (atomic_dec_and_test(&ri->use_count)) { - spin_lock(&linear_lock); - list_add_tail(&ri->list, &free_linears); - spin_unlock(&linear_lock); - - } -} - -/* - * Called at boot time while the bootmem allocator is active, - * to allocate contiguous physical memory for the hash page - * tables for guests. +/** + * kvm_cma_reserve() - reserve area for kvm hash pagetable + * + * This function reserves memory from early allocator. It should be + * called by arch specific code once the early allocator (memblock or bootmem) + * has been activated and all other subsystems have already allocated/reserved + * memory. 
*/ -void __init kvm_linear_init(void) +void __init kvm_cma_reserve(void) { - /* HPT */ - kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT); - - /* RMA */ - /* Only do this on PPC970 in HV mode */ - if (!cpu_has_feature(CPU_FTR_HVMODE) || - !cpu_has_feature(CPU_FTR_ARCH_201)) - return; - - if (!kvm_rma_size || !kvm_rma_count) - return; - - /* Check that the requested size is one supported in hardware */ - if (lpcr_rmls(kvm_rma_size) < 0) { - pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); - return; + unsigned long align_size; + struct memblock_region *reg; + phys_addr_t selected_size = 0; + /* + * We cannot use memblock_phys_mem_size() here, because + * memblock_analyze() has not been called yet. + */ + for_each_memblock(memory, reg) + selected_size += memblock_region_memory_end_pfn(reg) - + memblock_region_memory_base_pfn(reg); + + selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT; + if (selected_size) { + pr_debug("%s: reserving %ld MiB for global area\n", __func__, + (unsigned long)selected_size / SZ_1M); + /* + * Old CPUs require HPT aligned on a multiple of its size. So for them + * make the alignment as max size we could request. 
+ */ + if (!cpu_has_feature(CPU_FTR_ARCH_206)) + align_size = __rounddown_pow_of_two(selected_size); + else + align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; + + align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); + kvm_cma_declare_contiguous(selected_size, align_size); } - - kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA); } diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c new file mode 100644 index 000000000000..d9d3d8553d51 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_cma.c @@ -0,0 +1,240 @@ +/* + * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA + * for DMA mapping framework + * + * Copyright IBM Corporation, 2013 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License or (at your optional) any later version of the license. + * + */ +#define pr_fmt(fmt) "kvm_cma: " fmt + +#ifdef CONFIG_CMA_DEBUG +#ifndef DEBUG +# define DEBUG +#endif +#endif + +#include <linux/memblock.h> +#include <linux/mutex.h> +#include <linux/sizes.h> +#include <linux/slab.h> + +#include "book3s_hv_cma.h" + +struct kvm_cma { + unsigned long base_pfn; + unsigned long count; + unsigned long *bitmap; +}; + +static DEFINE_MUTEX(kvm_cma_mutex); +static struct kvm_cma kvm_cma_area; + +/** + * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling + * for kvm hash pagetable + * @size: Size of the reserved memory. + * @alignment: Alignment for the contiguous memory area + * + * This function reserves memory for kvm cma area. It should be + * called by arch code when early allocator (memblock or bootmem) + * is still activate. 
+ */ +long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment) +{ + long base_pfn; + phys_addr_t addr; + struct kvm_cma *cma = &kvm_cma_area; + + pr_debug("%s(size %lx)\n", __func__, (unsigned long)size); + + if (!size) + return -EINVAL; + /* + * Sanitise input arguments. + * We should be pageblock aligned for CMA. + */ + alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order)); + size = ALIGN(size, alignment); + /* + * Reserve memory + * Use __memblock_alloc_base() since + * memblock_alloc_base() panic()s. + */ + addr = __memblock_alloc_base(size, alignment, 0); + if (!addr) { + base_pfn = -ENOMEM; + goto err; + } else + base_pfn = PFN_DOWN(addr); + + /* + * Each reserved area must be initialised later, when more kernel + * subsystems (like slab allocator) are available. + */ + cma->base_pfn = base_pfn; + cma->count = size >> PAGE_SHIFT; + pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M); + return 0; +err: + pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); + return base_pfn; +} + +/** + * kvm_alloc_cma() - allocate pages from contiguous area + * @nr_pages: Requested number of pages. + * @align_pages: Requested alignment in number of pages + * + * This function allocates memory buffer for hash pagetable. + */ +struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages) +{ + int ret; + struct page *page = NULL; + struct kvm_cma *cma = &kvm_cma_area; + unsigned long chunk_count, nr_chunk; + unsigned long mask, pfn, pageno, start = 0; + + + if (!cma || !cma->count) + return NULL; + + pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__, + (void *)cma, nr_pages, align_pages); + + if (!nr_pages) + return NULL; + /* + * align mask with chunk size. 
The bit tracks pages in chunk size + */ + VM_BUG_ON(!is_power_of_2(align_pages)); + mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1; + BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER); + + chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); + nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); + + mutex_lock(&kvm_cma_mutex); + for (;;) { + pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count, + start, nr_chunk, mask); + if (pageno >= chunk_count) + break; + + pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)); + ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA); + if (ret == 0) { + bitmap_set(cma->bitmap, pageno, nr_chunk); + page = pfn_to_page(pfn); + memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT); + break; + } else if (ret != -EBUSY) { + break; + } + pr_debug("%s(): memory range at %p is busy, retrying\n", + __func__, pfn_to_page(pfn)); + /* try again with a bit different memory target */ + start = pageno + mask + 1; + } + mutex_unlock(&kvm_cma_mutex); + pr_debug("%s(): returned %p\n", __func__, page); + return page; +} + +/** + * kvm_release_cma() - release allocated pages for hash pagetable + * @pages: Allocated pages. + * @nr_pages: Number of allocated pages. + * + * This function releases memory allocated by kvm_alloc_cma(). + * It returns false when provided pages do not belong to contiguous area and + * true otherwise. 
+ */ +bool kvm_release_cma(struct page *pages, unsigned long nr_pages) +{ + unsigned long pfn; + unsigned long nr_chunk; + struct kvm_cma *cma = &kvm_cma_area; + + if (!cma || !pages) + return false; + + pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages); + + pfn = page_to_pfn(pages); + + if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) + return false; + + VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count); + nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); + + mutex_lock(&kvm_cma_mutex); + bitmap_clear(cma->bitmap, + (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT), + nr_chunk); + free_contig_range(pfn, nr_pages); + mutex_unlock(&kvm_cma_mutex); + + return true; +} + +static int __init kvm_cma_activate_area(unsigned long base_pfn, + unsigned long count) +{ + unsigned long pfn = base_pfn; + unsigned i = count >> pageblock_order; + struct zone *zone; + + WARN_ON_ONCE(!pfn_valid(pfn)); + zone = page_zone(pfn_to_page(pfn)); + do { + unsigned j; + base_pfn = pfn; + for (j = pageblock_nr_pages; j; --j, pfn++) { + WARN_ON_ONCE(!pfn_valid(pfn)); + /* + * alloc_contig_range requires the pfn range + * specified to be in the same zone. Make this + * simple by forcing the entire CMA resv range + * to be in the same zone. 
+ */ + if (page_zone(pfn_to_page(pfn)) != zone) + return -EINVAL; + } + init_cma_reserved_pageblock(pfn_to_page(base_pfn)); + } while (--i); + return 0; +} + +static int __init kvm_cma_init_reserved_areas(void) +{ + int bitmap_size, ret; + unsigned long chunk_count; + struct kvm_cma *cma = &kvm_cma_area; + + pr_debug("%s()\n", __func__); + if (!cma->count) + return 0; + chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); + bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long); + cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!cma->bitmap) + return -ENOMEM; + + ret = kvm_cma_activate_area(cma->base_pfn, cma->count); + if (ret) + goto error; + return 0; + +error: + kfree(cma->bitmap); + return ret; +} +core_initcall(kvm_cma_init_reserved_areas); diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h new file mode 100644 index 000000000000..655144f75fa5 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_cma.h @@ -0,0 +1,27 @@ +/* + * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA + * for DMA mapping framework + * + * Copyright IBM Corporation, 2013 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License or (at your optional) any later version of the license. + * + */ + +#ifndef __POWERPC_KVM_CMA_ALLOC_H__ +#define __POWERPC_KVM_CMA_ALLOC_H__ +/* + * Both RMA and Hash page allocation will be multiple of 256K. 
+ */ +#define KVM_CMA_CHUNK_ORDER 18 + +extern struct page *kvm_alloc_cma(unsigned long nr_pages, + unsigned long align_pages); +extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages); +extern long kvm_cma_declare_contiguous(phys_addr_t size, + phys_addr_t alignment) __init; +#endif diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index fc25689a9f35..45e30d6e462b 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -383,6 +383,80 @@ static inline int try_lock_tlbie(unsigned int *lock) return old == 0; } +/* + * tlbie/tlbiel is a bit different on the PPC970 compared to later + * processors such as POWER7; the large page bit is in the instruction + * not RB, and the top 16 bits and the bottom 12 bits of the VA + * in RB must be 0. + */ +static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues, + long npages, int global, bool need_sync) +{ + long i; + + if (global) { + while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + if (need_sync) + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < npages; ++i) { + unsigned long rb = rbvalues[i]; + + if (rb & 1) /* large page */ + asm volatile("tlbie %0,1" : : + "r" (rb & 0x0000fffffffff000ul)); + else + asm volatile("tlbie %0,0" : : + "r" (rb & 0x0000fffffffff000ul)); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { + if (need_sync) + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < npages; ++i) { + unsigned long rb = rbvalues[i]; + + if (rb & 1) /* large page */ + asm volatile("tlbiel %0,1" : : + "r" (rb & 0x0000fffffffff000ul)); + else + asm volatile("tlbiel %0,0" : : + "r" (rb & 0x0000fffffffff000ul)); + } + asm volatile("ptesync" : : : "memory"); + } +} + +static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, + long npages, int global, bool need_sync) +{ + long i; + + if (cpu_has_feature(CPU_FTR_ARCH_201)) { + /* PPC970 tlbie 
instruction is a bit different */ + do_tlbies_970(kvm, rbvalues, npages, global, need_sync); + return; + } + if (global) { + while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + if (need_sync) + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < npages; ++i) + asm volatile(PPC_TLBIE(%1,%0) : : + "r" (rbvalues[i]), "r" (kvm->arch.lpid)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { + if (need_sync) + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < npages; ++i) + asm volatile("tlbiel %0" : : "r" (rbvalues[i])); + asm volatile("ptesync" : : : "memory"); + } +} + long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, unsigned long pte_index, unsigned long avpn, unsigned long *hpret) @@ -408,19 +482,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, if (v & HPTE_V_VALID) { hpte[0] &= ~HPTE_V_VALID; rb = compute_tlbie_rb(v, hpte[1], pte_index); - if (global_invalidates(kvm, flags)) { - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" - : : "r" (rb), "r" (kvm->arch.lpid)); - asm volatile("ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; - } else { - asm volatile("ptesync" : : : "memory"); - asm volatile("tlbiel %0" : : "r" (rb)); - asm volatile("ptesync" : : : "memory"); - } + do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* Read PTE low word after tlbie to get final R/C values */ remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); } @@ -448,12 +510,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) unsigned long *hp, *hptes[4], tlbrb[4]; long int i, j, k, n, found, indexes[4]; unsigned long flags, req, pte_index, rcbits; - long int local = 0; + int global; long int ret = H_SUCCESS; struct revmap_entry *rev, *revs[4]; - if (atomic_read(&kvm->online_vcpus) == 1) - local = 1; + global = global_invalidates(kvm, 0); for (i = 0; i < 4 && ret == 
H_SUCCESS; ) { n = 0; for (; i < 4; ++i) { @@ -529,22 +590,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) break; /* Now that we've collected a batch, do the tlbies */ - if (!local) { - while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile("ptesync" : : : "memory"); - for (k = 0; k < n; ++k) - asm volatile(PPC_TLBIE(%1,%0) : : - "r" (tlbrb[k]), - "r" (kvm->arch.lpid)); - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; - } else { - asm volatile("ptesync" : : : "memory"); - for (k = 0; k < n; ++k) - asm volatile("tlbiel %0" : : "r" (tlbrb[k])); - asm volatile("ptesync" : : : "memory"); - } + do_tlbies(kvm, tlbrb, n, global, true); /* Read PTE low words after tlbie to get final R/C values */ for (k = 0; k < n; ++k) { @@ -603,19 +649,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, if (v & HPTE_V_VALID) { rb = compute_tlbie_rb(v, r, pte_index); hpte[0] = v & ~HPTE_V_VALID; - if (global_invalidates(kvm, flags)) { - while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" - : : "r" (rb), "r" (kvm->arch.lpid)); - asm volatile("ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; - } else { - asm volatile("ptesync" : : : "memory"); - asm volatile("tlbiel %0" : : "r" (rb)); - asm volatile("ptesync" : : : "memory"); - } + do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* * If the host has this page as readonly but the guest * wants to make it read/write, reduce the permissions. 
@@ -686,13 +720,7 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, hptep[0] &= ~HPTE_V_VALID; rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" - : : "r" (rb), "r" (kvm->arch.lpid)); - asm volatile("ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; + do_tlbies(kvm, &rb, 1, 1, true); } EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); @@ -706,12 +734,7 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, rbyte = (hptep[1] & ~HPTE_R_R) >> 8; /* modify only the second-last byte, which contains the ref bit */ *((char *)hptep + 14) = rbyte; - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" - : : "r" (rb), "r" (kvm->arch.lpid)); - asm volatile("ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; + do_tlbies(kvm, &rb, 1, 1, false); } EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b02f91e4c70d..60dce5bfab3f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1381,7 +1381,7 @@ hcall_try_real_mode: cmpldi r3,hcall_real_table_end - hcall_real_table bge guest_exit_cont LOAD_REG_ADDR(r4, hcall_real_table) - lwzx r3,r3,r4 + lwax r3,r3,r4 cmpwi r3,0 beq guest_exit_cont add r3,r3,r4 diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index 48cbbf862958..17cfae5497a3 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -92,6 +92,11 @@ kvm_start_lightweight: PPC_LL r3, VCPU_HFLAGS(r4) rldicl r3, r3, 0, 63 /* r3 &= 1 */ stb r3, HSTATE_RESTORE_HID5(r13) + + /* Load up guest SPRG3 value, since it's user readable */ + ld r3, VCPU_SHARED(r4) + ld r3, VCPU_SHARED_SPRG3(r3) + mtspr SPRN_SPRG3, r3 #endif /* 
CONFIG_PPC_BOOK3S_64 */ PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ @@ -123,6 +128,15 @@ kvmppc_handler_highmem: /* R7 = vcpu */ PPC_LL r7, GPR4(r1) +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * Reload kernel SPRG3 value. + * No need to save guest value as usermode can't modify SPRG3. + */ + ld r3, PACA_SPRG3(r13) + mtspr SPRN_SPRG3, r3 +#endif /* CONFIG_PPC_BOOK3S_64 */ + PPC_STL r14, VCPU_GPR(R14)(r7) PPC_STL r15, VCPU_GPR(R15)(r7) PPC_STL r16, VCPU_GPR(R16)(r7) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 19498a567a81..27db1e665959 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -468,7 +468,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) * both the traditional FP registers and the added VSX * registers into thread.fpr[]. */ - giveup_fpu(current); + if (current->thread.regs->msr & MSR_FP) + giveup_fpu(current); for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; @@ -483,7 +484,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) #ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { - giveup_altivec(current); + if (current->thread.regs->msr & MSR_VEC) + giveup_altivec(current); memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); vcpu->arch.vscr = t->vscr; } @@ -575,8 +577,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, printk(KERN_INFO "Loading up ext 0x%lx\n", msr); #endif - current->thread.regs->msr |= msr; - if (msr & MSR_FP) { for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; @@ -598,12 +598,32 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, #endif } + current->thread.regs->msr |= msr; vcpu->arch.guest_owned_ext |= msr; kvmppc_recalc_shadow_msr(vcpu); return RESUME_GUEST; } +/* + * Kernel code using FP or VMX could have flushed guest state to + * the thread_struct; if so, get it back now. 
+ */ +static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu) +{ + unsigned long lost_ext; + + lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr; + if (!lost_ext) + return; + + if (lost_ext & MSR_FP) + kvmppc_load_up_fpu(); + if (lost_ext & MSR_VEC) + kvmppc_load_up_altivec(); + current->thread.regs->msr |= lost_ext; +} + int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { @@ -772,7 +792,7 @@ program_interrupt: } case BOOK3S_INTERRUPT_SYSCALL: if (vcpu->arch.papr_enabled && - (kvmppc_get_last_inst(vcpu) == 0x44000022) && + (kvmppc_get_last_sc(vcpu) == 0x44000022) && !(vcpu->arch.shared->msr & MSR_PR)) { /* SC 1 papr hypercalls */ ulong cmd = kvmppc_get_gpr(vcpu, 3); @@ -890,8 +910,9 @@ program_interrupt: local_irq_enable(); r = s; } else { - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); } + kvmppc_handle_lost_ext(vcpu); } trace_kvm_book3s_reenter(r, vcpu); @@ -1047,11 +1068,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_shadow_vcpu; + err = -ENOMEM; p = __get_free_page(GFP_KERNEL|__GFP_ZERO); - /* the real shared page fills the last 4k of our page */ - vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096); if (!p) goto uninit_vcpu; + /* the real shared page fills the last 4k of our page */ + vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); #ifdef CONFIG_PPC_BOOK3S_64 /* default to book3s_64 (970fx) */ @@ -1161,7 +1183,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 94c1dd46b83d..a3a5cb8ee7ea 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -19,6 +19,7 @@ #include <asm/hvcall.h> #include <asm/xics.h> #include 
<asm/debug.h> +#include <asm/time.h> #include <linux/debugfs.h> #include <linux/seq_file.h> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index dcc94f016007..17722d82f1d1 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -674,8 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) goto out; } - kvm_guest_enter(); - #ifdef CONFIG_PPC_FPU /* Save userspace FPU state in stack */ enable_kernel_fp(); @@ -698,7 +696,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_load_guest_fp(vcpu); #endif - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); @@ -1168,7 +1166,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_enable(); r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); } else { - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ae63ae4a1a5f..f55e14cd1762 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -117,8 +117,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) kvm_guest_exit(); continue; } - - trace_hardirqs_on(); #endif kvm_guest_enter(); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c0efd16bdfa1..c76ff74a98f2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -516,6 +516,11 @@ struct kvm_vcpu_arch { /* set at EPT violation at this point */ unsigned long exit_qualification; + + /* pv related host specific info */ + struct { + bool pv_unhalted; + } pv; }; struct kvm_lpage_info { diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 06fdbd987e97..94dc8ca434e0 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -23,6 +23,7 @@ #define KVM_FEATURE_ASYNC_PF 4 #define KVM_FEATURE_STEAL_TIME 5 #define KVM_FEATURE_PV_EOI 6 +#define 
KVM_FEATURE_PV_UNHALT 7 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a20ecb5b6cbf..b110fe6c03d4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -413,7 +413,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_CLOCKSOURCE2) | (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_PV_EOI) | - (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); + (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | + (1 << KVM_FEATURE_PV_UNHALT); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c98f05442325..5439117d5c4c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -689,7 +689,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, break; case APIC_DM_REMRD: - apic_debug("Ignoring delivery mode 3\n"); + result = 1; + vcpu->arch.pv.pv_unhalted = 1; + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); break; case APIC_DM_SMI: diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9651c9937588..6e2d2c8f230b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2781,7 +2781,7 @@ exit: return ret; } -static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code) +static bool page_fault_can_be_fast(u32 error_code) { /* * Do not fix the mmio spte with invalid generation number which @@ -2834,7 +2834,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, bool ret = false; u64 spte = 0ull; - if (!page_fault_can_be_fast(vcpu, error_code)) + if (!page_fault_can_be_fast(error_code)) return false; walk_shadow_page_lockless_begin(vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 57b4e129891a..1f1da43ff2a2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5485,6 +5485,7 @@ static int 
handle_invalid_guest_state(struct kvm_vcpu *vcpu) err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); if (err == EMULATE_USER_EXIT) { + ++vcpu->stat.mmio_exits; ret = 0; goto out; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 668f19aee6ca..e5ca72a5cdb6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1447,6 +1447,29 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) #endif } +static void kvm_gen_update_masterclock(struct kvm *kvm) +{ +#ifdef CONFIG_X86_64 + int i; + struct kvm_vcpu *vcpu; + struct kvm_arch *ka = &kvm->arch; + + spin_lock(&ka->pvclock_gtod_sync_lock); + kvm_make_mclock_inprogress_request(kvm); + /* no guest entries from this point */ + pvclock_update_vm_gtod_copy(kvm); + + kvm_for_each_vcpu(i, vcpu, kvm) + set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); + + /* guest entries allowed */ + kvm_for_each_vcpu(i, vcpu, kvm) + clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests); + + spin_unlock(&ka->pvclock_gtod_sync_lock); +#endif +} + static int kvm_guest_time_update(struct kvm_vcpu *v) { unsigned long flags, this_tsc_khz; @@ -3796,6 +3819,7 @@ long kvm_arch_vm_ioctl(struct file *filp, delta = user_ns.clock - now_ns; local_irq_enable(); kvm->arch.kvmclock_offset = delta; + kvm_gen_update_masterclock(kvm); break; } case KVM_GET_CLOCK: { @@ -5122,9 +5146,10 @@ restart: inject_emulated_exception(vcpu); r = EMULATE_DONE; } else if (vcpu->arch.pio.count) { - if (!vcpu->arch.pio.in) + if (!vcpu->arch.pio.in) { + /* FIXME: return into emulator if single-stepping. */ vcpu->arch.pio.count = 0; - else { + } else { writeback = false; vcpu->arch.complete_userspace_io = complete_emulated_pio; } @@ -5588,6 +5613,23 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) return 1; } +/* + * kvm_pv_kick_cpu_op: Kick a vcpu. + * + * @apicid - apicid of vcpu to be kicked. 
+ */ +static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) +{ + struct kvm_lapic_irq lapic_irq; + + lapic_irq.shorthand = 0; + lapic_irq.dest_mode = 0; + lapic_irq.dest_id = apicid; + + lapic_irq.delivery_mode = APIC_DM_REMRD; + kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL); +} + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; @@ -5621,6 +5663,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) case KVM_HC_VAPIC_POLL_IRQ: ret = 0; break; + case KVM_HC_KICK_CPU: + kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); + ret = 0; + break; default: ret = -KVM_ENOSYS; break; @@ -5782,29 +5828,6 @@ static void process_nmi(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_EVENT, vcpu); } -static void kvm_gen_update_masterclock(struct kvm *kvm) -{ -#ifdef CONFIG_X86_64 - int i; - struct kvm_vcpu *vcpu; - struct kvm_arch *ka = &kvm->arch; - - spin_lock(&ka->pvclock_gtod_sync_lock); - kvm_make_mclock_inprogress_request(kvm); - /* no guest entries from this point */ - pvclock_update_vm_gtod_copy(kvm); - - kvm_for_each_vcpu(i, vcpu, kvm) - set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); - - /* guest entries allowed */ - kvm_for_each_vcpu(i, vcpu, kvm) - clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests); - - spin_unlock(&ka->pvclock_gtod_sync_lock); -#endif -} - static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { u64 eoi_exit_bitmap[4]; @@ -6043,6 +6066,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) kvm_apic_accept_events(vcpu); switch(vcpu->arch.mp_state) { case KVM_MP_STATE_HALTED: + vcpu->arch.pv.pv_unhalted = false; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; case KVM_MP_STATE_RUNNABLE: @@ -6154,6 +6178,8 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { vcpu->mmio_needed = 0; + + /* FIXME: return into emulator if single-stepping. 
*/ if (vcpu->mmio_is_write) return 1; vcpu->mmio_read_completed = 1; @@ -6342,7 +6368,12 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { kvm_apic_accept_events(vcpu); - mp_state->mp_state = vcpu->arch.mp_state; + if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && + vcpu->arch.pv.pv_unhalted) + mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + else + mp_state->mp_state = vcpu->arch.mp_state; + return 0; } @@ -6863,6 +6894,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) BUG_ON(vcpu->kvm == NULL); kvm = vcpu->kvm; + vcpu->arch.pv.pv_unhalted = false; vcpu->arch.emulate_ctxt.ops = &emulate_ops; if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -7200,6 +7232,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) !vcpu->arch.apf.halted) || !list_empty_careful(&vcpu->async_pf.done) || kvm_apic_has_events(vcpu) + || vcpu->arch.pv.pv_unhalted || atomic_read(&vcpu->arch.nmi_queued) || (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)); diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 5daa2599ed48..e373671652b0 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -200,11 +200,9 @@ config DMA_SHARED_BUFFER APIs extension; the file's descriptor can then be passed on to other driver. -config CMA - bool "Contiguous Memory Allocator" - depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK - select MIGRATION - select MEMORY_ISOLATION +config DMA_CMA + bool "DMA Contiguous Memory Allocator" + depends on HAVE_DMA_CONTIGUOUS && CMA help This enables the Contiguous Memory Allocator which allows drivers to allocate big physically-contiguous blocks of memory for use with @@ -213,17 +211,7 @@ config CMA For more information see <include/linux/dma-contiguous.h>. If unsure, say "n". -if CMA - -config CMA_DEBUG - bool "CMA debug messages (DEVELOPMENT)" - depends on DEBUG_KERNEL - help - Turns on debug messages in CMA. 
This produces KERN_DEBUG - messages for every CMA call as well as various messages while - processing calls such as dma_alloc_from_contiguous(). - This option does not affect warning and error messages. - +if DMA_CMA comment "Default contiguous memory area size:" config CMA_SIZE_MBYTES diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 48029aa477d9..94e8a80e87f8 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -6,7 +6,7 @@ obj-y := core.o bus.o dd.o syscore.o \ attribute_container.o transport_class.o \ topology.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o -obj-$(CONFIG_CMA) += dma-contiguous.o +obj-$(CONFIG_DMA_CMA) += dma-contiguous.o obj-y += power/ obj-$(CONFIG_HAS_DMA) += dma-mapping.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 343744e4809c..7e2d15837b02 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -26,7 +26,7 @@ #include <linux/types.h> #include <linux/irqchip/arm-gic.h> -#define VGIC_NR_IRQS 128 +#define VGIC_NR_IRQS 256 #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 01b5c84be828..00141d3325fe 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -57,7 +57,7 @@ struct cma; struct page; struct device; -#ifdef CONFIG_CMA +#ifdef CONFIG_DMA_CMA /* * There is always at least global CMA area and a few optional device diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index acccd08be6c7..99c25338ede8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -667,6 +667,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_RTAS 91 #define KVM_CAP_IRQ_XICS 92 #define KVM_CAP_ARM_EL1_32BIT 93 +#define KVM_CAP_SPAPR_MULTITCE 94 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h index 
cea2c5c72d26..2841f86eae0b 100644 --- a/include/uapi/linux/kvm_para.h +++ b/include/uapi/linux/kvm_para.h @@ -19,6 +19,7 @@ #define KVM_HC_MMU_OP 2 #define KVM_HC_FEATURES 3 #define KVM_HC_PPC_MAP_MAGIC_PAGE 4 +#define KVM_HC_KICK_CPU 5 /* * hypercalls use architecture specific diff --git a/mm/Kconfig b/mm/Kconfig index 8028dcc6615c..6cdd27043303 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -478,6 +478,30 @@ config FRONTSWAP If unsure, say Y to enable frontswap. +config CMA + bool "Contiguous Memory Allocator" + depends on HAVE_MEMBLOCK + select MIGRATION + select MEMORY_ISOLATION + help + This enables the Contiguous Memory Allocator which allows other + subsystems to allocate big physically-contiguous blocks of memory. + CMA reserves a region of memory and allows only movable pages to + be allocated from it. This way, the kernel can use the memory for + pagecache and when a subsystem requests for contiguous area, the + allocated pages are migrated away to serve the contiguous request. + + If unsure, say "n". + +config CMA_DEBUG + bool "CMA debug messages (DEVELOPMENT)" + depends on DEBUG_KERNEL && CMA + help + Turns on debug messages in CMA. This produces KERN_DEBUG + messages for every CMA call as well as various messages while + processing calls such as dma_alloc_from_contiguous(). + This option does not affect warning and error messages. 
+ config ZBUD tristate default n diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 17c5ac7d10ed..685fc72fc751 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -149,7 +149,7 @@ static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) { offset >>= 2; BUG_ON(offset > (VGIC_NR_IRQS / 4)); - if (offset < 4) + if (offset < 8) return x->percpu[cpuid] + offset; else return x->shared + offset - 8; @@ -432,19 +432,13 @@ static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, static u32 vgic_get_target_reg(struct kvm *kvm, int irq) { struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int i, c; - unsigned long *bmap; + int i; u32 val = 0; irq -= VGIC_NR_PRIVATE_IRQS; - kvm_for_each_vcpu(c, vcpu, kvm) { - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) - if (test_bit(irq + i, bmap)) - val |= 1 << (c + i * 8); - } + for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) + val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8); return val; } @@ -547,8 +541,12 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { u32 val; - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset >> 1); + u32 *reg; + + offset >>= 1; + reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, + vcpu->vcpu_id, offset); + if (offset & 2) val = *reg >> 16; else diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c6c8bbea1748..bf040c4e02b3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -102,28 +102,8 @@ static bool largepages_enabled = true; bool kvm_is_mmio_pfn(pfn_t pfn) { - if (pfn_valid(pfn)) { - int reserved; - struct page *tail = pfn_to_page(pfn); - struct page *head = compound_trans_head(tail); - reserved = PageReserved(head); - if (head != tail) { - /* - * "head" is not a dangling pointer - * (compound_trans_head takes care of that) - * but the hugepage may have been splitted - 
* from under us (and we may not hold a - * reference count on the head page so it can - * be reused before we run PageReferenced), so - * we've to check PageTail before returning - * what we just read. - */ - smp_rmb(); - if (PageTail(tail)) - return reserved; - } - return PageReserved(tail); - } + if (pfn_valid(pfn)) + return PageReserved(pfn_to_page(pfn)); return true; } @@ -1896,7 +1876,7 @@ static struct file_operations kvm_vcpu_fops = { */ static int create_vcpu_fd(struct kvm_vcpu *vcpu) { - return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); + return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); } /* @@ -2305,7 +2285,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return ret; } - ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR); + ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); if (ret < 0) { ops->destroy(dev); return ret; @@ -2589,7 +2569,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } #endif - r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); + r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC); if (r < 0) kvm_put_kvm(kvm); @@ -2815,8 +2795,8 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) kfree(bus); } -static inline int __kvm_io_bus_sort_cmp(const struct kvm_io_range *r1, - const struct kvm_io_range *r2) +static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1, + const struct kvm_io_range *r2) { if (r1->addr < r2->addr) return -1; @@ -2827,7 +2807,7 @@ static inline int __kvm_io_bus_sort_cmp(const struct kvm_io_range *r1, static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) { - return __kvm_io_bus_sort_cmp(p1, p2); + return kvm_io_bus_cmp(p1, p2); } static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, @@ -2863,7 +2843,7 @@ static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, off = range - bus->range; - while (off > 0 && 
__kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0) + while (off > 0 && kvm_io_bus_cmp(&key, &bus->range[off-1]) == 0) off--; return off; @@ -2879,7 +2859,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus, return -EOPNOTSUPP; while (idx < bus->dev_count && - __kvm_io_bus_sort_cmp(range, &bus->range[idx]) == 0) { + kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { if (!kvm_iodevice_write(bus->range[idx].dev, range->addr, range->len, val)) return idx; @@ -2923,7 +2903,7 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && - (__kvm_io_bus_sort_cmp(&range, &bus->range[cookie]) == 0)) + (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len, val)) return cookie; @@ -2945,7 +2925,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, return -EOPNOTSUPP; while (idx < bus->dev_count && - __kvm_io_bus_sort_cmp(range, &bus->range[idx]) == 0) { + kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { if (!kvm_iodevice_read(bus->range[idx].dev, range->addr, range->len, val)) return idx; @@ -2989,7 +2969,7 @@ int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && - (__kvm_io_bus_sort_cmp(&range, &bus->range[cookie]) == 0)) + (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) if (!kvm_iodevice_read(bus->range[cookie].dev, addr, len, val)) return cookie; |