44 files changed, 1314 insertions, 946 deletions
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 83afe65d4966..22ff659bc0fb 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -43,6 +43,10 @@ KVM_FEATURE_CLOCKSOURCE2           ||     3 || kvmclock available at msrs
 KVM_FEATURE_ASYNC_PF               ||     4 || async pf can be enabled by
                                    ||       || writing to msr 0x4b564d02
 ------------------------------------------------------------------------------
+KVM_FEATURE_PV_UNHALT              ||     7 || guest checks this feature bit
+                                   ||       || before enabling paravirtualized
+                                   ||       || spinlock support.
+------------------------------------------------------------------------------
 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                    ||       || per-cpu warps are expected in
                                    ||       || kvmclock.
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index ea113b5d87a4..022198e389d7 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -64,3 +64,17 @@ Purpose: To enable communication between the hypervisor and guest there is a
 shared page that contains parts of supervisor visible register state.
 The guest can map this shared page to access its supervisor register through
 memory using this hypercall.
+
+5. KVM_HC_KICK_CPU
+------------------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to wakeup a vcpu from HLT state
+Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest
+kernel mode for an event to occur (ex: a spinlock to become available) can
+execute HLT instruction once it has busy-waited for more than a threshold
+time-interval. Execution of HLT instruction would cause the hypervisor to put
+the vcpu to sleep until occurence of an appropriate event. Another vcpu of the
+same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
+specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
+is used in the hypercall for future use.
diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h
index 3ed37b4d93da..e072bb2ba1b1 100644
--- a/arch/arm/include/asm/dma-contiguous.h
+++ b/arch/arm/include/asm/dma-contiguous.h
@@ -2,7 +2,7 @@
 #define ASMARM_DMA_CONTIGUOUS_H
 
 #ifdef __KERNEL__
-#ifdef CONFIG_CMA
+#ifdef CONFIG_DMA_CMA
 
 #include <linux/types.h>
 #include <asm-generic/dma-contiguous.h>
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 7f9b1798c6cf..dbddc07a3bbd 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -358,7 +358,7 @@ static int __init atomic_pool_init(void)
 	if (!pages)
 		goto no_pages;
 
-	if (IS_ENABLED(CONFIG_CMA))
+	if (IS_ENABLED(CONFIG_DMA_CMA))
 		ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page,
 					      atomic_pool_init);
 	else
@@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 		addr = __alloc_simple_buffer(dev, size, gfp, &page);
 	else if (!(gfp & __GFP_WAIT))
 		addr = __alloc_from_pool(size, &page);
-	else if (!IS_ENABLED(CONFIG_CMA))
+	else if (!IS_ENABLED(CONFIG_DMA_CMA))
 		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
 	else
 		addr = __alloc_from_contiguous(dev, size, prot, &page, caller);
@@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 		__dma_free_buffer(page, size);
 	} else if (__free_from_pool(cpu_addr, size)) {
 		return;
-	} else if (!IS_ENABLED(CONFIG_CMA)) {
+	} else if (!IS_ENABLED(CONFIG_DMA_CMA)) {
 		__dma_free_remap(cpu_addr, size);
 		__dma_free_buffer(page, size);
 	} else {
diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/kvm_locore.S
index dca2aa665993..bbace092ad0a 100644
--- a/arch/mips/kvm/kvm_locore.S
+++ b/arch/mips/kvm/kvm_locore.S
@@ -1,13 +1,13 @@
 /*
-* This file is subject to the terms and conditions of the GNU General Public
-* License.  See the file "COPYING" in the main directory of this archive
-* for more details.
-*
-* Main entry point for the guest, exception handling.
-*
-* Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
-* Authors: Sanjay Lal <[email protected]>
-*/
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Main entry point for the guest, exception handling.
+ *
+ * Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
+ * Authors: Sanjay Lal <[email protected]>
+ */
 
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
@@ -55,195 +55,193 @@
  * a0: run
  * a1: vcpu
  */
+	.set	noreorder
+	.set	noat
 
 FEXPORT(__kvm_mips_vcpu_run)
-    .set    push
-    .set    noreorder
-    .set    noat
-
-    /* k0/k1 not being used in host kernel context */
-	addiu  		k1,sp, -PT_SIZE
-    LONG_S	    $0, PT_R0(k1)
-    LONG_S     	$1, PT_R1(k1)
-    LONG_S     	$2, PT_R2(k1)
-    LONG_S     	$3, PT_R3(k1)
-
-    LONG_S     	$4, PT_R4(k1)
-    LONG_S     	$5, PT_R5(k1)
-    LONG_S     	$6, PT_R6(k1)
-    LONG_S     	$7, PT_R7(k1)
-
-    LONG_S     	$8,  PT_R8(k1)
-    LONG_S     	$9,  PT_R9(k1)
-    LONG_S     	$10, PT_R10(k1)
-    LONG_S     	$11, PT_R11(k1)
-    LONG_S     	$12, PT_R12(k1)
-    LONG_S     	$13, PT_R13(k1)
-    LONG_S     	$14, PT_R14(k1)
-    LONG_S     	$15, PT_R15(k1)
-    LONG_S     	$16, PT_R16(k1)
-    LONG_S     	$17, PT_R17(k1)
-
-    LONG_S     	$18, PT_R18(k1)
-    LONG_S     	$19, PT_R19(k1)
-    LONG_S     	$20, PT_R20(k1)
-    LONG_S     	$21, PT_R21(k1)
-    LONG_S     	$22, PT_R22(k1)
-    LONG_S     	$23, PT_R23(k1)
-    LONG_S     	$24, PT_R24(k1)
-    LONG_S     	$25, PT_R25(k1)
+	/* k0/k1 not being used in host kernel context */
+	INT_ADDIU k1, sp, -PT_SIZE
+	LONG_S	$0, PT_R0(k1)
+	LONG_S	$1, PT_R1(k1)
+	LONG_S	$2, PT_R2(k1)
+	LONG_S	$3, PT_R3(k1)
+
+	LONG_S	$4, PT_R4(k1)
+	LONG_S	$5, PT_R5(k1)
+	LONG_S	$6, PT_R6(k1)
+	LONG_S	$7, PT_R7(k1)
+
+	LONG_S	$8,  PT_R8(k1)
+	LONG_S	$9,  PT_R9(k1)
+	LONG_S	$10, PT_R10(k1)
+	LONG_S	$11, PT_R11(k1)
+	LONG_S	$12, PT_R12(k1)
+	LONG_S	$13, PT_R13(k1)
+	LONG_S	$14, PT_R14(k1)
+	LONG_S	$15, PT_R15(k1)
+	LONG_S	$16, PT_R16(k1)
+	LONG_S	$17, PT_R17(k1)
+
+	LONG_S	$18, PT_R18(k1)
+	LONG_S	$19, PT_R19(k1)
+	LONG_S	$20, PT_R20(k1)
+	LONG_S	$21, PT_R21(k1)
+	LONG_S	$22, PT_R22(k1)
+	LONG_S	$23, PT_R23(k1)
+	LONG_S	$24, PT_R24(k1)
+	LONG_S	$25, PT_R25(k1)
 
 	/* XXXKYMA k0/k1 not saved, not being used if we got here through an ioctl() */
 
-    LONG_S     	$28, PT_R28(k1)
-    LONG_S     	$29, PT_R29(k1)
-    LONG_S     	$30, PT_R30(k1)
-    LONG_S     	$31, PT_R31(k1)
+	LONG_S	$28, PT_R28(k1)
+	LONG_S	$29, PT_R29(k1)
+	LONG_S	$30, PT_R30(k1)
+	LONG_S	$31, PT_R31(k1)
 
-    /* Save hi/lo */
-	mflo		v0
-	LONG_S		v0, PT_LO(k1)
-	mfhi   		v1
-	LONG_S		v1, PT_HI(k1)
+	/* Save hi/lo */
+	mflo	v0
+	LONG_S	v0, PT_LO(k1)
+	mfhi	v1
+	LONG_S	v1, PT_HI(k1)
 
 	/* Save host status */
-	mfc0		v0, CP0_STATUS
-	LONG_S		v0, PT_STATUS(k1)
+	mfc0	v0, CP0_STATUS
+	LONG_S	v0, PT_STATUS(k1)
 
 	/* Save host ASID, shove it into the BVADDR location */
-	mfc0 		v1,CP0_ENTRYHI
-	andi		v1, 0xff
-	LONG_S		v1, PT_HOST_ASID(k1)
+	mfc0	v1, CP0_ENTRYHI
+	andi	v1, 0xff
+	LONG_S	v1, PT_HOST_ASID(k1)
 
-    /* Save DDATA_LO, will be used to store pointer to vcpu */
-    mfc0        v1, CP0_DDATA_LO
-    LONG_S      v1, PT_HOST_USERLOCAL(k1)
+	/* Save DDATA_LO, will be used to store pointer to vcpu */
+	mfc0	v1, CP0_DDATA_LO
+	LONG_S	v1, PT_HOST_USERLOCAL(k1)
 
-    /* DDATA_LO has pointer to vcpu */
-    mtc0        a1,CP0_DDATA_LO
+	/* DDATA_LO has pointer to vcpu */
+	mtc0	a1, CP0_DDATA_LO
 
-    /* Offset into vcpu->arch */
-	addiu		k1, a1, VCPU_HOST_ARCH
+	/* Offset into vcpu->arch */
+	INT_ADDIU k1, a1, VCPU_HOST_ARCH
 
-    /* Save the host stack to VCPU, used for exception processing when we exit from the Guest */
-    LONG_S      sp, VCPU_HOST_STACK(k1)
+	/*
+	 * Save the host stack to VCPU, used for exception processing
+	 * when we exit from the Guest
+	 */
+	LONG_S	sp, VCPU_HOST_STACK(k1)
 
-    /* Save the kernel gp as well */
-    LONG_S      gp, VCPU_HOST_GP(k1)
+	/* Save the kernel gp as well */
+	LONG_S	gp, VCPU_HOST_GP(k1)
 
 	/* Setup status register for running the guest in UM, interrupts are disabled */
-	li			k0,(ST0_EXL | KSU_USER| ST0_BEV)
-	mtc0		k0,CP0_STATUS
-    ehb
-
-    /* load up the new EBASE */
-    LONG_L      k0, VCPU_GUEST_EBASE(k1)
-    mtc0        k0,CP0_EBASE
-
-    /* Now that the new EBASE has been loaded, unset BEV, set interrupt mask as it was
-     * but make sure that timer interrupts are enabled
-     */
-    li          k0,(ST0_EXL | KSU_USER | ST0_IE)
-    andi        v0, v0, ST0_IM
-    or          k0, k0, v0
-    mtc0        k0,CP0_STATUS
-    ehb
+	li	k0, (ST0_EXL | KSU_USER | ST0_BEV)
+	mtc0	k0, CP0_STATUS
+	ehb
+
+	/* load up the new EBASE */
+	LONG_L	k0, VCPU_GUEST_EBASE(k1)
+	mtc0	k0, CP0_EBASE
+
+	/*
+	 * Now that the new EBASE has been loaded, unset BEV, set
+	 * interrupt mask as it was but make sure that timer interrupts
+	 * are enabled
+	 */
+	li	k0, (ST0_EXL | KSU_USER | ST0_IE)
+	andi	v0, v0, ST0_IM
+	or	k0, k0, v0
+	mtc0	k0, CP0_STATUS
+	ehb
 
 
 	/* Set Guest EPC */
-	LONG_L		t0, VCPU_PC(k1)
-	mtc0		t0, CP0_EPC
+	LONG_L	t0, VCPU_PC(k1)
+	mtc0	t0, CP0_EPC
 
 FEXPORT(__kvm_mips_load_asid)
-    /* Set the ASID for the Guest Kernel */
-    sll         t0, t0, 1                       /* with kseg0 @ 0x40000000, kernel */
-                                                /* addresses shift to 0x80000000 */
-    bltz        t0, 1f                          /* If kernel */
-	addiu       t1, k1, VCPU_GUEST_KERNEL_ASID  /* (BD)  */
-    addiu       t1, k1, VCPU_GUEST_USER_ASID    /* else user */
+	/* Set the ASID for the Guest Kernel */
+	INT_SLL	t0, t0, 1	/* with kseg0 @ 0x40000000, kernel */
+			        /* addresses shift to 0x80000000 */
+	bltz	t0, 1f		/* If kernel */
+	 INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID  /* (BD)  */
+	INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID    /* else user */
 1:
-    /* t1: contains the base of the ASID array, need to get the cpu id  */
-    LONG_L      t2, TI_CPU($28)             /* smp_processor_id */
-    sll         t2, t2, 2                   /* x4 */
-    addu        t3, t1, t2
-    LONG_L      k0, (t3)
-    andi        k0, k0, 0xff
-	mtc0		k0,CP0_ENTRYHI
-    ehb
-
-    /* Disable RDHWR access */
-    mtc0    zero,  CP0_HWRENA
-
-    /* Now load up the Guest Context from VCPU */
-    LONG_L     	$1, VCPU_R1(k1)
-    LONG_L     	$2, VCPU_R2(k1)
-    LONG_L     	$3, VCPU_R3(k1)
-
-    LONG_L     	$4, VCPU_R4(k1)
-    LONG_L     	$5, VCPU_R5(k1)
-    LONG_L     	$6, VCPU_R6(k1)
-    LONG_L     	$7, VCPU_R7(k1)
-
-    LONG_L     	$8,  VCPU_R8(k1)
-    LONG_L     	$9,  VCPU_R9(k1)
-    LONG_L     	$10, VCPU_R10(k1)
-    LONG_L     	$11, VCPU_R11(k1)
-    LONG_L     	$12, VCPU_R12(k1)
-    LONG_L     	$13, VCPU_R13(k1)
-    LONG_L     	$14, VCPU_R14(k1)
-    LONG_L     	$15, VCPU_R15(k1)
-    LONG_L     	$16, VCPU_R16(k1)
-    LONG_L     	$17, VCPU_R17(k1)
-    LONG_L     	$18, VCPU_R18(k1)
-    LONG_L     	$19, VCPU_R19(k1)
-    LONG_L     	$20, VCPU_R20(k1)
-    LONG_L     	$21, VCPU_R21(k1)
-    LONG_L     	$22, VCPU_R22(k1)
-    LONG_L     	$23, VCPU_R23(k1)
-    LONG_L     	$24, VCPU_R24(k1)
-    LONG_L     	$25, VCPU_R25(k1)
-
-    /* k0/k1 loaded up later */
-
-    LONG_L     	$28, VCPU_R28(k1)
-    LONG_L     	$29, VCPU_R29(k1)
-    LONG_L     	$30, VCPU_R30(k1)
-    LONG_L     	$31, VCPU_R31(k1)
-
-    /* Restore hi/lo */
-	LONG_L		k0, VCPU_LO(k1)
-	mtlo		k0
-
-	LONG_L		k0, VCPU_HI(k1)
-	mthi   		k0
+	     /* t1: contains the base of the ASID array, need to get the cpu id  */
+	LONG_L	t2, TI_CPU($28)             /* smp_processor_id */
+	INT_SLL	t2, t2, 2                   /* x4 */
+	REG_ADDU t3, t1, t2
+	LONG_L	k0, (t3)
+	andi	k0, k0, 0xff
+	mtc0	k0, CP0_ENTRYHI
+	ehb
+
+	/* Disable RDHWR access */
+	mtc0	zero, CP0_HWRENA
+
+	/* Now load up the Guest Context from VCPU */
+	LONG_L	$1, VCPU_R1(k1)
+	LONG_L	$2, VCPU_R2(k1)
+	LONG_L	$3, VCPU_R3(k1)
+
+	LONG_L	$4, VCPU_R4(k1)
+	LONG_L	$5, VCPU_R5(k1)
+	LONG_L	$6, VCPU_R6(k1)
+	LONG_L	$7, VCPU_R7(k1)
+
+	LONG_L	$8, VCPU_R8(k1)
+	LONG_L	$9, VCPU_R9(k1)
+	LONG_L	$10, VCPU_R10(k1)
+	LONG_L	$11, VCPU_R11(k1)
+	LONG_L	$12, VCPU_R12(k1)
+	LONG_L	$13, VCPU_R13(k1)
+	LONG_L	$14, VCPU_R14(k1)
+	LONG_L	$15, VCPU_R15(k1)
+	LONG_L	$16, VCPU_R16(k1)
+	LONG_L	$17, VCPU_R17(k1)
+	LONG_L	$18, VCPU_R18(k1)
+	LONG_L	$19, VCPU_R19(k1)
+	LONG_L	$20, VCPU_R20(k1)
+	LONG_L	$21, VCPU_R21(k1)
+	LONG_L	$22, VCPU_R22(k1)
+	LONG_L	$23, VCPU_R23(k1)
+	LONG_L	$24, VCPU_R24(k1)
+	LONG_L	$25, VCPU_R25(k1)
+
+	/* k0/k1 loaded up later */
+
+	LONG_L	$28, VCPU_R28(k1)
+	LONG_L	$29, VCPU_R29(k1)
+	LONG_L	$30, VCPU_R30(k1)
+	LONG_L	$31, VCPU_R31(k1)
+
+	/* Restore hi/lo */
+	LONG_L	k0, VCPU_LO(k1)
+	mtlo	k0
+
+	LONG_L	k0, VCPU_HI(k1)
+	mthi	k0
 
 FEXPORT(__kvm_mips_load_k0k1)
 	/* Restore the guest's k0/k1 registers */
-    LONG_L     	k0, VCPU_R26(k1)
-    LONG_L     	k1, VCPU_R27(k1)
+	LONG_L	k0, VCPU_R26(k1)
+	LONG_L	k1, VCPU_R27(k1)
 
-    /* Jump to guest */
+	/* Jump to guest */
 	eret
-	.set	pop
 
 VECTOR(MIPSX(exception), unknown)
 /*
  * Find out what mode we came from and jump to the proper handler.
  */
-    .set    push
-	.set	noat
-    .set    noreorder
-    mtc0    k0, CP0_ERROREPC    #01: Save guest k0
-    ehb                         #02:
-
-    mfc0    k0, CP0_EBASE       #02: Get EBASE
-    srl     k0, k0, 10          #03: Get rid of CPUNum
-    sll     k0, k0, 10          #04
-    LONG_S  k1, 0x3000(k0)      #05: Save k1 @ offset 0x3000
-    addiu   k0, k0, 0x2000      #06: Exception handler is installed @ offset 0x2000
-	j	k0				        #07: jump to the function
-	nop				        	#08: branch delay slot
-	.set	push
+	mtc0	k0, CP0_ERROREPC	#01: Save guest k0
+	ehb				#02:
+
+	mfc0	k0, CP0_EBASE		#02: Get EBASE
+	INT_SRL	k0, k0, 10		#03: Get rid of CPUNum
+	INT_SLL	k0, k0, 10		#04
+	LONG_S	k1, 0x3000(k0)		#05: Save k1 @ offset 0x3000
+	INT_ADDIU k0, k0, 0x2000		#06: Exception handler is installed @ offset 0x2000
+	j	k0			#07: jump to the function
+	 nop				#08: branch delay slot
 VECTOR_END(MIPSX(exceptionEnd))
 .end MIPSX(exception)
 
@@ -253,329 +251,327 @@ VECTOR_END(MIPSX(exceptionEnd))
  *
  */
 NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra)
-    .set    push
-    .set    noat
-    .set    noreorder
-
-    /* Get the VCPU pointer from DDTATA_LO */
-    mfc0        k1, CP0_DDATA_LO
-	addiu		k1, k1, VCPU_HOST_ARCH
-
-    /* Start saving Guest context to VCPU */
-    LONG_S  $0, VCPU_R0(k1)
-    LONG_S  $1, VCPU_R1(k1)
-    LONG_S  $2, VCPU_R2(k1)
-    LONG_S  $3, VCPU_R3(k1)
-    LONG_S  $4, VCPU_R4(k1)
-    LONG_S  $5, VCPU_R5(k1)
-    LONG_S  $6, VCPU_R6(k1)
-    LONG_S  $7, VCPU_R7(k1)
-    LONG_S  $8, VCPU_R8(k1)
-    LONG_S  $9, VCPU_R9(k1)
-    LONG_S  $10, VCPU_R10(k1)
-    LONG_S  $11, VCPU_R11(k1)
-    LONG_S  $12, VCPU_R12(k1)
-    LONG_S  $13, VCPU_R13(k1)
-    LONG_S  $14, VCPU_R14(k1)
-    LONG_S  $15, VCPU_R15(k1)
-    LONG_S  $16, VCPU_R16(k1)
-    LONG_S  $17,VCPU_R17(k1)
-    LONG_S  $18, VCPU_R18(k1)
-    LONG_S  $19, VCPU_R19(k1)
-    LONG_S  $20, VCPU_R20(k1)
-    LONG_S  $21, VCPU_R21(k1)
-    LONG_S  $22, VCPU_R22(k1)
-    LONG_S  $23, VCPU_R23(k1)
-    LONG_S  $24, VCPU_R24(k1)
-    LONG_S  $25, VCPU_R25(k1)
-
-    /* Guest k0/k1 saved later */
-
-    LONG_S  $28, VCPU_R28(k1)
-    LONG_S  $29, VCPU_R29(k1)
-    LONG_S  $30, VCPU_R30(k1)
-    LONG_S  $31, VCPU_R31(k1)
-
-    /* We need to save hi/lo and restore them on
-     * the way out
-     */
-    mfhi    t0
-    LONG_S  t0, VCPU_HI(k1)
-
-    mflo    t0
-    LONG_S  t0, VCPU_LO(k1)
-
-    /* Finally save guest k0/k1 to VCPU */
-    mfc0    t0, CP0_ERROREPC
-    LONG_S  t0, VCPU_R26(k1)
-
-    /* Get GUEST k1 and save it in VCPU */
-    la      t1, ~0x2ff
-    mfc0    t0, CP0_EBASE
-    and     t0, t0, t1
-    LONG_L  t0, 0x3000(t0)
-    LONG_S  t0, VCPU_R27(k1)
-
-    /* Now that context has been saved, we can use other registers */
-
-    /* Restore vcpu */
-    mfc0        a1, CP0_DDATA_LO
-    move        s1, a1
-
-   /* Restore run (vcpu->run) */
-    LONG_L      a0, VCPU_RUN(a1)
-    /* Save pointer to run in s0, will be saved by the compiler */
-    move        s0, a0
-
-
-    /* Save Host level EPC, BadVaddr and Cause to VCPU, useful to process the exception */
-    mfc0    k0,CP0_EPC
-    LONG_S  k0, VCPU_PC(k1)
-
-    mfc0    k0, CP0_BADVADDR
-    LONG_S  k0, VCPU_HOST_CP0_BADVADDR(k1)
-
-    mfc0    k0, CP0_CAUSE
-    LONG_S  k0, VCPU_HOST_CP0_CAUSE(k1)
-
-    mfc0    k0, CP0_ENTRYHI
-    LONG_S  k0, VCPU_HOST_ENTRYHI(k1)
-
-    /* Now restore the host state just enough to run the handlers */
-
-    /* Swtich EBASE to the one used by Linux */
-    /* load up the host EBASE */
-    mfc0        v0, CP0_STATUS
-
-    .set at
-	or          k0, v0, ST0_BEV
-    .set noat
-
-    mtc0        k0, CP0_STATUS
-    ehb
-
-    LONG_L      k0, VCPU_HOST_EBASE(k1)
-    mtc0        k0,CP0_EBASE
-
-
-    /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
-    .set at
-	and         v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE)
-    or          v0, v0, ST0_CU0
-    .set noat
-    mtc0        v0, CP0_STATUS
-    ehb
-
-    /* Load up host GP */
-    LONG_L  gp, VCPU_HOST_GP(k1)
-
-    /* Need a stack before we can jump to "C" */
-    LONG_L  sp, VCPU_HOST_STACK(k1)
-
-    /* Saved host state */
-    addiu   sp,sp, -PT_SIZE
+	/* Get the VCPU pointer from DDTATA_LO */
+	mfc0	k1, CP0_DDATA_LO
+	INT_ADDIU k1, k1, VCPU_HOST_ARCH
+
+	/* Start saving Guest context to VCPU */
+	LONG_S	$0, VCPU_R0(k1)
+	LONG_S	$1, VCPU_R1(k1)
+	LONG_S	$2, VCPU_R2(k1)
+	LONG_S	$3, VCPU_R3(k1)
+	LONG_S	$4, VCPU_R4(k1)
+	LONG_S	$5, VCPU_R5(k1)
+	LONG_S	$6, VCPU_R6(k1)
+	LONG_S	$7, VCPU_R7(k1)
+	LONG_S	$8, VCPU_R8(k1)
+	LONG_S	$9, VCPU_R9(k1)
+	LONG_S	$10, VCPU_R10(k1)
+	LONG_S	$11, VCPU_R11(k1)
+	LONG_S	$12, VCPU_R12(k1)
+	LONG_S	$13, VCPU_R13(k1)
+	LONG_S	$14, VCPU_R14(k1)
+	LONG_S	$15, VCPU_R15(k1)
+	LONG_S	$16, VCPU_R16(k1)
+	LONG_S	$17, VCPU_R17(k1)
+	LONG_S	$18, VCPU_R18(k1)
+	LONG_S	$19, VCPU_R19(k1)
+	LONG_S	$20, VCPU_R20(k1)
+	LONG_S	$21, VCPU_R21(k1)
+	LONG_S	$22, VCPU_R22(k1)
+	LONG_S	$23, VCPU_R23(k1)
+	LONG_S	$24, VCPU_R24(k1)
+	LONG_S	$25, VCPU_R25(k1)
+
+	/* Guest k0/k1 saved later */
+
+	LONG_S	$28, VCPU_R28(k1)
+	LONG_S	$29, VCPU_R29(k1)
+	LONG_S	$30, VCPU_R30(k1)
+	LONG_S	$31, VCPU_R31(k1)
+
+	/* We need to save hi/lo and restore them on
+	 * the way out
+	 */
+	mfhi	t0
+	LONG_S	t0, VCPU_HI(k1)
+
+	mflo	t0
+	LONG_S	t0, VCPU_LO(k1)
+
+	/* Finally save guest k0/k1 to VCPU */
+	mfc0	t0, CP0_ERROREPC
+	LONG_S	t0, VCPU_R26(k1)
+
+	/* Get GUEST k1 and save it in VCPU */
+	PTR_LI	t1, ~0x2ff
+	mfc0	t0, CP0_EBASE
+	and	t0, t0, t1
+	LONG_L	t0, 0x3000(t0)
+	LONG_S	t0, VCPU_R27(k1)
+
+	/* Now that context has been saved, we can use other registers */
+
+	/* Restore vcpu */
+	mfc0	a1, CP0_DDATA_LO
+	move	s1, a1
+
+	/* Restore run (vcpu->run) */
+	LONG_L	a0, VCPU_RUN(a1)
+	/* Save pointer to run in s0, will be saved by the compiler */
+	move	s0, a0
+
+	/* Save Host level EPC, BadVaddr and Cause to VCPU, useful to
+	 * process the exception */
+	mfc0	k0,CP0_EPC
+	LONG_S	k0, VCPU_PC(k1)
+
+	mfc0	k0, CP0_BADVADDR
+	LONG_S	k0, VCPU_HOST_CP0_BADVADDR(k1)
+
+	mfc0	k0, CP0_CAUSE
+	LONG_S	k0, VCPU_HOST_CP0_CAUSE(k1)
+
+	mfc0	k0, CP0_ENTRYHI
+	LONG_S	k0, VCPU_HOST_ENTRYHI(k1)
+
+	/* Now restore the host state just enough to run the handlers */
+
+	/* Swtich EBASE to the one used by Linux */
+	/* load up the host EBASE */
+	mfc0	v0, CP0_STATUS
+
+	.set	at
+	or	k0, v0, ST0_BEV
+	.set	noat
+
+	mtc0	k0, CP0_STATUS
+	ehb
+
+	LONG_L	k0, VCPU_HOST_EBASE(k1)
+	mtc0	k0,CP0_EBASE
+
 
-    /* XXXKYMA do we need to load the host ASID, maybe not because the
-     * kernel entries are marked GLOBAL, need to verify
-     */
+	/* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
+	.set	at
+	and	v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE)
+	or	v0, v0, ST0_CU0
+	.set	noat
+	mtc0	v0, CP0_STATUS
+	ehb
+
+	/* Load up host GP */
+	LONG_L	gp, VCPU_HOST_GP(k1)
+
+	/* Need a stack before we can jump to "C" */
+	LONG_L	sp, VCPU_HOST_STACK(k1)
+
+	/* Saved host state */
+	INT_ADDIU sp, sp, -PT_SIZE
 
-    /* Restore host DDATA_LO */
-    LONG_L      k0, PT_HOST_USERLOCAL(sp)
-    mtc0        k0, CP0_DDATA_LO
+	/* XXXKYMA do we need to load the host ASID, maybe not because the
+	 * kernel entries are marked GLOBAL, need to verify
+	 */
 
-    /* Restore RDHWR access */
-    la      k0, 0x2000000F
-    mtc0    k0,  CP0_HWRENA
+	/* Restore host DDATA_LO */
+	LONG_L	k0, PT_HOST_USERLOCAL(sp)
+	mtc0	k0, CP0_DDATA_LO
 
-    /* Jump to handler */
+	/* Restore RDHWR access */
+	PTR_LI	k0, 0x2000000F
+	mtc0	k0, CP0_HWRENA
+
+	/* Jump to handler */
 FEXPORT(__kvm_mips_jump_to_handler)
-    /* XXXKYMA: not sure if this is safe, how large is the stack?? */
-    /* Now jump to the kvm_mips_handle_exit() to see if we can deal with this in the kernel */
-    la          t9,kvm_mips_handle_exit
-    jalr.hb     t9
-    addiu       sp,sp, -CALLFRAME_SIZ           /* BD Slot */
-
-    /* Return from handler Make sure interrupts are disabled */
-    di
-    ehb
-
-    /* XXXKYMA: k0/k1 could have been blown away if we processed an exception
-     * while we were handling the exception from the guest, reload k1
-     */
-    move        k1, s1
-	addiu		k1, k1, VCPU_HOST_ARCH
-
-    /* Check return value, should tell us if we are returning to the host (handle I/O etc)
-     * or resuming the guest
-     */
-    andi        t0, v0, RESUME_HOST
-    bnez        t0, __kvm_mips_return_to_host
-    nop
+	/* XXXKYMA: not sure if this is safe, how large is the stack??
+	 * Now jump to the kvm_mips_handle_exit() to see if we can deal
+	 * with this in the kernel */
+	PTR_LA	t9, kvm_mips_handle_exit
+	jalr.hb	t9
+	 INT_ADDIU sp, sp, -CALLFRAME_SIZ           /* BD Slot */
+
+	/* Return from handler Make sure interrupts are disabled */
+	di
+	ehb
+
+	/* XXXKYMA: k0/k1 could have been blown away if we processed
+	 * an exception while we were handling the exception from the
+	 * guest, reload k1
+	 */
+
+	move	k1, s1
+	INT_ADDIU k1, k1, VCPU_HOST_ARCH
+
+	/* Check return value, should tell us if we are returning to the
+	 * host (handle I/O etc)or resuming the guest
+	 */
+	andi	t0, v0, RESUME_HOST
+	bnez	t0, __kvm_mips_return_to_host
+	 nop
 
 __kvm_mips_return_to_guest:
-    /* Put the saved pointer to vcpu (s1) back into the DDATA_LO Register */
-    mtc0        s1, CP0_DDATA_LO
-
-    /* Load up the Guest EBASE to minimize the window where BEV is set */
-    LONG_L      t0, VCPU_GUEST_EBASE(k1)
-
-    /* Switch EBASE back to the one used by KVM */
-    mfc0        v1, CP0_STATUS
-    .set at
-	or          k0, v1, ST0_BEV
-    .set noat
-    mtc0        k0, CP0_STATUS
-    ehb
-    mtc0        t0,CP0_EBASE
-
-    /* Setup status register for running guest in UM */
-    .set at
-    or     v1, v1, (ST0_EXL | KSU_USER | ST0_IE)
-    and     v1, v1, ~ST0_CU0
-    .set noat
-    mtc0    v1, CP0_STATUS
-    ehb
+	/* Put the saved pointer to vcpu (s1) back into the DDATA_LO Register */
+	mtc0	s1, CP0_DDATA_LO
 
+	/* Load up the Guest EBASE to minimize the window where BEV is set */
+	LONG_L	t0, VCPU_GUEST_EBASE(k1)
+
+	/* Switch EBASE back to the one used by KVM */
+	mfc0	v1, CP0_STATUS
+	.set	at
+	or	k0, v1, ST0_BEV
+	.set	noat
+	mtc0	k0, CP0_STATUS
+	ehb
+	mtc0	t0, CP0_EBASE
+
+	/* Setup status register for running guest in UM */
+	.set	at
+	or	v1, v1, (ST0_EXL | KSU_USER | ST0_IE)
+	and	v1, v1, ~ST0_CU0
+	.set	noat
+	mtc0	v1, CP0_STATUS
+	ehb
 
 	/* Set Guest EPC */
-	LONG_L		t0, VCPU_PC(k1)
-	mtc0		t0, CP0_EPC
-
-    /* Set the ASID for the Guest Kernel */
-    sll         t0, t0, 1                       /* with kseg0 @ 0x40000000, kernel */
-                                                /* addresses shift to 0x80000000 */
-    bltz        t0, 1f                          /* If kernel */
-	addiu       t1, k1, VCPU_GUEST_KERNEL_ASID  /* (BD)  */
-    addiu       t1, k1, VCPU_GUEST_USER_ASID    /* else user */
+	LONG_L	t0, VCPU_PC(k1)
+	mtc0	t0, CP0_EPC
+
+	/* Set the ASID for the Guest Kernel */
+	INT_SLL	t0, t0, 1	/* with kseg0 @ 0x40000000, kernel */
+				/* addresses shift to 0x80000000 */
+	bltz	t0, 1f		/* If kernel */
+	 INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID  /* (BD)  */
+	INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID    /* else user */
 1:
-    /* t1: contains the base of the ASID array, need to get the cpu id  */
-    LONG_L      t2, TI_CPU($28)             /* smp_processor_id */
-    sll         t2, t2, 2                   /* x4 */
-    addu        t3, t1, t2
-    LONG_L      k0, (t3)
-    andi        k0, k0, 0xff
-	mtc0		k0,CP0_ENTRYHI
-    ehb
-
-    /* Disable RDHWR access */
-    mtc0    zero,  CP0_HWRENA
-
-    /* load the guest context from VCPU and return */
-    LONG_L  $0, VCPU_R0(k1)
-    LONG_L  $1, VCPU_R1(k1)
-    LONG_L  $2, VCPU_R2(k1)
-    LONG_L  $3, VCPU_R3(k1)
-    LONG_L  $4, VCPU_R4(k1)
-    LONG_L  $5, VCPU_R5(k1)
-    LONG_L  $6, VCPU_R6(k1)
-    LONG_L  $7, VCPU_R7(k1)
-    LONG_L  $8, VCPU_R8(k1)
-    LONG_L  $9, VCPU_R9(k1)
-    LONG_L  $10, VCPU_R10(k1)
-    LONG_L  $11, VCPU_R11(k1)
-    LONG_L  $12, VCPU_R12(k1)
-    LONG_L  $13, VCPU_R13(k1)
-    LONG_L  $14, VCPU_R14(k1)
-    LONG_L  $15, VCPU_R15(k1)
-    LONG_L  $16, VCPU_R16(k1)
-    LONG_L  $17, VCPU_R17(k1)
-    LONG_L  $18, VCPU_R18(k1)
-    LONG_L  $19, VCPU_R19(k1)
-    LONG_L  $20, VCPU_R20(k1)
-    LONG_L  $21, VCPU_R21(k1)
-    LONG_L  $22, VCPU_R22(k1)
-    LONG_L  $23, VCPU_R23(k1)
-    LONG_L  $24, VCPU_R24(k1)
-    LONG_L  $25, VCPU_R25(k1)
-
-    /* $/k1 loaded later */
-    LONG_L  $28, VCPU_R28(k1)
-    LONG_L  $29, VCPU_R29(k1)
-    LONG_L  $30, VCPU_R30(k1)
-    LONG_L  $31, VCPU_R31(k1)
+	/* t1: contains the base of the ASID array, need to get the cpu id  */
+	LONG_L	t2, TI_CPU($28)		/* smp_processor_id */
+	INT_SLL	t2, t2, 2		/* x4 */
+	REG_ADDU t3, t1, t2
+	LONG_L	k0, (t3)
+	andi	k0, k0, 0xff
+	mtc0	k0,CP0_ENTRYHI
+	ehb
+
+	/* Disable RDHWR access */
+	mtc0    zero,  CP0_HWRENA
+
+	/* load the guest context from VCPU and return */
+	LONG_L	$0, VCPU_R0(k1)
+	LONG_L	$1, VCPU_R1(k1)
+	LONG_L	$2, VCPU_R2(k1)
+	LONG_L	$3, VCPU_R3(k1)
+	LONG_L	$4, VCPU_R4(k1)
+	LONG_L	$5, VCPU_R5(k1)
+	LONG_L	$6, VCPU_R6(k1)
+	LONG_L	$7, VCPU_R7(k1)
+	LONG_L	$8, VCPU_R8(k1)
+	LONG_L	$9, VCPU_R9(k1)
+	LONG_L	$10, VCPU_R10(k1)
+	LONG_L	$11, VCPU_R11(k1)
+	LONG_L	$12, VCPU_R12(k1)
+	LONG_L	$13, VCPU_R13(k1)
+	LONG_L	$14, VCPU_R14(k1)
+	LONG_L	$15, VCPU_R15(k1)
+	LONG_L	$16, VCPU_R16(k1)
+	LONG_L	$17, VCPU_R17(k1)
+	LONG_L	$18, VCPU_R18(k1)
+	LONG_L	$19, VCPU_R19(k1)
+	LONG_L	$20, VCPU_R20(k1)
+	LONG_L	$21, VCPU_R21(k1)
+	LONG_L	$22, VCPU_R22(k1)
+	LONG_L	$23, VCPU_R23(k1)
+	LONG_L	$24, VCPU_R24(k1)
+	LONG_L	$25, VCPU_R25(k1)
+
+	/* $/k1 loaded later */
+	LONG_L	$28, VCPU_R28(k1)
+	LONG_L	$29, VCPU_R29(k1)
+	LONG_L	$30, VCPU_R30(k1)
+	LONG_L	$31, VCPU_R31(k1)
 
 FEXPORT(__kvm_mips_skip_guest_restore)
-    LONG_L  k0, VCPU_HI(k1)
-    mthi    k0
+	LONG_L	k0, VCPU_HI(k1)
+	mthi	k0
 
-    LONG_L  k0, VCPU_LO(k1)
-    mtlo    k0
+	LONG_L	k0, VCPU_LO(k1)
+	mtlo	k0
 
-    LONG_L  k0, VCPU_R26(k1)
-    LONG_L  k1, VCPU_R27(k1)
+	LONG_L	k0, VCPU_R26(k1)
+	LONG_L	k1, VCPU_R27(k1)
 
-    eret
+	eret
 
 __kvm_mips_return_to_host:
-    /* EBASE is already pointing to Linux */
-    LONG_L  k1, VCPU_HOST_STACK(k1)
-	addiu  	k1,k1, -PT_SIZE
-
-    /* Restore host DDATA_LO */
-    LONG_L      k0, PT_HOST_USERLOCAL(k1)
-    mtc0        k0, CP0_DDATA_LO
-
-    /* Restore host ASID */
-    LONG_L      k0, PT_HOST_ASID(sp)
-    andi        k0, 0xff
-    mtc0        k0,CP0_ENTRYHI
-    ehb
-
-    /* Load context saved on the host stack */
-    LONG_L  $0, PT_R0(k1)
-    LONG_L  $1, PT_R1(k1)
-
-    /* r2/v0 is the return code, shift it down by 2 (arithmetic) to recover the err code  */
-    sra     k0, v0, 2
-    move    $2, k0
-
-    LONG_L  $3, PT_R3(k1)
-    LONG_L  $4, PT_R4(k1)
-    LONG_L  $5, PT_R5(k1)
-    LONG_L  $6, PT_R6(k1)
-    LONG_L  $7, PT_R7(k1)
-    LONG_L  $8, PT_R8(k1)
-    LONG_L  $9, PT_R9(k1)
-    LONG_L  $10, PT_R10(k1)
-    LONG_L  $11, PT_R11(k1)
-    LONG_L  $12, PT_R12(k1)
-    LONG_L  $13, PT_R13(k1)
-    LONG_L  $14, PT_R14(k1)
-    LONG_L  $15, PT_R15(k1)
-    LONG_L  $16, PT_R16(k1)
-    LONG_L  $17, PT_R17(k1)
-    LONG_L  $18, PT_R18(k1)
-    LONG_L  $19, PT_R19(k1)
-    LONG_L  $20, PT_R20(k1)
-    LONG_L  $21, PT_R21(k1)
-    LONG_L  $22, PT_R22(k1)
-    LONG_L  $23, PT_R23(k1)
-    LONG_L  $24, PT_R24(k1)
-    LONG_L  $25, PT_R25(k1)
-
-    /* Host k0/k1 were not saved */
-
-    LONG_L  $28, PT_R28(k1)
-    LONG_L  $29, PT_R29(k1)
-    LONG_L  $30, PT_R30(k1)
-
-    LONG_L  k0, PT_HI(k1)
-    mthi    k0
-
-    LONG_L  k0, PT_LO(k1)
-    mtlo    k0
-
-    /* Restore RDHWR access */
-    la      k0, 0x2000000F
-    mtc0    k0,  CP0_HWRENA
-
-
-    /* Restore RA, which is the address we will return to */
-    LONG_L  ra, PT_R31(k1)
-    j       ra
-    nop
-
-    .set    pop
+	/* EBASE is already pointing to Linux */
+	LONG_L	k1, VCPU_HOST_STACK(k1)
+	INT_ADDIU k1,k1, -PT_SIZE
+
+	/* Restore host DDATA_LO */
+	LONG_L	k0, PT_HOST_USERLOCAL(k1)
+	mtc0	k0, CP0_DDATA_LO
+
+	/* Restore host ASID */
+	LONG_L	k0, PT_HOST_ASID(sp)
+	andi	k0, 0xff
+	mtc0	k0,CP0_ENTRYHI
+	ehb
+
+	/* Load context saved on the host stack */
+	LONG_L	$0, PT_R0(k1)
+	LONG_L	$1, PT_R1(k1)
+
+	/* r2/v0 is the return code, shift it down by 2 (arithmetic)
+	 * to recover the err code  */
+	INT_SRA	k0, v0, 2
+	move	$2, k0
+
+	LONG_L	$3, PT_R3(k1)
+	LONG_L	$4, PT_R4(k1)
+	LONG_L	$5, PT_R5(k1)
+	LONG_L	$6, PT_R6(k1)
+	LONG_L	$7, PT_R7(k1)
+	LONG_L	$8, PT_R8(k1)
+	LONG_L	$9, PT_R9(k1)
+	LONG_L	$10, PT_R10(k1)
+	LONG_L	$11, PT_R11(k1)
+	LONG_L	$12, PT_R12(k1)
+	LONG_L	$13, PT_R13(k1)
+	LONG_L	$14, PT_R14(k1)
+	LONG_L	$15, PT_R15(k1)
+	LONG_L	$16, PT_R16(k1)
+	LONG_L	$17, PT_R17(k1)
+	LONG_L	$18, PT_R18(k1)
+	LONG_L	$19, PT_R19(k1)
+	LONG_L	$20, PT_R20(k1)
+	LONG_L	$21, PT_R21(k1)
+	LONG_L	$22, PT_R22(k1)
+	LONG_L	$23, PT_R23(k1)
+	LONG_L	$24, PT_R24(k1)
+	LONG_L	$25, PT_R25(k1)
+
+	/* Host k0/k1 were not saved */
+
+	LONG_L	$28, PT_R28(k1)
+	LONG_L	$29, PT_R29(k1)
+	LONG_L	$30, PT_R30(k1)
+
+	LONG_L	k0, PT_HI(k1)
+	mthi	k0
+
+	LONG_L	k0, PT_LO(k1)
+	mtlo	k0
+
+	/* Restore RDHWR access */
+	PTR_LI	k0, 0x2000000F
+	mtc0	k0,  CP0_HWRENA
+
+
+	/* Restore RA, which is the address we will return to */
+	LONG_L  ra, PT_R31(k1)
+	j       ra
+	 nop
+
 VECTOR_END(MIPSX(GuestExceptionEnd))
 .end MIPSX(GuestException)
 
@@ -627,24 +623,23 @@ MIPSX(exceptions):
 
 #define HW_SYNCI_Step       $1
 LEAF(MIPSX(SyncICache))
-    .set    push
+	.set	push
 	.set	mips32r2
-    beq     a1, zero, 20f
-    nop
-    addu    a1, a0, a1
-    rdhwr   v0, HW_SYNCI_Step
-    beq     v0, zero, 20f
-    nop
-
+	beq	a1, zero, 20f
+	 nop
+	REG_ADDU a1, a0, a1
+	rdhwr	v0, HW_SYNCI_Step
+	beq	v0, zero, 20f
+	 nop
 10:
-    synci   0(a0)
-    addu    a0, a0, v0
-    sltu    v1, a0, a1
-    bne     v1, zero, 10b
-    nop
-    sync
+	synci	0(a0)
+	REG_ADDU a0, a0, v0
+	sltu	v1, a0, a1
+	bne	v1, zero, 10b
+	 nop
+	sync
 20:
-    jr.hb   ra
-    nop
-    .set pop
+	jr.hb	ra
+	 nop
+	.set	pop
 END(MIPSX(SyncICache))
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 08891d07aeb6..fa19e2f1a874 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -334,6 +334,27 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+/*
+ * Like kvmppc_get_last_inst(), but for fetching a sc instruction.
+ * Because the sc instruction sets SRR0 to point to the following
+ * instruction, we have to fetch from pc - 4.
+ */
+static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
+{
+	ulong pc = kvmppc_get_pc(vcpu) - 4;
+	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+	u32 r;
+
+	/* Load the instruction manually if it failed to do so in the
+	 * exit path */
+	if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
+		kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
+
+	r = svcpu->last_inst;
+	svcpu_put(svcpu);
+	return r;
+}
+
 static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
@@ -446,6 +467,23 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
 	return vcpu->arch.last_inst;
 }
 
+/*
+ * Like kvmppc_get_last_inst(), but for fetching a sc instruction.
+ * Because the sc instruction sets SRR0 to point to the following
+ * instruction, we have to fetch from pc - 4.
+ */
+static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
+{
+	ulong pc = kvmppc_get_pc(vcpu) - 4;
+
+	/* Load the instruction manually if it failed to do so in the
+	 * exit path */
+	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
+		kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
+
+	return vcpu->arch.last_inst;
+}
+
 static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.fault_dar;
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index a1ecb14e4442..86d638a3b359 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -37,7 +37,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
-extern int kvm_hpt_order;		/* order of preallocated HPTs */
+extern unsigned long kvm_rma_pages;
 #endif
 
 #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
@@ -100,7 +100,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 			/* (masks depend on page size) */
 			rb |= 0x1000;		/* page encoding in LP field */
 			rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
-			rb |= (va_low & 0xfe);	/* AVAL field (P7 doesn't seem to care) */
+			rb |= ((va_low << 4) & 0xf0);	/* AVAL field (P7 doesn't seem to care) */
 		}
 	} else {
 		/* 4kB page */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index af326cde7cb6..33283532e9d8 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -183,13 +183,9 @@ struct kvmppc_spapr_tce_table {
 	struct page *pages[0];
 };
 
-struct kvmppc_linear_info {
-	void		*base_virt;
-	unsigned long	 base_pfn;
-	unsigned long	 npages;
-	struct list_head list;
-	atomic_t	 use_count;
-	int		 type;
+struct kvm_rma_info {
+	atomic_t use_count;
+	unsigned long base_pfn;
 };
 
 /* XICS components, defined in book3s_xics.c */
@@ -246,7 +242,7 @@ struct kvm_arch {
 	int tlbie_lock;
 	unsigned long lpcr;
 	unsigned long rmor;
-	struct kvmppc_linear_info *rma;
+	struct kvm_rma_info *rma;
 	unsigned long vrma_slb_v;
 	int rma_setup_done;
 	int using_mmu_notifiers;
@@ -259,7 +255,7 @@ struct kvm_arch {
 	spinlock_t slot_phys_lock;
 	cpumask_t need_tlb_flush;
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
-	struct kvmppc_linear_info *hpt_li;
+	int hpt_cma_alloc;
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a5287fe03d77..b15554a26c20 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -137,10 +137,10 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			     unsigned long ioba, unsigned long tce);
 extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
 				struct kvm_allocate_rma *rma);
-extern struct kvmppc_linear_info *kvm_alloc_rma(void);
-extern void kvm_release_rma(struct kvmppc_linear_info *ri);
-extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
-extern void kvm_release_hpt(struct kvmppc_linear_info *li);
+extern struct kvm_rma_info *kvm_alloc_rma(void);
+extern void kvm_release_rma(struct kvm_rma_info *ri);
+extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
+extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
 extern void kvmppc_core_destroy_vm(struct kvm *kvm);
 extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
@@ -261,6 +261,7 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 struct openpic;
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
+extern void kvm_cma_reserve(void) __init;
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {
 	paca[cpu].kvm_hstate.xics_phys = addr;
@@ -281,13 +282,12 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
 }
 
 extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
-extern void kvm_linear_init(void);
 
 #else
-static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
+static inline void __init kvm_cma_reserve(void)
 {}
 
-static inline void kvm_linear_init(void)
+static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {}
 
 static inline u32 kvmppc_get_xics_latch(void)
@@ -394,10 +394,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
 	}
 }
 
-/* Please call after prepare_to_enter. This function puts the lazy ee state
-   back to normal mode, without actually enabling interrupts. */
-static inline void kvmppc_lazy_ee_enable(void)
+/*
+ * Please call after prepare_to_enter. This function puts the lazy ee and irq
+ * disabled tracking state back to normal mode, without actually enabling
+ * interrupts.
+ */
+static inline void kvmppc_fix_ee_before_entry(void)
 {
+	trace_hardirqs_on();
+
 #ifdef CONFIG_PPC64
 	/* Only need to enable IRQs by hard enabling them after this */
 	local_paca->irq_happened = 0;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c7e8afc2ead0..26098c20936d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -451,6 +451,7 @@ int main(void)
 	DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
 	DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
 #endif
+	DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
 	DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
 	DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5));
 	DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6));
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 389fb8077cc9..fe6a58c9f0b7 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -229,6 +229,8 @@ void __init early_setup(unsigned long dt_ptr)
 	/* Initialize the hash table or TLB handling */
 	early_init_mmu();
 
+	kvm_cma_reserve();
+
 	/*
 	 * Reserve any gigantic pages requested on the command line.
 	 * memblock needs to have been initialized by the time this is
@@ -609,8 +611,6 @@ void __init setup_arch(char **cmdline_p)
 	/* Initialize the MMU context management stuff */
 	mmu_context_init();
 
-	kvm_linear_init();
-
 	/* Interrupt code needs to be 64K-aligned */
 	if ((unsigned long)_stext & 0xffff)
 		panic("Kernelbase not 64K-aligned (0x%lx)!\n",
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index eb643f862579..ffaef2cb101a 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -72,6 +72,7 @@ config KVM_BOOK3S_64_HV
 	bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
 	depends on KVM_BOOK3S_64
 	select MMU_NOTIFIER
+	select CMA
 	---help---
 	  Support running unmodified book3s_64 guest kernels in
 	  virtual machines on POWER7 and PPC970 processors that have
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 008cd856c5b5..6646c952c5e3 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -81,6 +81,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_64_vio_hv.o \
 	book3s_hv_ras.o \
 	book3s_hv_builtin.o \
+	book3s_hv_cma.o \
 	$(kvm-book3s_64-builtin-xics-objs-y)
 
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 739bfbadb85e..7e345e00661a 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -182,10 +182,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	hva_t ptegp;
 	u64 pteg[16];
 	u64 avpn = 0;
+	u64 v, r;
+	u64 v_val, v_mask;
+	u64 eaddr_mask;
 	int i;
-	u8 key = 0;
+	u8 pp, key = 0;
 	bool found = false;
-	int second = 0;
+	bool second = false;
 	ulong mp_ea = vcpu->arch.magic_page_ea;
 
 	/* Magic page override */
@@ -208,8 +211,16 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 		goto no_seg_found;
 
 	avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
+	v_val = avpn & HPTE_V_AVPN;
+
 	if (slbe->tb)
-		avpn |= SLB_VSID_B_1T;
+		v_val |= SLB_VSID_B_1T;
+	if (slbe->large)
+		v_val |= HPTE_V_LARGE;
+	v_val |= HPTE_V_VALID;
+
+	v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
+		HPTE_V_SECONDARY;
 
 do_second:
 	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
@@ -227,91 +238,74 @@ do_second:
 		key = 4;
 
 	for (i=0; i<16; i+=2) {
-		u64 v = pteg[i];
-		u64 r = pteg[i+1];
-
-		/* Valid check */
-		if (!(v & HPTE_V_VALID))
-			continue;
-		/* Hash check */
-		if ((v & HPTE_V_SECONDARY) != second)
-			continue;
-
-		/* AVPN compare */
-		if (HPTE_V_COMPARE(avpn, v)) {
-			u8 pp = (r & HPTE_R_PP) | key;
-			int eaddr_mask = 0xFFF;
-
-			gpte->eaddr = eaddr;
-			gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu,
-								    eaddr,
-								    data);
-			if (slbe->large)
-				eaddr_mask = 0xFFFFFF;
-			gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask);
-			gpte->may_execute = ((r & HPTE_R_N) ? false : true);
-			gpte->may_read = false;
-			gpte->may_write = false;
-
-			switch (pp) {
-			case 0:
-			case 1:
-			case 2:
-			case 6:
-				gpte->may_write = true;
-				/* fall through */
-			case 3:
-			case 5:
-			case 7:
-				gpte->may_read = true;
-				break;
-			}
-
-			dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
-				"-> 0x%lx\n",
-				eaddr, avpn, gpte->vpage, gpte->raddr);
+		/* Check all relevant fields of 1st dword */
+		if ((pteg[i] & v_mask) == v_val) {
 			found = true;
 			break;
 		}
 	}
 
-	/* Update PTE R and C bits, so the guest's swapper knows we used the
-	 * page */
-	if (found) {
-		u32 oldr = pteg[i+1];
+	if (!found) {
+		if (second)
+			goto no_page_found;
+		v_val |= HPTE_V_SECONDARY;
+		second = true;
+		goto do_second;
+	}
 
-		if (gpte->may_read) {
-			/* Set the accessed flag */
-			pteg[i+1] |= HPTE_R_R;
-		}
-		if (gpte->may_write) {
-			/* Set the dirty flag */
-			pteg[i+1] |= HPTE_R_C;
-		} else {
-			dprintk("KVM: Mapping read-only page!\n");
-		}
+	v = pteg[i];
+	r = pteg[i+1];
+	pp = (r & HPTE_R_PP) | key;
+	eaddr_mask = 0xFFF;
+
+	gpte->eaddr = eaddr;
+	gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
+	if (slbe->large)
+		eaddr_mask = 0xFFFFFF;
+	gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
+	gpte->may_execute = ((r & HPTE_R_N) ? false : true);
+	gpte->may_read = false;
+	gpte->may_write = false;
+
+	switch (pp) {
+	case 0:
+	case 1:
+	case 2:
+	case 6:
+		gpte->may_write = true;
+		/* fall through */
+	case 3:
+	case 5:
+	case 7:
+		gpte->may_read = true;
+		break;
+	}
 
-		/* Write back into the PTEG */
-		if (pteg[i+1] != oldr)
-			copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
+	dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
+		"-> 0x%lx\n",
+		eaddr, avpn, gpte->vpage, gpte->raddr);
 
-		if (!gpte->may_read)
-			return -EPERM;
-		return 0;
-	} else {
-		dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx "
-			"ptegp=0x%lx)\n",
-			eaddr, to_book3s(vcpu)->sdr1, ptegp);
-		for (i = 0; i < 16; i += 2)
-			dprintk("   %02d: 0x%llx - 0x%llx (0x%llx)\n",
-				i, pteg[i], pteg[i+1], avpn);
-
-		if (!second) {
-			second = HPTE_V_SECONDARY;
-			goto do_second;
-		}
+	/* Update PTE R and C bits, so the guest's swapper knows we used the
+	 * page */
+	if (gpte->may_read) {
+		/* Set the accessed flag */
+		r |= HPTE_R_R;
+	}
+	if (data && gpte->may_write) {
+		/* Set the dirty flag -- XXX even if not writing */
+		r |= HPTE_R_C;
+	}
+
+	/* Write back into the PTEG */
+	if (pteg[i+1] != r) {
+		pteg[i+1] = r;
+		copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
 	}
 
+	if (!gpte->may_read)
+		return -EPERM;
+	return 0;
+
 no_page_found:
 	return -ENOENT;
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 710d31317d81..043eec8461e7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,6 +37,8 @@
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 
+#include "book3s_hv_cma.h"
+
 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
 #define MAX_LPID_970	63
 
@@ -52,8 +54,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 {
 	unsigned long hpt;
 	struct revmap_entry *rev;
-	struct kvmppc_linear_info *li;
-	long order = kvm_hpt_order;
+	struct page *page = NULL;
+	long order = KVM_DEFAULT_HPT_ORDER;
 
 	if (htab_orderp) {
 		order = *htab_orderp;
@@ -61,26 +63,23 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 			order = PPC_MIN_HPT_ORDER;
 	}
 
+	kvm->arch.hpt_cma_alloc = 0;
 	/*
-	 * If the user wants a different size from default,
 	 * try first to allocate it from the kernel page allocator.
+	 * We keep the CMA reserved for failed allocation.
 	 */
-	hpt = 0;
-	if (order != kvm_hpt_order) {
-		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
-				       __GFP_NOWARN, order - PAGE_SHIFT);
-		if (!hpt)
-			--order;
-	}
+	hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT |
+			       __GFP_NOWARN, order - PAGE_SHIFT);
 
 	/* Next try to allocate from the preallocated pool */
 	if (!hpt) {
-		li = kvm_alloc_hpt();
-		if (li) {
-			hpt = (ulong)li->base_virt;
-			kvm->arch.hpt_li = li;
-			order = kvm_hpt_order;
-		}
+		VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
+		page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
+		if (page) {
+			hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+			kvm->arch.hpt_cma_alloc = 1;
+		} else
+			--order;
 	}
 
 	/* Lastly try successively smaller sizes from the page allocator */
@@ -118,8 +117,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 	return 0;
 
  out_freehpt:
-	if (kvm->arch.hpt_li)
-		kvm_release_hpt(kvm->arch.hpt_li);
+	if (kvm->arch.hpt_cma_alloc)
+		kvm_release_hpt(page, 1 << (order - PAGE_SHIFT));
 	else
 		free_pages(hpt, order - PAGE_SHIFT);
 	return -ENOMEM;
@@ -165,8 +164,9 @@ void kvmppc_free_hpt(struct kvm *kvm)
 {
 	kvmppc_free_lpid(kvm->arch.lpid);
 	vfree(kvm->arch.revmap);
-	if (kvm->arch.hpt_li)
-		kvm_release_hpt(kvm->arch.hpt_li);
+	if (kvm->arch.hpt_cma_alloc)
+		kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
+				1 << (kvm->arch.hpt_order - PAGE_SHIFT));
 	else
 		free_pages(kvm->arch.hpt_virt,
 			   kvm->arch.hpt_order - PAGE_SHIFT);
@@ -1579,7 +1579,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
 	ctx->first_pass = 1;
 
 	rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
-	ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag);
+	ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
 	if (ret < 0) {
 		kvm_put_kvm(kvm);
 		return ret;
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index b2d3f3b2de72..54cf9bc94dad 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -136,7 +136,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	mutex_unlock(&kvm->lock);
 
 	return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
-				stt, O_RDWR);
+				stt, O_RDWR | O_CLOEXEC);
 
 fail:
 	if (stt) {
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 1f6344c4408d..360ce68c9809 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -458,6 +458,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 	case SPRN_PMC4_GEKKO:
 	case SPRN_WPAR_GEKKO:
 	case SPRN_MSSSR0:
+	case SPRN_DABR:
 		break;
 unprivileged:
 	default:
@@ -555,6 +556,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 	case SPRN_PMC4_GEKKO:
 	case SPRN_WPAR_GEKKO:
 	case SPRN_MSSSR0:
+	case SPRN_DABR:
 		*spr_val = 0;
 		break;
 	default:
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2efa9dde741a..b0ee3bc9ca76 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -680,13 +680,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 }
 
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-                                  struct kvm_sregs *sregs)
+				  struct kvm_sregs *sregs)
 {
 	int i;
 
-	sregs->pvr = vcpu->arch.pvr;
-
 	memset(sregs, 0, sizeof(struct kvm_sregs));
+	sregs->pvr = vcpu->arch.pvr;
 	for (i = 0; i < vcpu->arch.slb_max; i++) {
 		sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
 		sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
@@ -696,7 +695,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 }
 
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-                                  struct kvm_sregs *sregs)
+				  struct kvm_sregs *sregs)
 {
 	int i, j;
 
@@ -1511,10 +1510,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
 
 static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
 	struct page *page;
+	struct kvm_rma_info *ri = vma->vm_file->private_data;
 
-	if (vmf->pgoff >= ri->npages)
+	if (vmf->pgoff >= kvm_rma_pages)
 		return VM_FAULT_SIGBUS;
 
 	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
@@ -1536,7 +1535,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
 
 static int kvm_rma_release(struct inode *inode, struct file *filp)
 {
-	struct kvmppc_linear_info *ri = filp->private_data;
+	struct kvm_rma_info *ri = filp->private_data;
 
 	kvm_release_rma(ri);
 	return 0;
@@ -1549,18 +1548,27 @@ static const struct file_operations kvm_rma_fops = {
 
 long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
 {
-	struct kvmppc_linear_info *ri;
 	long fd;
+	struct kvm_rma_info *ri;
+	/*
+	 * Only do this on PPC970 in HV mode
+	 */
+	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+	    !cpu_has_feature(CPU_FTR_ARCH_201))
+		return -EINVAL;
+
+	if (!kvm_rma_pages)
+		return -EINVAL;
 
 	ri = kvm_alloc_rma();
 	if (!ri)
 		return -ENOMEM;
 
-	fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
+	fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC);
 	if (fd < 0)
 		kvm_release_rma(ri);
 
-	ret->rma_size = ri->npages << PAGE_SHIFT;
+	ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
 	return fd;
 }
 
@@ -1725,7 +1733,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
 	int err = 0;
 	struct kvm *kvm = vcpu->kvm;
-	struct kvmppc_linear_info *ri = NULL;
+	struct kvm_rma_info *ri = NULL;
 	unsigned long hva;
 	struct kvm_memory_slot *memslot;
 	struct vm_area_struct *vma;
@@ -1803,13 +1811,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 
 	} else {
 		/* Set up to use an RMO region */
-		rma_size = ri->npages;
+		rma_size = kvm_rma_pages;
 		if (rma_size > memslot->npages)
 			rma_size = memslot->npages;
 		rma_size <<= PAGE_SHIFT;
 		rmls = lpcr_rmls(rma_size);
 		err = -EINVAL;
-		if (rmls < 0) {
+		if ((long)rmls < 0) {
 			pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
 			goto out_srcu;
 		}
@@ -1831,14 +1839,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 			/* POWER7 */
 			lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
 			lpcr |= rmls << LPCR_RMLS_SH;
-			kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
+			kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
 		}
 		kvm->arch.lpcr = lpcr;
 		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
 			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
 
 		/* Initialize phys addrs of pages in RMO */
-		npages = ri->npages;
+		npages = kvm_rma_pages;
 		porder = __ilog2(npages);
 		physp = memslot->arch.slot_phys;
 		if (physp) {
@@ -1874,7 +1882,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	/* Allocate the guest's logical partition ID */
 
 	lpid = kvmppc_alloc_lpid();
-	if (lpid < 0)
+	if ((long)lpid < 0)
 		return -ENOMEM;
 	kvm->arch.lpid = lpid;
 
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ec0a9e5de100..8cd0daebb82d 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -13,33 +13,34 @@
 #include <linux/spinlock.h>
 #include <linux/bootmem.h>
 #include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/sizes.h>
 
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 
-#define KVM_LINEAR_RMA		0
-#define KVM_LINEAR_HPT		1
-
-static void __init kvm_linear_init_one(ulong size, int count, int type);
-static struct kvmppc_linear_info *kvm_alloc_linear(int type);
-static void kvm_release_linear(struct kvmppc_linear_info *ri);
-
-int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
-EXPORT_SYMBOL_GPL(kvm_hpt_order);
-
-/*************** RMA *************/
-
+#include "book3s_hv_cma.h"
+/*
+ * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
+ * should be power of 2.
+ */
+#define HPT_ALIGN_PAGES		((1 << 18) >> PAGE_SHIFT) /* 256k */
+/*
+ * By default we reserve 5% of memory for hash pagetable allocation.
+ */
+static unsigned long kvm_cma_resv_ratio = 5;
 /*
- * This maintains a list of RMAs (real mode areas) for KVM guests to use.
+ * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
  * Each RMA has to be physically contiguous and of a size that the
  * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
  * and other larger sizes.  Since we are unlikely to be allocate that
  * much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot for KVM to use.
+ * we preallocate a set of RMAs in early boot using CMA.
+ * should be power of 2.
  */
-static unsigned long kvm_rma_size = 64 << 20;	/* 64MB */
-static unsigned long kvm_rma_count;
+unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT;	/* 128MB */
+EXPORT_SYMBOL_GPL(kvm_rma_pages);
 
 /* Work out RMLS (real mode limit selector) field value for a given RMA size.
    Assumes POWER7 or PPC970. */
@@ -69,165 +70,114 @@ static inline int lpcr_rmls(unsigned long rma_size)
 
 static int __init early_parse_rma_size(char *p)
 {
-	if (!p)
-		return 1;
+	unsigned long kvm_rma_size;
 
+	pr_debug("%s(%s)\n", __func__, p);
+	if (!p)
+		return -EINVAL;
 	kvm_rma_size = memparse(p, &p);
-
+	/*
+	 * Check that the requested size is one supported in hardware
+	 */
+	if (lpcr_rmls(kvm_rma_size) < 0) {
+		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+		return -EINVAL;
+	}
+	kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
 	return 0;
 }
 early_param("kvm_rma_size", early_parse_rma_size);
 
-static int __init early_parse_rma_count(char *p)
+struct kvm_rma_info *kvm_alloc_rma()
 {
-	if (!p)
-		return 1;
-
-	kvm_rma_count = simple_strtoul(p, NULL, 0);
-
-	return 0;
-}
-early_param("kvm_rma_count", early_parse_rma_count);
-
-struct kvmppc_linear_info *kvm_alloc_rma(void)
-{
-	return kvm_alloc_linear(KVM_LINEAR_RMA);
+	struct page *page;
+	struct kvm_rma_info *ri;
+
+	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
+	if (!ri)
+		return NULL;
+	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
+	if (!page)
+		goto err_out;
+	atomic_set(&ri->use_count, 1);
+	ri->base_pfn = page_to_pfn(page);
+	return ri;
+err_out:
+	kfree(ri);
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_rma);
 
-void kvm_release_rma(struct kvmppc_linear_info *ri)
+void kvm_release_rma(struct kvm_rma_info *ri)
 {
-	kvm_release_linear(ri);
+	if (atomic_dec_and_test(&ri->use_count)) {
+		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
+		kfree(ri);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_release_rma);
 
-/*************** HPT *************/
-
-/*
- * This maintains a list of big linear HPT tables that contain the GVA->HPA
- * memory mappings. If we don't reserve those early on, we might not be able
- * to get a big (usually 16MB) linear memory region from the kernel anymore.
- */
-
-static unsigned long kvm_hpt_count;
-
-static int __init early_parse_hpt_count(char *p)
+static int __init early_parse_kvm_cma_resv(char *p)
 {
+	pr_debug("%s(%s)\n", __func__, p);
 	if (!p)
-		return 1;
-
-	kvm_hpt_count = simple_strtoul(p, NULL, 0);
-
-	return 0;
+		return -EINVAL;
+	return kstrtoul(p, 0, &kvm_cma_resv_ratio);
 }
-early_param("kvm_hpt_count", early_parse_hpt_count);
+early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
 
-struct kvmppc_linear_info *kvm_alloc_hpt(void)
+struct page *kvm_alloc_hpt(unsigned long nr_pages)
 {
-	return kvm_alloc_linear(KVM_LINEAR_HPT);
+	unsigned long align_pages = HPT_ALIGN_PAGES;
+
+	/* Old CPUs require HPT aligned on a multiple of its size */
+	if (!cpu_has_feature(CPU_FTR_ARCH_206))
+		align_pages = nr_pages;
+	return kvm_alloc_cma(nr_pages, align_pages);
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
 
-void kvm_release_hpt(struct kvmppc_linear_info *li)
+void kvm_release_hpt(struct page *page, unsigned long nr_pages)
 {
-	kvm_release_linear(li);
+	kvm_release_cma(page, nr_pages);
 }
 EXPORT_SYMBOL_GPL(kvm_release_hpt);
 
-/*************** generic *************/
-
-static LIST_HEAD(free_linears);
-static DEFINE_SPINLOCK(linear_lock);
-
-static void __init kvm_linear_init_one(ulong size, int count, int type)
-{
-	unsigned long i;
-	unsigned long j, npages;
-	void *linear;
-	struct page *pg;
-	const char *typestr;
-	struct kvmppc_linear_info *linear_info;
-
-	if (!count)
-		return;
-
-	typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
-
-	npages = size >> PAGE_SHIFT;
-	linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
-	for (i = 0; i < count; ++i) {
-		linear = alloc_bootmem_align(size, size);
-		pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
-			 size >> 20);
-		linear_info[i].base_virt = linear;
-		linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
-		linear_info[i].npages = npages;
-		linear_info[i].type = type;
-		list_add_tail(&linear_info[i].list, &free_linears);
-		atomic_set(&linear_info[i].use_count, 0);
-
-		pg = pfn_to_page(linear_info[i].base_pfn);
-		for (j = 0; j < npages; ++j) {
-			atomic_inc(&pg->_count);
-			++pg;
-		}
-	}
-}
-
-static struct kvmppc_linear_info *kvm_alloc_linear(int type)
-{
-	struct kvmppc_linear_info *ri, *ret;
-
-	ret = NULL;
-	spin_lock(&linear_lock);
-	list_for_each_entry(ri, &free_linears, list) {
-		if (ri->type != type)
-			continue;
-
-		list_del(&ri->list);
-		atomic_inc(&ri->use_count);
-		memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
-		ret = ri;
-		break;
-	}
-	spin_unlock(&linear_lock);
-	return ret;
-}
-
-static void kvm_release_linear(struct kvmppc_linear_info *ri)
-{
-	if (atomic_dec_and_test(&ri->use_count)) {
-		spin_lock(&linear_lock);
-		list_add_tail(&ri->list, &free_linears);
-		spin_unlock(&linear_lock);
-
-	}
-}
-
-/*
- * Called at boot time while the bootmem allocator is active,
- * to allocate contiguous physical memory for the hash page
- * tables for guests.
+/**
+ * kvm_cma_reserve() - reserve area for kvm hash pagetable
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory.
  */
-void __init kvm_linear_init(void)
+void __init kvm_cma_reserve(void)
 {
-	/* HPT */
-	kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
-
-	/* RMA */
-	/* Only do this on PPC970 in HV mode */
-	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-	    !cpu_has_feature(CPU_FTR_ARCH_201))
-		return;
-
-	if (!kvm_rma_size || !kvm_rma_count)
-		return;
-
-	/* Check that the requested size is one supported in hardware */
-	if (lpcr_rmls(kvm_rma_size) < 0) {
-		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
-		return;
+	unsigned long align_size;
+	struct memblock_region *reg;
+	phys_addr_t selected_size = 0;
+	/*
+	 * We cannot use memblock_phys_mem_size() here, because
+	 * memblock_analyze() has not been called yet.
+	 */
+	for_each_memblock(memory, reg)
+		selected_size += memblock_region_memory_end_pfn(reg) -
+				 memblock_region_memory_base_pfn(reg);
+
+	selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
+	if (selected_size) {
+		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+			 (unsigned long)selected_size / SZ_1M);
+		/*
+		 * Old CPUs require HPT aligned on a multiple of its size. So for them
+		 * make the alignment as max size we could request.
+		 */
+		if (!cpu_has_feature(CPU_FTR_ARCH_206))
+			align_size = __rounddown_pow_of_two(selected_size);
+		else
+			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
+
+		align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
+		kvm_cma_declare_contiguous(selected_size, align_size);
 	}
-
-	kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
 }
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
new file mode 100644
index 000000000000..d9d3d8553d51
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.c
@@ -0,0 +1,240 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable  based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ *
+ */
+#define pr_fmt(fmt) "kvm_cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+#  define DEBUG
+#endif
+#endif
+
+#include <linux/memblock.h>
+#include <linux/mutex.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+#include "book3s_hv_cma.h"
+
+struct kvm_cma {
+	unsigned long	base_pfn;
+	unsigned long	count;
+	unsigned long	*bitmap;
+};
+
+static DEFINE_MUTEX(kvm_cma_mutex);
+static struct kvm_cma kvm_cma_area;
+
+/**
+ * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
+ *			          for kvm hash pagetable
+ * @size:  Size of the reserved memory.
+ * @alignment:  Alignment for the contiguous memory area
+ *
+ * This function reserves memory for kvm cma area. It should be
+ * called by arch code when early allocator (memblock or bootmem)
+ * is still activate.
+ */
+long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
+{
+	long base_pfn;
+	phys_addr_t addr;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+	pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
+
+	if (!size)
+		return -EINVAL;
+	/*
+	 * Sanitise input arguments.
+	 * We should be pageblock aligned for CMA.
+	 */
+	alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
+	size = ALIGN(size, alignment);
+	/*
+	 * Reserve memory
+	 * Use __memblock_alloc_base() since
+	 * memblock_alloc_base() panic()s.
+	 */
+	addr = __memblock_alloc_base(size, alignment, 0);
+	if (!addr) {
+		base_pfn = -ENOMEM;
+		goto err;
+	} else
+		base_pfn = PFN_DOWN(addr);
+
+	/*
+	 * Each reserved area must be initialised later, when more kernel
+	 * subsystems (like slab allocator) are available.
+	 */
+	cma->base_pfn = base_pfn;
+	cma->count    = size >> PAGE_SHIFT;
+	pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
+	return 0;
+err:
+	pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
+	return base_pfn;
+}
+
+/**
+ * kvm_alloc_cma() - allocate pages from contiguous area
+ * @nr_pages: Requested number of pages.
+ * @align_pages: Requested alignment in number of pages
+ *
+ * This function allocates memory buffer for hash pagetable.
+ */
+struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
+{
+	int ret;
+	struct page *page = NULL;
+	struct kvm_cma *cma = &kvm_cma_area;
+	unsigned long chunk_count, nr_chunk;
+	unsigned long mask, pfn, pageno, start = 0;
+
+
+	if (!cma || !cma->count)
+		return NULL;
+
+	pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
+		 (void *)cma, nr_pages, align_pages);
+
+	if (!nr_pages)
+		return NULL;
+	/*
+	 * align mask with chunk size. The bit tracks pages in chunk size
+	 */
+	VM_BUG_ON(!is_power_of_2(align_pages));
+	mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
+	BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
+
+	chunk_count = cma->count >>  (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+	nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
+	mutex_lock(&kvm_cma_mutex);
+	for (;;) {
+		pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
+						    start, nr_chunk, mask);
+		if (pageno >= chunk_count)
+			break;
+
+		pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
+		ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
+		if (ret == 0) {
+			bitmap_set(cma->bitmap, pageno, nr_chunk);
+			page = pfn_to_page(pfn);
+			memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
+			break;
+		} else if (ret != -EBUSY) {
+			break;
+		}
+		pr_debug("%s(): memory range at %p is busy, retrying\n",
+			 __func__, pfn_to_page(pfn));
+		/* try again with a bit different memory target */
+		start = pageno + mask + 1;
+	}
+	mutex_unlock(&kvm_cma_mutex);
+	pr_debug("%s(): returned %p\n", __func__, page);
+	return page;
+}
+
+/**
+ * kvm_release_cma() - release allocated pages for hash pagetable
+ * @pages: Allocated pages.
+ * @nr_pages: Number of allocated pages.
+ *
+ * This function releases memory allocated by kvm_alloc_cma().
+ * It returns false when provided pages do not belong to contiguous area and
+ * true otherwise.
+ */
+bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
+{
+	unsigned long pfn;
+	unsigned long nr_chunk;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+	if (!cma || !pages)
+		return false;
+
+	pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
+
+	pfn = page_to_pfn(pages);
+
+	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+		return false;
+
+	VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
+	nr_chunk = nr_pages >>  (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
+	mutex_lock(&kvm_cma_mutex);
+	bitmap_clear(cma->bitmap,
+		     (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
+		     nr_chunk);
+	free_contig_range(pfn, nr_pages);
+	mutex_unlock(&kvm_cma_mutex);
+
+	return true;
+}
+
+static int __init kvm_cma_activate_area(unsigned long base_pfn,
+					unsigned long count)
+{
+	unsigned long pfn = base_pfn;
+	unsigned i = count >> pageblock_order;
+	struct zone *zone;
+
+	WARN_ON_ONCE(!pfn_valid(pfn));
+	zone = page_zone(pfn_to_page(pfn));
+	do {
+		unsigned j;
+		base_pfn = pfn;
+		for (j = pageblock_nr_pages; j; --j, pfn++) {
+			WARN_ON_ONCE(!pfn_valid(pfn));
+			/*
+			 * alloc_contig_range requires the pfn range
+			 * specified to be in the same zone. Make this
+			 * simple by forcing the entire CMA resv range
+			 * to be in the same zone.
+			 */
+			if (page_zone(pfn_to_page(pfn)) != zone)
+				return -EINVAL;
+		}
+		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+	} while (--i);
+	return 0;
+}
+
+static int __init kvm_cma_init_reserved_areas(void)
+{
+	int bitmap_size, ret;
+	unsigned long chunk_count;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+	pr_debug("%s()\n", __func__);
+	if (!cma->count)
+		return 0;
+	chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+	bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
+	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!cma->bitmap)
+		return -ENOMEM;
+
+	ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
+	if (ret)
+		goto error;
+	return 0;
+
+error:
+	kfree(cma->bitmap);
+	return ret;
+}
+core_initcall(kvm_cma_init_reserved_areas);
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
new file mode 100644
index 000000000000..655144f75fa5
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.h
@@ -0,0 +1,27 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable  based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ *
+ */
+
+#ifndef __POWERPC_KVM_CMA_ALLOC_H__
+#define __POWERPC_KVM_CMA_ALLOC_H__
+/*
+ * Both RMA and Hash page allocation will be multiple of 256K.
+ */
+#define KVM_CMA_CHUNK_ORDER	18
+
+extern struct page *kvm_alloc_cma(unsigned long nr_pages,
+				  unsigned long align_pages);
+extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
+extern long kvm_cma_declare_contiguous(phys_addr_t size,
+				       phys_addr_t alignment) __init;
+#endif
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fc25689a9f35..45e30d6e462b 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -383,6 +383,80 @@ static inline int try_lock_tlbie(unsigned int *lock)
 	return old == 0;
 }
 
+/*
+ * tlbie/tlbiel is a bit different on the PPC970 compared to later
+ * processors such as POWER7; the large page bit is in the instruction
+ * not RB, and the top 16 bits and the bottom 12 bits of the VA
+ * in RB must be 0.
+ */
+static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
+			  long npages, int global, bool need_sync)
+{
+	long i;
+
+	if (global) {
+		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+			cpu_relax();
+		if (need_sync)
+			asm volatile("ptesync" : : : "memory");
+		for (i = 0; i < npages; ++i) {
+			unsigned long rb = rbvalues[i];
+
+			if (rb & 1)		/* large page */
+				asm volatile("tlbie %0,1" : :
+					     "r" (rb & 0x0000fffffffff000ul));
+			else
+				asm volatile("tlbie %0,0" : :
+					     "r" (rb & 0x0000fffffffff000ul));
+		}
+		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+		kvm->arch.tlbie_lock = 0;
+	} else {
+		if (need_sync)
+			asm volatile("ptesync" : : : "memory");
+		for (i = 0; i < npages; ++i) {
+			unsigned long rb = rbvalues[i];
+
+			if (rb & 1)		/* large page */
+				asm volatile("tlbiel %0,1" : :
+					     "r" (rb & 0x0000fffffffff000ul));
+			else
+				asm volatile("tlbiel %0,0" : :
+					     "r" (rb & 0x0000fffffffff000ul));
+		}
+		asm volatile("ptesync" : : : "memory");
+	}
+}
+
+static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
+		      long npages, int global, bool need_sync)
+{
+	long i;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+		/* PPC970 tlbie instruction is a bit different */
+		do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
+		return;
+	}
+	if (global) {
+		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+			cpu_relax();
+		if (need_sync)
+			asm volatile("ptesync" : : : "memory");
+		for (i = 0; i < npages; ++i)
+			asm volatile(PPC_TLBIE(%1,%0) : :
+				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
+		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+		kvm->arch.tlbie_lock = 0;
+	} else {
+		if (need_sync)
+			asm volatile("ptesync" : : : "memory");
+		for (i = 0; i < npages; ++i)
+			asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
+		asm volatile("ptesync" : : : "memory");
+	}
+}
+
 long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 			unsigned long pte_index, unsigned long avpn,
 			unsigned long *hpret)
@@ -408,19 +482,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 	if (v & HPTE_V_VALID) {
 		hpte[0] &= ~HPTE_V_VALID;
 		rb = compute_tlbie_rb(v, hpte[1], pte_index);
-		if (global_invalidates(kvm, flags)) {
-			while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-				cpu_relax();
-			asm volatile("ptesync" : : : "memory");
-			asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
-				     : : "r" (rb), "r" (kvm->arch.lpid));
-			asm volatile("ptesync" : : : "memory");
-			kvm->arch.tlbie_lock = 0;
-		} else {
-			asm volatile("ptesync" : : : "memory");
-			asm volatile("tlbiel %0" : : "r" (rb));
-			asm volatile("ptesync" : : : "memory");
-		}
+		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
 		/* Read PTE low word after tlbie to get final R/C values */
 		remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
 	}
@@ -448,12 +510,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 	unsigned long *hp, *hptes[4], tlbrb[4];
 	long int i, j, k, n, found, indexes[4];
 	unsigned long flags, req, pte_index, rcbits;
-	long int local = 0;
+	int global;
 	long int ret = H_SUCCESS;
 	struct revmap_entry *rev, *revs[4];
 
-	if (atomic_read(&kvm->online_vcpus) == 1)
-		local = 1;
+	global = global_invalidates(kvm, 0);
 	for (i = 0; i < 4 && ret == H_SUCCESS; ) {
 		n = 0;
 		for (; i < 4; ++i) {
@@ -529,22 +590,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 			break;
 
 		/* Now that we've collected a batch, do the tlbies */
-		if (!local) {
-			while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
-				cpu_relax();
-			asm volatile("ptesync" : : : "memory");
-			for (k = 0; k < n; ++k)
-				asm volatile(PPC_TLBIE(%1,%0) : :
-					     "r" (tlbrb[k]),
-					     "r" (kvm->arch.lpid));
-			asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-			kvm->arch.tlbie_lock = 0;
-		} else {
-			asm volatile("ptesync" : : : "memory");
-			for (k = 0; k < n; ++k)
-				asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
-			asm volatile("ptesync" : : : "memory");
-		}
+		do_tlbies(kvm, tlbrb, n, global, true);
 
 		/* Read PTE low words after tlbie to get final R/C values */
 		for (k = 0; k < n; ++k) {
@@ -603,19 +649,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 	if (v & HPTE_V_VALID) {
 		rb = compute_tlbie_rb(v, r, pte_index);
 		hpte[0] = v & ~HPTE_V_VALID;
-		if (global_invalidates(kvm, flags)) {
-			while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
-				cpu_relax();
-			asm volatile("ptesync" : : : "memory");
-			asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
-				     : : "r" (rb), "r" (kvm->arch.lpid));
-			asm volatile("ptesync" : : : "memory");
-			kvm->arch.tlbie_lock = 0;
-		} else {
-			asm volatile("ptesync" : : : "memory");
-			asm volatile("tlbiel %0" : : "r" (rb));
-			asm volatile("ptesync" : : : "memory");
-		}
+		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
 		/*
 		 * If the host has this page as readonly but the guest
 		 * wants to make it read/write, reduce the permissions.
@@ -686,13 +720,7 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
 
 	hptep[0] &= ~HPTE_V_VALID;
 	rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
-	while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-		cpu_relax();
-	asm volatile("ptesync" : : : "memory");
-	asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
-		     : : "r" (rb), "r" (kvm->arch.lpid));
-	asm volatile("ptesync" : : : "memory");
-	kvm->arch.tlbie_lock = 0;
+	do_tlbies(kvm, &rb, 1, 1, true);
 }
 EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
 
@@ -706,12 +734,7 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
 	rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
 	/* modify only the second-last byte, which contains the ref bit */
 	*((char *)hptep + 14) = rbyte;
-	while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-		cpu_relax();
-	asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
-		     : : "r" (rb), "r" (kvm->arch.lpid));
-	asm volatile("ptesync" : : : "memory");
-	kvm->arch.tlbie_lock = 0;
+	do_tlbies(kvm, &rb, 1, 1, false);
 }
 EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b02f91e4c70d..60dce5bfab3f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1381,7 +1381,7 @@ hcall_try_real_mode:
 	cmpldi	r3,hcall_real_table_end - hcall_real_table
 	bge	guest_exit_cont
 	LOAD_REG_ADDR(r4, hcall_real_table)
-	lwzx	r3,r3,r4
+	lwax	r3,r3,r4
 	cmpwi	r3,0
 	beq	guest_exit_cont
 	add	r3,r3,r4
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 48cbbf862958..17cfae5497a3 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -92,6 +92,11 @@ kvm_start_lightweight:
 	PPC_LL	r3, VCPU_HFLAGS(r4)
 	rldicl	r3, r3, 0, 63		/* r3 &= 1 */
 	stb	r3, HSTATE_RESTORE_HID5(r13)
+
+	/* Load up guest SPRG3 value, since it's user readable */
+	ld	r3, VCPU_SHARED(r4)
+	ld	r3, VCPU_SHARED_SPRG3(r3)
+	mtspr	SPRN_SPRG3, r3
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 	PPC_LL	r4, VCPU_SHADOW_MSR(r4)	/* get shadow_msr */
@@ -123,6 +128,15 @@ kvmppc_handler_highmem:
 	/* R7 = vcpu */
 	PPC_LL	r7, GPR4(r1)
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * Reload kernel SPRG3 value.
+	 * No need to save guest value as usermode can't modify SPRG3.
+	 */
+	ld	r3, PACA_SPRG3(r13)
+	mtspr	SPRN_SPRG3, r3
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 	PPC_STL	r14, VCPU_GPR(R14)(r7)
 	PPC_STL	r15, VCPU_GPR(R15)(r7)
 	PPC_STL	r16, VCPU_GPR(R16)(r7)
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 19498a567a81..27db1e665959 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -468,7 +468,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 		 * both the traditional FP registers and the added VSX
 		 * registers into thread.fpr[].
 		 */
-		giveup_fpu(current);
+		if (current->thread.regs->msr & MSR_FP)
+			giveup_fpu(current);
 		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
 			vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
 
@@ -483,7 +484,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 
 #ifdef CONFIG_ALTIVEC
 	if (msr & MSR_VEC) {
-		giveup_altivec(current);
+		if (current->thread.regs->msr & MSR_VEC)
+			giveup_altivec(current);
 		memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
 		vcpu->arch.vscr = t->vscr;
 	}
@@ -575,8 +577,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 	printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
 #endif
 
-	current->thread.regs->msr |= msr;
-
 	if (msr & MSR_FP) {
 		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
 			thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
@@ -598,12 +598,32 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 #endif
 	}
 
+	current->thread.regs->msr |= msr;
 	vcpu->arch.guest_owned_ext |= msr;
 	kvmppc_recalc_shadow_msr(vcpu);
 
 	return RESUME_GUEST;
 }
 
+/*
+ * Kernel code using FP or VMX could have flushed guest state to
+ * the thread_struct; if so, get it back now.
+ */
+static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
+{
+	unsigned long lost_ext;
+
+	lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr;
+	if (!lost_ext)
+		return;
+
+	if (lost_ext & MSR_FP)
+		kvmppc_load_up_fpu();
+	if (lost_ext & MSR_VEC)
+		kvmppc_load_up_altivec();
+	current->thread.regs->msr |= lost_ext;
+}
+
 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        unsigned int exit_nr)
 {
@@ -772,7 +792,7 @@ program_interrupt:
 	}
 	case BOOK3S_INTERRUPT_SYSCALL:
 		if (vcpu->arch.papr_enabled &&
-		    (kvmppc_get_last_inst(vcpu) == 0x44000022) &&
+		    (kvmppc_get_last_sc(vcpu) == 0x44000022) &&
 		    !(vcpu->arch.shared->msr & MSR_PR)) {
 			/* SC 1 papr hypercalls */
 			ulong cmd = kvmppc_get_gpr(vcpu, 3);
@@ -890,8 +910,9 @@ program_interrupt:
 			local_irq_enable();
 			r = s;
 		} else {
-			kvmppc_lazy_ee_enable();
+			kvmppc_fix_ee_before_entry();
 		}
+		kvmppc_handle_lost_ext(vcpu);
 	}
 
 	trace_kvm_book3s_reenter(r, vcpu);
@@ -1047,11 +1068,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (err)
 		goto free_shadow_vcpu;
 
+	err = -ENOMEM;
 	p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
-	/* the real shared page fills the last 4k of our page */
-	vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
 	if (!p)
 		goto uninit_vcpu;
+	/* the real shared page fills the last 4k of our page */
+	vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	/* default to book3s_64 (970fx) */
@@ -1161,7 +1183,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	if (vcpu->arch.shared->msr & MSR_FP)
 		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 
-	kvmppc_lazy_ee_enable();
+	kvmppc_fix_ee_before_entry();
 
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 94c1dd46b83d..a3a5cb8ee7ea 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -19,6 +19,7 @@
 #include <asm/hvcall.h>
 #include <asm/xics.h>
 #include <asm/debug.h>
+#include <asm/time.h>
 
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index dcc94f016007..17722d82f1d1 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -674,8 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
-	kvm_guest_enter();
-
 #ifdef CONFIG_PPC_FPU
 	/* Save userspace FPU state in stack */
 	enable_kernel_fp();
@@ -698,7 +696,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	kvmppc_load_guest_fp(vcpu);
 #endif
 
-	kvmppc_lazy_ee_enable();
+	kvmppc_fix_ee_before_entry();
 
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
@@ -1168,7 +1166,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			local_irq_enable();
 			r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
 		} else {
-			kvmppc_lazy_ee_enable();
+			kvmppc_fix_ee_before_entry();
 		}
 	}
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ae63ae4a1a5f..f55e14cd1762 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -117,8 +117,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			kvm_guest_exit();
 			continue;
 		}
-
-		trace_hardirqs_on();
 #endif
 
 		kvm_guest_enter();
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c0efd16bdfa1..c76ff74a98f2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -516,6 +516,11 @@ struct kvm_vcpu_arch {
 
 	/* set at EPT violation at this point */
 	unsigned long exit_qualification;
+
+	/* pv related host specific info */
+	struct {
+		bool pv_unhalted;
+	} pv;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 06fdbd987e97..94dc8ca434e0 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -23,6 +23,7 @@
 #define KVM_FEATURE_ASYNC_PF		4
 #define KVM_FEATURE_STEAL_TIME		5
 #define KVM_FEATURE_PV_EOI		6
+#define KVM_FEATURE_PV_UNHALT		7
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a20ecb5b6cbf..b110fe6c03d4 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -413,7 +413,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
 			     (1 << KVM_FEATURE_ASYNC_PF) |
 			     (1 << KVM_FEATURE_PV_EOI) |
-			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
+			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
+			     (1 << KVM_FEATURE_PV_UNHALT);
 
 		if (sched_info_on())
 			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c98f05442325..5439117d5c4c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -689,7 +689,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		break;
 
 	case APIC_DM_REMRD:
-		apic_debug("Ignoring delivery mode 3\n");
+		result = 1;
+		vcpu->arch.pv.pv_unhalted = 1;
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+		kvm_vcpu_kick(vcpu);
 		break;
 
 	case APIC_DM_SMI:
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9651c9937588..6e2d2c8f230b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2781,7 +2781,7 @@ exit:
 	return ret;
 }
 
-static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code)
+static bool page_fault_can_be_fast(u32 error_code)
 {
 	/*
 	 * Do not fix the mmio spte with invalid generation number which
@@ -2834,7 +2834,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 	bool ret = false;
 	u64 spte = 0ull;
 
-	if (!page_fault_can_be_fast(vcpu, error_code))
+	if (!page_fault_can_be_fast(error_code))
 		return false;
 
 	walk_shadow_page_lockless_begin(vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 57b4e129891a..1f1da43ff2a2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5485,6 +5485,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 		err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
 
 		if (err == EMULATE_USER_EXIT) {
+			++vcpu->stat.mmio_exits;
 			ret = 0;
 			goto out;
 		}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 668f19aee6ca..e5ca72a5cdb6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1447,6 +1447,29 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
 #endif
 }
 
+static void kvm_gen_update_masterclock(struct kvm *kvm)
+{
+#ifdef CONFIG_X86_64
+	int i;
+	struct kvm_vcpu *vcpu;
+	struct kvm_arch *ka = &kvm->arch;
+
+	spin_lock(&ka->pvclock_gtod_sync_lock);
+	kvm_make_mclock_inprogress_request(kvm);
+	/* no guest entries from this point */
+	pvclock_update_vm_gtod_copy(kvm);
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+
+	/* guest entries allowed */
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
+
+	spin_unlock(&ka->pvclock_gtod_sync_lock);
+#endif
+}
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
 	unsigned long flags, this_tsc_khz;
@@ -3796,6 +3819,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		delta = user_ns.clock - now_ns;
 		local_irq_enable();
 		kvm->arch.kvmclock_offset = delta;
+		kvm_gen_update_masterclock(kvm);
 		break;
 	}
 	case KVM_GET_CLOCK: {
@@ -5122,9 +5146,10 @@ restart:
 		inject_emulated_exception(vcpu);
 		r = EMULATE_DONE;
 	} else if (vcpu->arch.pio.count) {
-		if (!vcpu->arch.pio.in)
+		if (!vcpu->arch.pio.in) {
+			/* FIXME: return into emulator if single-stepping.  */
 			vcpu->arch.pio.count = 0;
-		else {
+		} else {
 			writeback = false;
 			vcpu->arch.complete_userspace_io = complete_emulated_pio;
 		}
@@ -5588,6 +5613,23 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+/*
+ * kvm_pv_kick_cpu_op:  Kick a vcpu.
+ *
+ * @apicid - apicid of vcpu to be kicked.
+ */
+static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
+{
+	struct kvm_lapic_irq lapic_irq;
+
+	lapic_irq.shorthand = 0;
+	lapic_irq.dest_mode = 0;
+	lapic_irq.dest_id = apicid;
+
+	lapic_irq.delivery_mode = APIC_DM_REMRD;
+	kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL);
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
@@ -5621,6 +5663,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	case KVM_HC_VAPIC_POLL_IRQ:
 		ret = 0;
 		break;
+	case KVM_HC_KICK_CPU:
+		kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
+		ret = 0;
+		break;
 	default:
 		ret = -KVM_ENOSYS;
 		break;
@@ -5782,29 +5828,6 @@ static void process_nmi(struct kvm_vcpu *vcpu)
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 
-static void kvm_gen_update_masterclock(struct kvm *kvm)
-{
-#ifdef CONFIG_X86_64
-	int i;
-	struct kvm_vcpu *vcpu;
-	struct kvm_arch *ka = &kvm->arch;
-
-	spin_lock(&ka->pvclock_gtod_sync_lock);
-	kvm_make_mclock_inprogress_request(kvm);
-	/* no guest entries from this point */
-	pvclock_update_vm_gtod_copy(kvm);
-
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
-
-	/* guest entries allowed */
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
-
-	spin_unlock(&ka->pvclock_gtod_sync_lock);
-#endif
-}
-
 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 {
 	u64 eoi_exit_bitmap[4];
@@ -6043,6 +6066,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 				kvm_apic_accept_events(vcpu);
 				switch(vcpu->arch.mp_state) {
 				case KVM_MP_STATE_HALTED:
+					vcpu->arch.pv.pv_unhalted = false;
 					vcpu->arch.mp_state =
 						KVM_MP_STATE_RUNNABLE;
 				case KVM_MP_STATE_RUNNABLE:
@@ -6154,6 +6178,8 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 
 	if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
 		vcpu->mmio_needed = 0;
+
+		/* FIXME: return into emulator if single-stepping.  */
 		if (vcpu->mmio_is_write)
 			return 1;
 		vcpu->mmio_read_completed = 1;
@@ -6342,7 +6368,12 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
 	kvm_apic_accept_events(vcpu);
-	mp_state->mp_state = vcpu->arch.mp_state;
+	if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
+					vcpu->arch.pv.pv_unhalted)
+		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+	else
+		mp_state->mp_state = vcpu->arch.mp_state;
+
 	return 0;
 }
 
@@ -6863,6 +6894,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	BUG_ON(vcpu->kvm == NULL);
 	kvm = vcpu->kvm;
 
+	vcpu->arch.pv.pv_unhalted = false;
 	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
 	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -7200,6 +7232,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 		!vcpu->arch.apf.halted)
 		|| !list_empty_careful(&vcpu->async_pf.done)
 		|| kvm_apic_has_events(vcpu)
+		|| vcpu->arch.pv.pv_unhalted
 		|| atomic_read(&vcpu->arch.nmi_queued) ||
 		(kvm_arch_interrupt_allowed(vcpu) &&
 		 kvm_cpu_has_interrupt(vcpu));
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 5daa2599ed48..e373671652b0 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -200,11 +200,9 @@ config DMA_SHARED_BUFFER
 	  APIs extension; the file's descriptor can then be passed on to other
 	  driver.
 
-config CMA
-	bool "Contiguous Memory Allocator"
-	depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK
-	select MIGRATION
-	select MEMORY_ISOLATION
+config DMA_CMA
+	bool "DMA Contiguous Memory Allocator"
+	depends on HAVE_DMA_CONTIGUOUS && CMA
 	help
 	  This enables the Contiguous Memory Allocator which allows drivers
 	  to allocate big physically-contiguous blocks of memory for use with
@@ -213,17 +211,7 @@ config CMA
 	  For more information see <include/linux/dma-contiguous.h>.
 	  If unsure, say "n".
 
-if CMA
-
-config CMA_DEBUG
-	bool "CMA debug messages (DEVELOPMENT)"
-	depends on DEBUG_KERNEL
-	help
-	  Turns on debug messages in CMA.  This produces KERN_DEBUG
-	  messages for every CMA call as well as various messages while
-	  processing calls such as dma_alloc_from_contiguous().
-	  This option does not affect warning and error messages.
-
+if  DMA_CMA
 comment "Default contiguous memory area size:"
 
 config CMA_SIZE_MBYTES
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 48029aa477d9..94e8a80e87f8 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -6,7 +6,7 @@ obj-y			:= core.o bus.o dd.o syscore.o \
 			   attribute_container.o transport_class.o \
 			   topology.o
 obj-$(CONFIG_DEVTMPFS)	+= devtmpfs.o
-obj-$(CONFIG_CMA) += dma-contiguous.o
+obj-$(CONFIG_DMA_CMA) += dma-contiguous.o
 obj-y			+= power/
 obj-$(CONFIG_HAS_DMA)	+= dma-mapping.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 343744e4809c..7e2d15837b02 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -26,7 +26,7 @@
 #include <linux/types.h>
 #include <linux/irqchip/arm-gic.h>
 
-#define VGIC_NR_IRQS		128
+#define VGIC_NR_IRQS		256
 #define VGIC_NR_SGIS		16
 #define VGIC_NR_PPIS		16
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 01b5c84be828..00141d3325fe 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -57,7 +57,7 @@ struct cma;
 struct page;
 struct device;
 
-#ifdef CONFIG_CMA
+#ifdef CONFIG_DMA_CMA
 
 /*
  * There is always at least global CMA area and a few optional device
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index acccd08be6c7..99c25338ede8 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -667,6 +667,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_RTAS 91
 #define KVM_CAP_IRQ_XICS 92
 #define KVM_CAP_ARM_EL1_32BIT 93
+#define KVM_CAP_SPAPR_MULTITCE 94
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index cea2c5c72d26..2841f86eae0b 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -19,6 +19,7 @@
 #define KVM_HC_MMU_OP			2
 #define KVM_HC_FEATURES			3
 #define KVM_HC_PPC_MAP_MAGIC_PAGE	4
+#define KVM_HC_KICK_CPU			5
 
 /*
  * hypercalls use architecture specific
diff --git a/mm/Kconfig b/mm/Kconfig
index 8028dcc6615c..6cdd27043303 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -478,6 +478,30 @@ config FRONTSWAP
 
 	  If unsure, say Y to enable frontswap.
 
+config CMA
+	bool "Contiguous Memory Allocator"
+	depends on HAVE_MEMBLOCK
+	select MIGRATION
+	select MEMORY_ISOLATION
+	help
+	  This enables the Contiguous Memory Allocator which allows other
+	  subsystems to allocate big physically-contiguous blocks of memory.
+	  CMA reserves a region of memory and allows only movable pages to
+	  be allocated from it. This way, the kernel can use the memory for
+	  pagecache and when a subsystem requests for contiguous area, the
+	  allocated pages are migrated away to serve the contiguous request.
+
+	  If unsure, say "n".
+
+config CMA_DEBUG
+	bool "CMA debug messages (DEVELOPMENT)"
+	depends on DEBUG_KERNEL && CMA
+	help
+	  Turns on debug messages in CMA.  This produces KERN_DEBUG
+	  messages for every CMA call as well as various messages while
+	  processing calls such as dma_alloc_from_contiguous().
+	  This option does not affect warning and error messages.
+
 config ZBUD
 	tristate
 	default n
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 17c5ac7d10ed..685fc72fc751 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -149,7 +149,7 @@ static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
 {
 	offset >>= 2;
 	BUG_ON(offset > (VGIC_NR_IRQS / 4));
-	if (offset < 4)
+	if (offset < 8)
 		return x->percpu[cpuid] + offset;
 	else
 		return x->shared + offset - 8;
@@ -432,19 +432,13 @@ static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
 static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
-	struct kvm_vcpu *vcpu;
-	int i, c;
-	unsigned long *bmap;
+	int i;
 	u32 val = 0;
 
 	irq -= VGIC_NR_PRIVATE_IRQS;
 
-	kvm_for_each_vcpu(c, vcpu, kvm) {
-		bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
-		for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
-			if (test_bit(irq + i, bmap))
-				val |= 1 << (c + i * 8);
-	}
+	for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
+		val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8);
 
 	return val;
 }
@@ -547,8 +541,12 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
 				struct kvm_exit_mmio *mmio, phys_addr_t offset)
 {
 	u32 val;
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
-				       vcpu->vcpu_id, offset >> 1);
+	u32 *reg;
+
+	offset >>= 1;
+	reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
+				  vcpu->vcpu_id, offset);
+
 	if (offset & 2)
 		val = *reg >> 16;
 	else
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c6c8bbea1748..bf040c4e02b3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,28 +102,8 @@ static bool largepages_enabled = true;
 
 bool kvm_is_mmio_pfn(pfn_t pfn)
 {
-	if (pfn_valid(pfn)) {
-		int reserved;
-		struct page *tail = pfn_to_page(pfn);
-		struct page *head = compound_trans_head(tail);
-		reserved = PageReserved(head);
-		if (head != tail) {
-			/*
-			 * "head" is not a dangling pointer
-			 * (compound_trans_head takes care of that)
-			 * but the hugepage may have been splitted
-			 * from under us (and we may not hold a
-			 * reference count on the head page so it can
-			 * be reused before we run PageReferenced), so
-			 * we've to check PageTail before returning
-			 * what we just read.
-			 */
-			smp_rmb();
-			if (PageTail(tail))
-				return reserved;
-		}
-		return PageReserved(tail);
-	}
+	if (pfn_valid(pfn))
+		return PageReserved(pfn_to_page(pfn));
 
 	return true;
 }
@@ -1896,7 +1876,7 @@ static struct file_operations kvm_vcpu_fops = {
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
-	return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
+	return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
 }
 
 /*
@@ -2305,7 +2285,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 		return ret;
 	}
 
-	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR);
+	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
 	if (ret < 0) {
 		ops->destroy(dev);
 		return ret;
@@ -2589,7 +2569,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 		return r;
 	}
 #endif
-	r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
+	r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
 	if (r < 0)
 		kvm_put_kvm(kvm);
 
@@ -2815,8 +2795,8 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 	kfree(bus);
 }
 
-static inline int __kvm_io_bus_sort_cmp(const struct kvm_io_range *r1,
-                                        const struct kvm_io_range *r2)
+static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1,
+                                 const struct kvm_io_range *r2)
 {
 	if (r1->addr < r2->addr)
 		return -1;
@@ -2827,7 +2807,7 @@ static inline int __kvm_io_bus_sort_cmp(const struct kvm_io_range *r1,
 
 static int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
 {
-	return __kvm_io_bus_sort_cmp(p1, p2);
+	return kvm_io_bus_cmp(p1, p2);
 }
 
 static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
@@ -2863,7 +2843,7 @@ static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
 
 	off = range - bus->range;
 
-	while (off > 0 && __kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0)
+	while (off > 0 && kvm_io_bus_cmp(&key, &bus->range[off-1]) == 0)
 		off--;
 
 	return off;
@@ -2879,7 +2859,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus,
 		return -EOPNOTSUPP;
 
 	while (idx < bus->dev_count &&
-		__kvm_io_bus_sort_cmp(range, &bus->range[idx]) == 0) {
+		kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
 		if (!kvm_iodevice_write(bus->range[idx].dev, range->addr,
 					range->len, val))
 			return idx;
@@ -2923,7 +2903,7 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 
 	/* First try the device referenced by cookie. */
 	if ((cookie >= 0) && (cookie < bus->dev_count) &&
-	    (__kvm_io_bus_sort_cmp(&range, &bus->range[cookie]) == 0))
+	    (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0))
 		if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len,
 					val))
 			return cookie;
@@ -2945,7 +2925,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
 		return -EOPNOTSUPP;
 
 	while (idx < bus->dev_count &&
-		__kvm_io_bus_sort_cmp(range, &bus->range[idx]) == 0) {
+		kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
 		if (!kvm_iodevice_read(bus->range[idx].dev, range->addr,
 				       range->len, val))
 			return idx;
@@ -2989,7 +2969,7 @@ int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 
 	/* First try the device referenced by cookie. */
 	if ((cookie >= 0) && (cookie < bus->dev_count) &&
-	    (__kvm_io_bus_sort_cmp(&range, &bus->range[cookie]) == 0))
+	    (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0))
 		if (!kvm_iodevice_read(bus->range[cookie].dev, addr, len,
 				       val))
 			return cookie;