25 files changed, 850 insertions, 374 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index eb3abf8ac44e..586b786b3edf 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -7,6 +7,9 @@ obj-$(CONFIG_KVM) += kvm/
 # Xen paravirtualization support
 obj-$(CONFIG_XEN) += xen/
 
+# Hyper-V paravirtualization support
+obj-$(CONFIG_HYPERVISOR_GUEST) += hyperv/
+
 # lguest paravirtualization support
 obj-$(CONFIG_LGUEST_GUEST) += lguest/
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f8fbfc5a98ba..874c1238dffd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -46,6 +46,7 @@ config X86
 	select ARCH_CLOCKSOURCE_DATA
 	select ARCH_DISCARD_MEMBLOCK
 	select ARCH_HAS_ACPI_TABLE_UPGRADE	if ACPI
+	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
@@ -53,7 +54,10 @@ config X86
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_MMIO_FLUSH
 	select ARCH_HAS_PMEM_API		if X86_64
+	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_SG_CHAIN
+	select ARCH_HAS_STRICT_KERNEL_RWX
+	select ARCH_HAS_STRICT_MODULE_RWX
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
@@ -309,9 +313,6 @@ config ARCH_SUPPORTS_UPROBES
 config FIX_EARLYCON_MEM
 	def_bool y
 
-config DEBUG_RODATA
-	def_bool y
-
 config PGTABLE_LEVELS
 	int
 	default 4 if X86_64
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 783099f2ac72..c4cba00dbdee 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -109,17 +109,6 @@ config DEBUG_WX
 
 	  If in doubt, say "Y".
 
-config DEBUG_SET_MODULE_RONX
-	bool "Set loadable kernel module data as NX and text as RO"
-	depends on MODULES
-	---help---
-	  This option helps catch unintended modifications to loadable
-	  kernel module's text and read-only data. It also prevents execution
-	  of module data. Such protection may interfere with run-time code
-	  patching and dynamic kernel tracing - and they might also protect
-	  against certain classes of kernel exploits.
-	  If in doubt, say "N".
-
 config DOUBLEFAULT
 	default y
 	bool "Enable doublefault exception handler" if EXPERT
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
new file mode 100644
index 000000000000..171ae09864d7
--- /dev/null
+++ b/arch/x86/hyperv/Makefile
@@ -0,0 +1 @@
+obj-y		:= hv_init.o
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
new file mode 100644
index 000000000000..db64baf0e500
--- /dev/null
+++ b/arch/x86/hyperv/hv_init.c
@@ -0,0 +1,277 @@
+/*
+ * X86 specific Hyper-V initialization code.
+ *
+ * Copyright (C) 2016, Microsoft, Inc.
+ *
+ * Author : K. Y. Srinivasan <kys@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ */
+
+#include <linux/types.h>
+#include <asm/hypervisor.h>
+#include <asm/hyperv.h>
+#include <asm/mshyperv.h>
+#include <linux/version.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/clockchips.h>
+
+
+#ifdef CONFIG_X86_64
+
+static struct ms_hyperv_tsc_page *tsc_pg;
+
+static u64 read_hv_clock_tsc(struct clocksource *arg)
+{
+	u64 current_tick;
+
+	if (tsc_pg->tsc_sequence != 0) {
+		/*
+		 * Use the tsc page to compute the value.
+		 */
+
+		while (1) {
+			u64 tmp;
+			u32 sequence = tsc_pg->tsc_sequence;
+			u64 cur_tsc;
+			u64 scale = tsc_pg->tsc_scale;
+			s64 offset = tsc_pg->tsc_offset;
+
+			rdtscll(cur_tsc);
+			/* current_tick = ((cur_tsc *scale) >> 64) + offset */
+			asm("mulq %3"
+				: "=d" (current_tick), "=a" (tmp)
+				: "a" (cur_tsc), "r" (scale));
+
+			current_tick += offset;
+			if (tsc_pg->tsc_sequence == sequence)
+				return current_tick;
+
+			if (tsc_pg->tsc_sequence != 0)
+				continue;
+			/*
+			 * Fallback using MSR method.
+			 */
+			break;
+		}
+	}
+	rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
+	return current_tick;
+}
+
+static struct clocksource hyperv_cs_tsc = {
+		.name		= "hyperv_clocksource_tsc_page",
+		.rating		= 400,
+		.read		= read_hv_clock_tsc,
+		.mask		= CLOCKSOURCE_MASK(64),
+		.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+#endif
+
+static u64 read_hv_clock_msr(struct clocksource *arg)
+{
+	u64 current_tick;
+	/*
+	 * Read the partition counter to get the current tick count. This count
+	 * is set to 0 when the partition is created and is incremented in
+	 * 100 nanosecond units.
+	 */
+	rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
+	return current_tick;
+}
+
+static struct clocksource hyperv_cs_msr = {
+	.name		= "hyperv_clocksource_msr",
+	.rating		= 400,
+	.read		= read_hv_clock_msr,
+	.mask		= CLOCKSOURCE_MASK(64),
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static void *hypercall_pg;
+struct clocksource *hyperv_cs;
+EXPORT_SYMBOL_GPL(hyperv_cs);
+
+/*
+ * This function is to be invoked early in the boot sequence after the
+ * hypervisor has been detected.
+ *
+ * 1. Setup the hypercall page.
+ * 2. Register Hyper-V specific clocksource.
+ */
+void hyperv_init(void)
+{
+	u64 guest_id;
+	union hv_x64_msr_hypercall_contents hypercall_msr;
+
+	if (x86_hyper != &x86_hyper_ms_hyperv)
+		return;
+
+	/*
+	 * Setup the hypercall page and enable hypercalls.
+	 * 1. Register the guest ID
+	 * 2. Enable the hypercall and register the hypercall page
+	 */
+	guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
+	wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
+
+	hypercall_pg  = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
+	if (hypercall_pg == NULL) {
+		wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
+		return;
+	}
+
+	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+	hypercall_msr.enable = 1;
+	hypercall_msr.guest_physical_address = vmalloc_to_pfn(hypercall_pg);
+	wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+
+	/*
+	 * Register Hyper-V specific clocksource.
+	 */
+#ifdef CONFIG_X86_64
+	if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
+		union hv_x64_msr_hypercall_contents tsc_msr;
+
+		tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+		if (!tsc_pg)
+			goto register_msr_cs;
+
+		hyperv_cs = &hyperv_cs_tsc;
+
+		rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
+
+		tsc_msr.enable = 1;
+		tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
+
+		wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
+		clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
+		return;
+	}
+#endif
+	/*
+	 * For 32 bit guests just use the MSR based mechanism for reading
+	 * the partition counter.
+	 */
+
+register_msr_cs:
+	hyperv_cs = &hyperv_cs_msr;
+	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
+		clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
+}
+
+/*
+ * This routine is called before kexec/kdump, it does the required cleanup.
+ */
+void hyperv_cleanup(void)
+{
+	union hv_x64_msr_hypercall_contents hypercall_msr;
+
+	/* Reset our OS id */
+	wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
+
+	/* Reset the hypercall page */
+	hypercall_msr.as_uint64 = 0;
+	wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+
+	/* Reset the TSC page */
+	hypercall_msr.as_uint64 = 0;
+	wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
+}
+EXPORT_SYMBOL_GPL(hyperv_cleanup);
+
+/*
+ * hv_do_hypercall- Invoke the specified hypercall
+ */
+u64 hv_do_hypercall(u64 control, void *input, void *output)
+{
+	u64 input_address = (input) ? virt_to_phys(input) : 0;
+	u64 output_address = (output) ? virt_to_phys(output) : 0;
+#ifdef CONFIG_X86_64
+	u64 hv_status = 0;
+
+	if (!hypercall_pg)
+		return (u64)ULLONG_MAX;
+
+	__asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
+	__asm__ __volatile__("call *%3" : "=a" (hv_status) :
+			     "c" (control), "d" (input_address),
+			     "m" (hypercall_pg));
+
+	return hv_status;
+
+#else
+
+	u32 control_hi = control >> 32;
+	u32 control_lo = control & 0xFFFFFFFF;
+	u32 hv_status_hi = 1;
+	u32 hv_status_lo = 1;
+	u32 input_address_hi = input_address >> 32;
+	u32 input_address_lo = input_address & 0xFFFFFFFF;
+	u32 output_address_hi = output_address >> 32;
+	u32 output_address_lo = output_address & 0xFFFFFFFF;
+
+	if (!hypercall_pg)
+		return (u64)ULLONG_MAX;
+
+	__asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
+			      "=a"(hv_status_lo) : "d" (control_hi),
+			      "a" (control_lo), "b" (input_address_hi),
+			      "c" (input_address_lo), "D"(output_address_hi),
+			      "S"(output_address_lo), "m" (hypercall_pg));
+
+	return hv_status_lo | ((u64)hv_status_hi << 32);
+#endif /* !x86_64 */
+}
+EXPORT_SYMBOL_GPL(hv_do_hypercall);
+
+void hyperv_report_panic(struct pt_regs *regs)
+{
+	static bool panic_reported;
+
+	/*
+	 * We prefer to report panic on 'die' chain as we have proper
+	 * registers to report, but if we miss it (e.g. on BUG()) we need
+	 * to report it on 'panic'.
+	 */
+	if (panic_reported)
+		return;
+	panic_reported = true;
+
+	wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip);
+	wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax);
+	wrmsrl(HV_X64_MSR_CRASH_P2, regs->bx);
+	wrmsrl(HV_X64_MSR_CRASH_P3, regs->cx);
+	wrmsrl(HV_X64_MSR_CRASH_P4, regs->dx);
+
+	/*
+	 * Let Hyper-V know there is crash data available
+	 */
+	wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY);
+}
+EXPORT_SYMBOL_GPL(hyperv_report_panic);
+
+bool hv_is_hypercall_page_setup(void)
+{
+	union hv_x64_msr_hypercall_contents hypercall_msr;
+
+	/* Check if the hypercall page is setup */
+	hypercall_msr.as_uint64 = 0;
+	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+
+	if (!hypercall_msr.enable)
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(hv_is_hypercall_page_setup);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index aaf59b7da98a..7c9c895432a9 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -3,8 +3,28 @@
 
 #include <linux/types.h>
 #include <linux/interrupt.h>
+#include <linux/clocksource.h>
 #include <asm/hyperv.h>
 
+/*
+ * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
+ * is set by CPUID(HVCPUID_VERSION_FEATURES).
+ */
+enum hv_cpuid_function {
+	HVCPUID_VERSION_FEATURES		= 0x00000001,
+	HVCPUID_VENDOR_MAXFUNCTION		= 0x40000000,
+	HVCPUID_INTERFACE			= 0x40000001,
+
+	/*
+	 * The remaining functions depend on the value of
+	 * HVCPUID_INTERFACE
+	 */
+	HVCPUID_VERSION				= 0x40000002,
+	HVCPUID_FEATURES			= 0x40000003,
+	HVCPUID_ENLIGHTENMENT_INFO		= 0x40000004,
+	HVCPUID_IMPLEMENTATION_LIMITS		= 0x40000005,
+};
+
 struct ms_hyperv_info {
 	u32 features;
 	u32 misc_features;
@@ -13,6 +33,128 @@ struct ms_hyperv_info {
 
 extern struct ms_hyperv_info ms_hyperv;
 
+/*
+ * Declare the MSR used to setup pages used to communicate with the hypervisor.
+ */
+union hv_x64_msr_hypercall_contents {
+	u64 as_uint64;
+	struct {
+		u64 enable:1;
+		u64 reserved:11;
+		u64 guest_physical_address:52;
+	};
+};
+
+/*
+ * TSC page layout.
+ */
+
+struct ms_hyperv_tsc_page {
+	volatile u32 tsc_sequence;
+	u32 reserved1;
+	volatile u64 tsc_scale;
+	volatile s64 tsc_offset;
+	u64 reserved2[509];
+};
+
+/*
+ * The guest OS needs to register the guest ID with the hypervisor.
+ * The guest ID is a 64 bit entity and the structure of this ID is
+ * specified in the Hyper-V specification:
+ *
+ * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
+ *
+ * While the current guideline does not specify how Linux guest ID(s)
+ * need to be generated, our plan is to publish the guidelines for
+ * Linux and other guest operating systems that currently are hosted
+ * on Hyper-V. The implementation here conforms to this yet
+ * unpublished guidelines.
+ *
+ *
+ * Bit(s)
+ * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
+ * 62:56 - Os Type; Linux is 0x100
+ * 55:48 - Distro specific identification
+ * 47:16 - Linux kernel version number
+ * 15:0  - Distro specific identification
+ *
+ *
+ */
+
+#define HV_LINUX_VENDOR_ID              0x8100
+
+/*
+ * Generate the guest ID based on the guideline described above.
+ */
+
+static inline  __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version,
+				       __u64 d_info2)
+{
+	__u64 guest_id = 0;
+
+	guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48);
+	guest_id |= (d_info1 << 48);
+	guest_id |= (kernel_version << 16);
+	guest_id |= d_info2;
+
+	return guest_id;
+}
+
+
+/* Free the message slot and signal end-of-message if required */
+static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
+{
+	/*
+	 * On crash we're reading some other CPU's message page and we need
+	 * to be careful: this other CPU may already had cleared the header
+	 * and the host may already had delivered some other message there.
+	 * In case we blindly write msg->header.message_type we're going
+	 * to lose it. We can still lose a message of the same type but
+	 * we count on the fact that there can only be one
+	 * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages
+	 * on crash.
+	 */
+	if (cmpxchg(&msg->header.message_type, old_msg_type,
+		    HVMSG_NONE) != old_msg_type)
+		return;
+
+	/*
+	 * Make sure the write to MessageType (ie set to
+	 * HVMSG_NONE) happens before we read the
+	 * MessagePending and EOMing. Otherwise, the EOMing
+	 * will not deliver any more messages since there is
+	 * no empty slot
+	 */
+	mb();
+
+	if (msg->header.message_flags.msg_pending) {
+		/*
+		 * This will cause message queue rescan to
+		 * possibly deliver another msg from the
+		 * hypervisor
+		 */
+		wrmsrl(HV_X64_MSR_EOM, 0);
+	}
+}
+
+#define hv_get_current_tick(tick) rdmsrl(HV_X64_MSR_TIME_REF_COUNT, tick)
+#define hv_init_timer(timer, tick) wrmsrl(timer, tick)
+#define hv_init_timer_config(config, val) wrmsrl(config, val)
+
+#define hv_get_simp(val) rdmsrl(HV_X64_MSR_SIMP, val)
+#define hv_set_simp(val) wrmsrl(HV_X64_MSR_SIMP, val)
+
+#define hv_get_siefp(val) rdmsrl(HV_X64_MSR_SIEFP, val)
+#define hv_set_siefp(val) wrmsrl(HV_X64_MSR_SIEFP, val)
+
+#define hv_get_synic_state(val) rdmsrl(HV_X64_MSR_SCONTROL, val)
+#define hv_set_synic_state(val) wrmsrl(HV_X64_MSR_SCONTROL, val)
+
+#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index)
+
+#define hv_get_synint_state(int_num, val) rdmsrl(int_num, val)
+#define hv_set_synint_state(int_num, val) wrmsrl(int_num, val)
+
 void hyperv_callback_vector(void);
 #ifdef CONFIG_TRACING
 #define trace_hyperv_callback_vector hyperv_callback_vector
@@ -25,4 +167,13 @@ void hv_setup_kexec_handler(void (*handler)(void));
 void hv_remove_kexec_handler(void);
 void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
 void hv_remove_crash_handler(void);
+
+#if IS_ENABLED(CONFIG_HYPERV)
+extern struct clocksource *hyperv_cs;
+
+void hyperv_init(void);
+void hyperv_report_panic(struct pt_regs *regs);
+bool hv_is_hypercall_page_setup(void);
+void hyperv_cleanup(void);
+#endif
 #endif
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a12a047184ee..f6d20f6cca12 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -472,6 +472,13 @@ HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
 	return _hypercall2(int, xenpmu_op, op, arg);
 }
 
+static inline int
+HYPERVISOR_dm_op(
+	domid_t dom, unsigned int nr_bufs, void *bufs)
+{
+	return _hypercall3(int, dm_op, dom, nr_bufs, bufs);
+}
+
 static inline void
 MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
 {
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 9b1a91834ac8..3a20ccf787b8 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -73,6 +73,9 @@
   */
 #define HV_X64_MSR_STAT_PAGES_AVAILABLE		(1 << 8)
 
+/* Crash MSR available */
+#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10)
+
 /*
  * Feature identification: EBX indicates which flags were specified at
  * partition creation. The format is the same as the partition creation
@@ -144,6 +147,11 @@
  */
 #define HV_X64_RELAXED_TIMING_RECOMMENDED	(1 << 5)
 
+/*
+ * Crash notification flag.
+ */
+#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63)
+
 /* MSR used to identify the guest OS. */
 #define HV_X64_MSR_GUEST_OS_ID			0x40000000
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7ff007ed899d..ae32838cac5f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -724,11 +724,12 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 	return 0;
 }
 
-int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
+		 int *pcpu)
 {
 	int cpu;
 
-	cpu = acpi_register_lapic(physid, U32_MAX, ACPI_MADT_ENABLED);
+	cpu = acpi_register_lapic(physid, acpi_id, ACPI_MADT_ENABLED);
 	if (cpu < 0) {
 		pr_info(PREFIX "Unable to map lapic to logical cpu number\n");
 		return cpu;
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index af15f4444330..8233a630280f 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -12,7 +12,6 @@
 #include <linux/sched.h>
 
 #include <acpi/processor.h>
-#include <asm/acpi.h>
 #include <asm/mwait.h>
 #include <asm/special_insns.h>
 
@@ -89,7 +88,8 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
 	retval = 0;
 	/* If the HW does not support any sub-states in this C-state */
 	if (num_cstate_subtype == 0) {
-		pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW (0x%x)\n", cx->address, edx_part);
+		pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW (0x%x)\n",
+				cx->address, edx_part);
 		retval = -1;
 		goto out;
 	}
@@ -104,8 +104,8 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
 	if (!mwait_supported[cstate_type]) {
 		mwait_supported[cstate_type] = 1;
 		printk(KERN_DEBUG
-			"Monitor-Mwait will be used to enter C-%d "
-			"state\n", cx->type);
+			"Monitor-Mwait will be used to enter C-%d state\n",
+			cx->type);
 	}
 	snprintf(cx->desc,
 			ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
@@ -166,6 +166,7 @@ EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter);
 static int __init ffh_cstate_init(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
+
 	if (c->x86_vendor != X86_VENDOR_INTEL)
 		return -1;
 
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 65e20c97e04b..b5375b9497b3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -133,26 +133,6 @@ static uint32_t  __init ms_hyperv_platform(void)
 	return 0;
 }
 
-static u64 read_hv_clock(struct clocksource *arg)
-{
-	u64 current_tick;
-	/*
-	 * Read the partition counter to get the current tick count. This count
-	 * is set to 0 when the partition is created and is incremented in
-	 * 100 nanosecond units.
-	 */
-	rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
-	return current_tick;
-}
-
-static struct clocksource hyperv_cs = {
-	.name		= "hyperv_clocksource",
-	.rating		= 400, /* use this when running on Hyperv*/
-	.read		= read_hv_clock,
-	.mask		= CLOCKSOURCE_MASK(64),
-	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
 static unsigned char hv_get_nmi_reason(void)
 {
 	return 0;
@@ -180,6 +160,11 @@ static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
 
 static void __init ms_hyperv_init_platform(void)
 {
+	int hv_host_info_eax;
+	int hv_host_info_ebx;
+	int hv_host_info_ecx;
+	int hv_host_info_edx;
+
 	/*
 	 * Extract the features and hints
 	 */
@@ -190,6 +175,21 @@ static void __init ms_hyperv_init_platform(void)
 	pr_info("HyperV: features 0x%x, hints 0x%x\n",
 		ms_hyperv.features, ms_hyperv.hints);
 
+	/*
+	 * Extract host information.
+	 */
+	if (cpuid_eax(HVCPUID_VENDOR_MAXFUNCTION) >= HVCPUID_VERSION) {
+		hv_host_info_eax = cpuid_eax(HVCPUID_VERSION);
+		hv_host_info_ebx = cpuid_ebx(HVCPUID_VERSION);
+		hv_host_info_ecx = cpuid_ecx(HVCPUID_VERSION);
+		hv_host_info_edx = cpuid_edx(HVCPUID_VERSION);
+
+		pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n",
+			hv_host_info_eax, hv_host_info_ebx >> 16,
+			hv_host_info_ebx & 0xFFFF, hv_host_info_ecx,
+			hv_host_info_edx >> 24, hv_host_info_edx & 0xFFFFFF);
+	}
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
 		/*
@@ -208,9 +208,6 @@ static void __init ms_hyperv_init_platform(void)
 			     "hv_nmi_unknown");
 #endif
 
-	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
-		clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
-
 #ifdef CONFIG_X86_IO_APIC
 	no_timer_check = 1;
 #endif
@@ -227,6 +224,13 @@ static void __init ms_hyperv_init_platform(void)
 	 */
 	if (efi_enabled(EFI_BOOT))
 		x86_platform.get_nmi_reason = hv_get_nmi_reason;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+	/*
+	 * Setup the hook to get control post apic initialization.
+	 */
+	x86_platform.apic_post_init = hyperv_init;
+#endif
 }
 
 const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index bb660e53cbd6..32322ce9b405 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1067,13 +1067,13 @@ common_load:
 
 		ilen = prog - temp;
 		if (ilen > BPF_MAX_INSN_SIZE) {
-			pr_err("bpf_jit_compile fatal insn size error\n");
+			pr_err("bpf_jit: fatal insn size error\n");
 			return -EFAULT;
 		}
 
 		if (image) {
 			if (unlikely(proglen + ilen > oldproglen)) {
-				pr_err("bpf_jit_compile fatal error\n");
+				pr_err("bpf_jit: fatal error\n");
 				return -EFAULT;
 			}
 			memcpy(image + proglen, temp, ilen);
@@ -1085,10 +1085,6 @@ common_load:
 	return proglen;
 }
 
-void bpf_jit_compile(struct bpf_prog *prog)
-{
-}
-
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
 	struct bpf_binary_header *header = NULL;
@@ -1169,7 +1165,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 
 	if (image) {
 		bpf_flush_icache(header, image + proglen);
-		set_memory_ro((unsigned long)header, header->pages);
+		bpf_jit_binary_lock_ro(header);
 		prog->bpf_func = (void *)image;
 		prog->jited = 1;
 	} else {
@@ -1184,18 +1180,3 @@ out:
 					   tmp : orig_prog);
 	return prog;
 }
-
-void bpf_jit_free(struct bpf_prog *fp)
-{
-	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
-	struct bpf_binary_header *header = (void *)addr;
-
-	if (!fp->jited)
-		goto free_filter;
-
-	set_memory_rw(addr, header->pages);
-	bpf_jit_binary_free(header);
-
-free_filter:
-	bpf_prog_unlock_free(fp);
-}
diff --git a/arch/x86/platform/goldfish/goldfish.c b/arch/x86/platform/goldfish/goldfish.c
index 1693107a518e..0d17c0aafeb1 100644
--- a/arch/x86/platform/goldfish/goldfish.c
+++ b/arch/x86/platform/goldfish/goldfish.c
@@ -42,10 +42,22 @@ static struct resource goldfish_pdev_bus_resources[] = {
 	}
 };
 
+static bool goldfish_enable __initdata;
+
+static int __init goldfish_setup(char *str)
+{
+	goldfish_enable = true;
+	return 0;
+}
+__setup("goldfish", goldfish_setup);
+
 static int __init goldfish_init(void)
 {
+	if (!goldfish_enable)
+		return -ENODEV;
+
 	platform_device_register_simple("goldfish_pdev_bus", -1,
-						goldfish_pdev_bus_resources, 2);
+					goldfish_pdev_bus_resources, 2);
 	return 0;
 }
 device_initcall(goldfish_init);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index c7b15f3e2cf3..76b6dbd627df 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -53,5 +53,5 @@ config XEN_DEBUG_FS
 
 config XEN_PVH
 	bool "Support for running as a PVH guest"
-	depends on X86_64 && XEN && XEN_PVHVM
+	depends on XEN && XEN_PVHVM && ACPI
 	def_bool n
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index e47e52787d32..cb0164aee156 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
 obj-$(CONFIG_XEN_DOM0)		+= vga.o
 obj-$(CONFIG_SWIOTLB_XEN)	+= pci-swiotlb-xen.o
 obj-$(CONFIG_XEN_EFI)		+= efi.o
+obj-$(CONFIG_XEN_PVH)	 	+= xen-pvh.o
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 44c88ad1841a..bcea81f36fc5 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -145,7 +145,7 @@ static void xen_silent_inquire(int apicid)
 static int xen_cpu_present_to_apicid(int cpu)
 {
 	if (cpu_present(cpu))
-		return xen_get_apic_id(xen_apic_read(APIC_ID));
+		return cpu_data(cpu).apicid;
 	else
 		return BAD_APICID;
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 51ef95232725..ec1d5c46e58f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -45,6 +45,7 @@
 #include <xen/interface/memory.h>
 #include <xen/interface/nmi.h>
 #include <xen/interface/xen-mca.h>
+#include <xen/interface/hvm/start_info.h>
 #include <xen/features.h>
 #include <xen/page.h>
 #include <xen/hvm.h>
@@ -176,6 +177,20 @@ struct tls_descs {
  */
 static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
 
+#ifdef CONFIG_XEN_PVH
+/*
+ * PVH variables.
+ *
+ * xen_pvh and pvh_bootparams need to live in data segment since they
+ * are used after startup_{32|64}, which clear .bss, are invoked.
+ */
+bool xen_pvh __attribute__((section(".data"))) = 0;
+struct boot_params pvh_bootparams __attribute__((section(".data")));
+
+struct hvm_start_info pvh_start_info;
+unsigned int pvh_start_info_sz = sizeof(pvh_start_info);
+#endif
+
 static void clamp_max_cpus(void)
 {
 #ifdef CONFIG_SMP
@@ -1138,10 +1153,11 @@ void xen_setup_vcpu_info_placement(void)
 		xen_vcpu_setup(cpu);
 	}
 
-	/* xen_vcpu_setup managed to place the vcpu_info within the
-	 * percpu area for all cpus, so make use of it. Note that for
-	 * PVH we want to use native IRQ mechanism. */
-	if (have_vcpu_info_placement && !xen_pvh_domain()) {
+	/*
+	 * xen_vcpu_setup managed to place the vcpu_info within the
+	 * percpu area for all cpus, so make use of it.
+	 */
+	if (have_vcpu_info_placement) {
 		pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
 		pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
 		pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
@@ -1413,49 +1429,9 @@ static void __init xen_boot_params_init_edd(void)
  * Set up the GDT and segment registers for -fstack-protector.  Until
  * we do this, we have to be careful not to call any stack-protected
  * function, which is most of the kernel.
- *
- * Note, that it is __ref because the only caller of this after init
- * is PVH which is not going to use xen_load_gdt_boot or other
- * __init functions.
  */
-static void __ref xen_setup_gdt(int cpu)
+static void xen_setup_gdt(int cpu)
 {
-	if (xen_feature(XENFEAT_auto_translated_physmap)) {
-#ifdef CONFIG_X86_64
-		unsigned long dummy;
-
-		load_percpu_segment(cpu); /* We need to access per-cpu area */
-		switch_to_new_gdt(cpu); /* GDT and GS set */
-
-		/* We are switching of the Xen provided GDT to our HVM mode
-		 * GDT. The new GDT has  __KERNEL_CS with CS.L = 1
-		 * and we are jumping to reload it.
-		 */
-		asm volatile ("pushq %0\n"
-			      "leaq 1f(%%rip),%0\n"
-			      "pushq %0\n"
-			      "lretq\n"
-			      "1:\n"
-			      : "=&r" (dummy) : "0" (__KERNEL_CS));
-
-		/*
-		 * While not needed, we also set the %es, %ds, and %fs
-		 * to zero. We don't care about %ss as it is NULL.
-		 * Strictly speaking this is not needed as Xen zeros those
-		 * out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE)
-		 *
-		 * Linux zeros them in cpu_init() and in secondary_startup_64
-		 * (for BSP).
-		 */
-		loadsegment(es, 0);
-		loadsegment(ds, 0);
-		loadsegment(fs, 0);
-#else
-		/* PVH: TODO Implement. */
-		BUG();
-#endif
-		return; /* PVH does not need any PV GDT ops. */
-	}
 	pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
 	pv_cpu_ops.load_gdt = xen_load_gdt_boot;
 
@@ -1466,59 +1442,6 @@ static void __ref xen_setup_gdt(int cpu)
 	pv_cpu_ops.load_gdt = xen_load_gdt;
 }
 
-#ifdef CONFIG_XEN_PVH
-/*
- * A PV guest starts with default flags that are not set for PVH, set them
- * here asap.
- */
-static void xen_pvh_set_cr_flags(int cpu)
-{
-
-	/* Some of these are setup in 'secondary_startup_64'. The others:
-	 * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests
-	 * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */
-	write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM);
-
-	if (!cpu)
-		return;
-	/*
-	 * For BSP, PSE PGE are set in probe_page_size_mask(), for APs
-	 * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu().
-	*/
-	if (boot_cpu_has(X86_FEATURE_PSE))
-		cr4_set_bits_and_update_boot(X86_CR4_PSE);
-
-	if (boot_cpu_has(X86_FEATURE_PGE))
-		cr4_set_bits_and_update_boot(X86_CR4_PGE);
-}
-
-/*
- * Note, that it is ref - because the only caller of this after init
- * is PVH which is not going to use xen_load_gdt_boot or other
- * __init functions.
- */
-void __ref xen_pvh_secondary_vcpu_init(int cpu)
-{
-	xen_setup_gdt(cpu);
-	xen_pvh_set_cr_flags(cpu);
-}
-
-static void __init xen_pvh_early_guest_init(void)
-{
-	if (!xen_feature(XENFEAT_auto_translated_physmap))
-		return;
-
-	BUG_ON(!xen_feature(XENFEAT_hvm_callback_vector));
-
-	xen_pvh_early_cpu_init(0, false);
-	xen_pvh_set_cr_flags(0);
-
-#ifdef CONFIG_X86_32
-	BUG(); /* PVH: Implement proper support. */
-#endif
-}
-#endif    /* CONFIG_XEN_PVH */
-
 static void __init xen_dom0_set_legacy_features(void)
 {
 	x86_platform.legacy.rtc = 1;
@@ -1555,24 +1478,17 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	xen_domain_type = XEN_PV_DOMAIN;
 
 	xen_setup_features();
-#ifdef CONFIG_XEN_PVH
-	xen_pvh_early_guest_init();
-#endif
+
 	xen_setup_machphys_mapping();
 
 	/* Install Xen paravirt ops */
 	pv_info = xen_info;
 	pv_init_ops = xen_init_ops;
-	if (!xen_pvh_domain()) {
-		pv_cpu_ops = xen_cpu_ops;
+	pv_cpu_ops = xen_cpu_ops;
 
-		x86_platform.get_nmi_reason = xen_get_nmi_reason;
-	}
+	x86_platform.get_nmi_reason = xen_get_nmi_reason;
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		x86_init.resources.memory_setup = xen_auto_xlated_memory_setup;
-	else
-		x86_init.resources.memory_setup = xen_memory_setup;
+	x86_init.resources.memory_setup = xen_memory_setup;
 	x86_init.oem.arch_setup = xen_arch_setup;
 	x86_init.oem.banner = xen_banner;
 
@@ -1665,18 +1581,15 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	/* set the limit of our address space */
 	xen_reserve_top();
 
-	/* PVH: runs at default kernel iopl of 0 */
-	if (!xen_pvh_domain()) {
-		/*
-		 * We used to do this in xen_arch_setup, but that is too late
-		 * on AMD were early_cpu_init (run before ->arch_setup()) calls
-		 * early_amd_init which pokes 0xcf8 port.
-		 */
-		set_iopl.iopl = 1;
-		rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-		if (rc != 0)
-			xen_raw_printk("physdev_op failed %d\n", rc);
-	}
+	/*
+	 * We used to do this in xen_arch_setup, but that is too late
+	 * on AMD were early_cpu_init (run before ->arch_setup()) calls
+	 * early_amd_init which pokes 0xcf8 port.
+	 */
+	set_iopl.iopl = 1;
+	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+	if (rc != 0)
+		xen_raw_printk("physdev_op failed %d\n", rc);
 
 #ifdef CONFIG_X86_32
 	/* set up basic CPUID stuff */
@@ -1758,6 +1671,102 @@ asmlinkage __visible void __init xen_start_kernel(void)
 #endif
 }
 
+#ifdef CONFIG_XEN_PVH
+
+static void xen_pvh_arch_setup(void)
+{
+#ifdef CONFIG_ACPI
+	/* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */
+	if (nr_ioapics == 0)
+		acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
+#endif
+}
+
+static void __init init_pvh_bootparams(void)
+{
+	struct xen_memory_map memmap;
+	unsigned int i;
+	int rc;
+
+	memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
+
+	memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_map);
+	set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_map);
+	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+	if (rc) {
+		xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
+		BUG();
+	}
+
+	if (memmap.nr_entries < E820MAX - 1) {
+		pvh_bootparams.e820_map[memmap.nr_entries].addr =
+			ISA_START_ADDRESS;
+		pvh_bootparams.e820_map[memmap.nr_entries].size =
+			ISA_END_ADDRESS - ISA_START_ADDRESS;
+		pvh_bootparams.e820_map[memmap.nr_entries].type =
+			E820_RESERVED;
+		memmap.nr_entries++;
+	} else
+		xen_raw_printk("Warning: Can fit ISA range into e820\n");
+
+	sanitize_e820_map(pvh_bootparams.e820_map,
+			  ARRAY_SIZE(pvh_bootparams.e820_map),
+			  &memmap.nr_entries);
+
+	pvh_bootparams.e820_entries = memmap.nr_entries;
+	for (i = 0; i < pvh_bootparams.e820_entries; i++)
+		e820_add_region(pvh_bootparams.e820_map[i].addr,
+				pvh_bootparams.e820_map[i].size,
+				pvh_bootparams.e820_map[i].type);
+
+	pvh_bootparams.hdr.cmd_line_ptr =
+		pvh_start_info.cmdline_paddr;
+
+	/* The first module is always ramdisk. */
+	if (pvh_start_info.nr_modules) {
+		struct hvm_modlist_entry *modaddr =
+			__va(pvh_start_info.modlist_paddr);
+		pvh_bootparams.hdr.ramdisk_image = modaddr->paddr;
+		pvh_bootparams.hdr.ramdisk_size = modaddr->size;
+	}
+
+	/*
+	 * See Documentation/x86/boot.txt.
+	 *
+	 * Version 2.12 supports Xen entry point but we will use default x86/PC
+	 * environment (i.e. hardware_subarch 0).
+	 */
+	pvh_bootparams.hdr.version = 0x212;
+	pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
+}
+
+/*
+ * This routine (and those that it might call) should not use
+ * anything that lives in .bss since that segment will be cleared later.
+ */
+void __init xen_prepare_pvh(void)
+{
+	u32 msr;
+	u64 pfn;
+
+	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
+		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
+				pvh_start_info.magic);
+		BUG();
+	}
+
+	xen_pvh = 1;
+
+	msr = cpuid_ebx(xen_cpuid_base() + 2);
+	pfn = __pa(hypercall_page);
+	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+
+	init_pvh_bootparams();
+
+	x86_init.oem.arch_setup = xen_pvh_arch_setup;
+}
+#endif
+
 void __ref xen_hvm_init_shared_info(void)
 {
 	int cpu;
@@ -1797,20 +1806,29 @@ void __ref xen_hvm_init_shared_info(void)
 static void __init init_hvm_pv_info(void)
 {
 	int major, minor;
-	uint32_t eax, ebx, ecx, edx, pages, msr, base;
-	u64 pfn;
+	uint32_t eax, ebx, ecx, edx, base;
 
 	base = xen_cpuid_base();
-	cpuid(base + 1, &eax, &ebx, &ecx, &edx);
+	eax = cpuid_eax(base + 1);
 
 	major = eax >> 16;
 	minor = eax & 0xffff;
 	printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
 
-	cpuid(base + 2, &pages, &msr, &ecx, &edx);
+	xen_domain_type = XEN_HVM_DOMAIN;
 
-	pfn = __pa(hypercall_page);
-	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+	/* PVH set up hypercall page in xen_prepare_pvh(). */
+	if (xen_pvh_domain())
+		pv_info.name = "Xen PVH";
+	else {
+		u64 pfn;
+		uint32_t msr;
+
+		pv_info.name = "Xen HVM";
+		msr = cpuid_ebx(base + 2);
+		pfn = __pa(hypercall_page);
+		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+	}
 
 	xen_setup_features();
 
@@ -1819,10 +1837,6 @@ static void __init init_hvm_pv_info(void)
 		this_cpu_write(xen_vcpu_id, ebx);
 	else
 		this_cpu_write(xen_vcpu_id, smp_processor_id());
-
-	pv_info.name = "Xen HVM";
-
-	xen_domain_type = XEN_HVM_DOMAIN;
 }
 #endif
 
@@ -1910,6 +1924,9 @@ static void __init xen_hvm_guest_init(void)
 	x86_init.irqs.intr_init = xen_init_IRQ;
 	xen_hvm_init_time_ops();
 	xen_hvm_init_mmu_ops();
+
+	if (xen_pvh_domain())
+		machine_ops.emergency_restart = xen_emergency_restart;
 #ifdef CONFIG_KEXEC_CORE
 	machine_ops.shutdown = xen_hvm_shutdown;
 	machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 7d5afdb417cc..f6740b5b1738 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1792,10 +1792,6 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot,
 	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
 	pte_t pte = pfn_pte(pfn, prot);
 
-	/* For PVH no need to set R/O or R/W to pin them or unpin them. */
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return;
-
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
 		BUG();
 }
@@ -1902,8 +1898,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
  * level2_ident_pgt, and level2_kernel_pgt.  This means that only the
  * kernel has a physical mapping to start with - but that's enough to
  * get __va working.  We need to fill in the rest of the physical
- * mapping once some sort of allocator has been set up.  NOTE: for
- * PVH, the page tables are native.
+ * mapping once some sort of allocator has been set up.
  */
 void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
@@ -2812,16 +2807,6 @@ static int do_remap_gfn(struct vm_area_struct *vma,
 
 	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
 
-	if (xen_feature(XENFEAT_auto_translated_physmap)) {
-#ifdef CONFIG_XEN_PVH
-		/* We need to update the local page tables and the xen HAP */
-		return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
-						 prot, domid, pages);
-#else
-		return -EINVAL;
-#endif
-        }
-
 	rmd.mfn = gfn;
 	rmd.prot = prot;
 	/* We use the err_ptr to indicate if there we are doing a contiguous
@@ -2915,10 +2900,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
 	if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
 		return 0;
 
-#ifdef CONFIG_XEN_PVH
-	return xen_xlate_unmap_gfn_range(vma, numpgs, pages);
-#else
 	return -EINVAL;
-#endif
 }
 EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 90d1b83cf35f..33a783c77d96 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -73,8 +73,8 @@ bool xen_has_pv_devices(void)
 	if (!xen_domain())
 		return false;
 
-	/* PV domains always have them. */
-	if (xen_pv_domain())
+	/* PV and PVH domains always have them. */
+	if (xen_pv_domain() || xen_pvh_domain())
 		return true;
 
 	/* And user has xen_platform_pci=0 set in guest config as
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index f3f7b41116f7..a8c306cf8868 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -915,39 +915,6 @@ char * __init xen_memory_setup(void)
 }
 
 /*
- * Machine specific memory setup for auto-translated guests.
- */
-char * __init xen_auto_xlated_memory_setup(void)
-{
-	struct xen_memory_map memmap;
-	int i;
-	int rc;
-
-	memmap.nr_entries = ARRAY_SIZE(xen_e820_map);
-	set_xen_guest_handle(memmap.buffer, xen_e820_map);
-
-	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
-	if (rc < 0)
-		panic("No memory map (%d)\n", rc);
-
-	xen_e820_map_entries = memmap.nr_entries;
-
-	sanitize_e820_map(xen_e820_map, ARRAY_SIZE(xen_e820_map),
-			  &xen_e820_map_entries);
-
-	for (i = 0; i < xen_e820_map_entries; i++)
-		e820_add_region(xen_e820_map[i].addr, xen_e820_map[i].size,
-				xen_e820_map[i].type);
-
-	/* Remove p2m info, it is not needed. */
-	xen_start_info->mfn_list = 0;
-	xen_start_info->first_p2m_pfn = 0;
-	xen_start_info->nr_p2m_frames = 0;
-
-	return "Xen";
-}
-
-/*
  * Set the bit indicating "nosegneg" library variants should be used.
  * We only need to bother in pure 32-bit mode; compat 32-bit processes
  * can have un-truncated segments, so wrapping around is allowed.
@@ -1032,8 +999,8 @@ void __init xen_pvmmu_arch_setup(void)
 void __init xen_arch_setup(void)
 {
 	xen_panic_handler_init();
-	if (!xen_feature(XENFEAT_auto_translated_physmap))
-		xen_pvmmu_arch_setup();
+
+	xen_pvmmu_arch_setup();
 
 #ifdef CONFIG_ACPI
 	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 311acad7dad2..0dee6f59ea82 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -99,18 +99,8 @@ static void cpu_bringup(void)
 	local_irq_enable();
 }
 
-/*
- * Note: cpu parameter is only relevant for PVH. The reason for passing it
- * is we can't do smp_processor_id until the percpu segments are loaded, for
- * which we need the cpu number! So we pass it in rdi as first parameter.
- */
-asmlinkage __visible void cpu_bringup_and_idle(int cpu)
+asmlinkage __visible void cpu_bringup_and_idle(void)
 {
-#ifdef CONFIG_XEN_PVH
-	if (xen_feature(XENFEAT_auto_translated_physmap) &&
-	    xen_feature(XENFEAT_supervisor_mode_kernel))
-		xen_pvh_secondary_vcpu_init(cpu);
-#endif
 	cpu_bringup();
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
@@ -404,61 +394,47 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	gdt = get_cpu_gdt_table(cpu);
 
 #ifdef CONFIG_X86_32
-	/* Note: PVH is not yet supported on x86_32. */
 	ctxt->user_regs.fs = __KERNEL_PERCPU;
 	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
 #endif
 	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 
-	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
-		ctxt->flags = VGCF_IN_KERNEL;
-		ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
-		ctxt->user_regs.ds = __USER_DS;
-		ctxt->user_regs.es = __USER_DS;
-		ctxt->user_regs.ss = __KERNEL_DS;
+	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
+	ctxt->flags = VGCF_IN_KERNEL;
+	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
+	ctxt->user_regs.ds = __USER_DS;
+	ctxt->user_regs.es = __USER_DS;
+	ctxt->user_regs.ss = __KERNEL_DS;
 
-		xen_copy_trap_info(ctxt->trap_ctxt);
+	xen_copy_trap_info(ctxt->trap_ctxt);
 
-		ctxt->ldt_ents = 0;
+	ctxt->ldt_ents = 0;
 
-		BUG_ON((unsigned long)gdt & ~PAGE_MASK);
+	BUG_ON((unsigned long)gdt & ~PAGE_MASK);
 
-		gdt_mfn = arbitrary_virt_to_mfn(gdt);
-		make_lowmem_page_readonly(gdt);
-		make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
+	gdt_mfn = arbitrary_virt_to_mfn(gdt);
+	make_lowmem_page_readonly(gdt);
+	make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
 
-		ctxt->gdt_frames[0] = gdt_mfn;
-		ctxt->gdt_ents      = GDT_ENTRIES;
+	ctxt->gdt_frames[0] = gdt_mfn;
+	ctxt->gdt_ents      = GDT_ENTRIES;
 
-		ctxt->kernel_ss = __KERNEL_DS;
-		ctxt->kernel_sp = idle->thread.sp0;
+	ctxt->kernel_ss = __KERNEL_DS;
+	ctxt->kernel_sp = idle->thread.sp0;
 
 #ifdef CONFIG_X86_32
-		ctxt->event_callback_cs     = __KERNEL_CS;
-		ctxt->failsafe_callback_cs  = __KERNEL_CS;
+	ctxt->event_callback_cs     = __KERNEL_CS;
+	ctxt->failsafe_callback_cs  = __KERNEL_CS;
 #else
-		ctxt->gs_base_kernel = per_cpu_offset(cpu);
-#endif
-		ctxt->event_callback_eip    =
-					(unsigned long)xen_hypervisor_callback;
-		ctxt->failsafe_callback_eip =
-					(unsigned long)xen_failsafe_callback;
-		ctxt->user_regs.cs = __KERNEL_CS;
-		per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
-	}
-#ifdef CONFIG_XEN_PVH
-	else {
-		/*
-		 * The vcpu comes on kernel page tables which have the NX pte
-		 * bit set. This means before DS/SS is touched, NX in
-		 * EFER must be set. Hence the following assembly glue code.
-		 */
-		ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
-		ctxt->user_regs.rdi = cpu;
-		ctxt->user_regs.rsi = true;  /* entry == true */
-	}
+	ctxt->gs_base_kernel = per_cpu_offset(cpu);
 #endif
+	ctxt->event_callback_eip    =
+		(unsigned long)xen_hypervisor_callback;
+	ctxt->failsafe_callback_eip =
+		(unsigned long)xen_failsafe_callback;
+	ctxt->user_regs.cs = __KERNEL_CS;
+	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
+
 	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
 	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
 	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index c5c16dc4f694..9beef333584a 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -21,12 +21,4 @@ static inline int xen_smp_intr_init(unsigned int cpu)
 static inline void xen_smp_intr_free(unsigned int cpu) {}
 #endif /* CONFIG_SMP */
 
-#ifdef CONFIG_XEN_PVH
-extern void xen_pvh_early_cpu_init(int cpu, bool entry);
-#else
-static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
-{
-}
-#endif
-
 #endif
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 7f8d8abf4c1a..37794e42b67d 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -16,25 +16,6 @@
 #include <xen/interface/xen-mca.h>
 #include <asm/xen/interface.h>
 
-#ifdef CONFIG_XEN_PVH
-#define PVH_FEATURES_STR  "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel"
-/* Note the lack of 'hvm_callback_vector'. Older hypervisor will
- * balk at this being part of XEN_ELFNOTE_FEATURES, so we put it in
- * XEN_ELFNOTE_SUPPORTED_FEATURES which older hypervisors will ignore.
- */
-#define PVH_FEATURES ((1 << XENFEAT_writable_page_tables) | \
-		      (1 << XENFEAT_auto_translated_physmap) | \
-		      (1 << XENFEAT_supervisor_mode_kernel) | \
-		      (1 << XENFEAT_hvm_callback_vector))
-/* The XENFEAT_writable_page_tables is not stricly necessary as we set that
- * up regardless whether this CONFIG option is enabled or not, but it
- * clarifies what the right flags need to be.
- */
-#else
-#define PVH_FEATURES_STR  ""
-#define PVH_FEATURES (0)
-#endif
-
 	__INIT
 ENTRY(startup_xen)
 	cld
@@ -54,41 +35,6 @@ ENTRY(startup_xen)
 
 	__FINIT
 
-#ifdef CONFIG_XEN_PVH
-/*
- * xen_pvh_early_cpu_init() - early PVH VCPU initialization
- * @cpu:   this cpu number (%rdi)
- * @entry: true if this is a secondary vcpu coming up on this entry
- *         point, false if this is the boot CPU being initialized for
- *         the first time (%rsi)
- *
- * Note: This is called as a function on the boot CPU, and is the entry point
- *       on the secondary CPU.
- */
-ENTRY(xen_pvh_early_cpu_init)
-	mov     %rsi, %r11
-
-	/* Gather features to see if NX implemented. */
-	mov     $0x80000001, %eax
-	cpuid
-	mov     %edx, %esi
-
-	mov     $MSR_EFER, %ecx
-	rdmsr
-	bts     $_EFER_SCE, %eax
-
-	bt      $20, %esi
-	jnc     1f      	/* No NX, skip setting it */
-	bts     $_EFER_NX, %eax
-1:	wrmsr
-#ifdef CONFIG_SMP
-	cmp     $0, %r11b
-	jne     cpu_bringup_and_idle
-#endif
-	ret
-
-#endif /* CONFIG_XEN_PVH */
-
 .pushsection .text
 	.balign PAGE_SIZE
 ENTRY(hypercall_page)
@@ -114,10 +60,10 @@ ENTRY(hypercall_page)
 #endif
 	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          _ASM_PTR startup_xen)
 	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
-	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR)
-	ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) |
-						(1 << XENFEAT_writable_page_tables) |
-						(1 << XENFEAT_dom0))
+	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,
+		.ascii "!writable_page_tables|pae_pgdir_above_4gb")
+	ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES,
+		.long (1 << XENFEAT_writable_page_tables) | (1 << XENFEAT_dom0))
 	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
 	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
 	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index ac0a2b0f9e62..f6a41c41ebc7 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -146,5 +146,4 @@ __visible void xen_adjust_exception_frame(void);
 
 extern int xen_panic_handler_init(void);
 
-void xen_pvh_secondary_vcpu_init(int cpu);
 #endif /* XEN_OPS_H */
diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
new file mode 100644
index 000000000000..5e246716d58f
--- /dev/null
+++ b/arch/x86/xen/xen-pvh.S
@@ -0,0 +1,161 @@
+/*
+ * Copyright C 2016, Oracle and/or its affiliates. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+	.code32
+	.text
+#define _pa(x)          ((x) - __START_KERNEL_map)
+
+#include <linux/elfnote.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/asm.h>
+#include <asm/boot.h>
+#include <asm/processor-flags.h>
+#include <asm/msr.h>
+#include <xen/interface/elfnote.h>
+
+	__HEAD
+
+/*
+ * Entry point for PVH guests.
+ *
+ * Xen ABI specifies the following register state when we come here:
+ *
+ * - `ebx`: contains the physical memory address where the loader has placed
+ *          the boot start info structure.
+ * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
+ * - `cr4`: all bits are cleared.
+ * - `cs `: must be a 32-bit read/execute code segment with a base of ‘0’
+ *          and a limit of ‘0xFFFFFFFF’. The selector value is unspecified.
+ * - `ds`, `es`: must be a 32-bit read/write data segment with a base of
+ *               ‘0’ and a limit of ‘0xFFFFFFFF’. The selector values are all
+ *               unspecified.
+ * - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit
+ *         of '0x67'.
+ * - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared.
+ *             Bit 8 (TF) must be cleared. Other bits are all unspecified.
+ *
+ * All other processor registers and flag bits are unspecified. The OS is in
+ * charge of setting up it's own stack, GDT and IDT.
+ */
+
+ENTRY(pvh_start_xen)
+	cld
+
+	lgdt (_pa(gdt))
+
+	mov $(__BOOT_DS),%eax
+	mov %eax,%ds
+	mov %eax,%es
+	mov %eax,%ss
+
+	/* Stash hvm_start_info. */
+	mov $_pa(pvh_start_info), %edi
+	mov %ebx, %esi
+	mov _pa(pvh_start_info_sz), %ecx
+	shr $2,%ecx
+	rep
+	movsl
+
+	mov $_pa(early_stack_end), %esp
+
+	/* Enable PAE mode. */
+	mov %cr4, %eax
+	orl $X86_CR4_PAE, %eax
+	mov %eax, %cr4
+
+#ifdef CONFIG_X86_64
+	/* Enable Long mode. */
+	mov $MSR_EFER, %ecx
+	rdmsr
+	btsl $_EFER_LME, %eax
+	wrmsr
+
+	/* Enable pre-constructed page tables. */
+	mov $_pa(init_level4_pgt), %eax
+	mov %eax, %cr3
+	mov $(X86_CR0_PG | X86_CR0_PE), %eax
+	mov %eax, %cr0
+
+	/* Jump to 64-bit mode. */
+	ljmp $__KERNEL_CS, $_pa(1f)
+
+	/* 64-bit entry point. */
+	.code64
+1:
+	call xen_prepare_pvh
+
+	/* startup_64 expects boot_params in %rsi. */
+	mov $_pa(pvh_bootparams), %rsi
+	mov $_pa(startup_64), %rax
+	jmp *%rax
+
+#else /* CONFIG_X86_64 */
+
+	call mk_early_pgtbl_32
+
+	mov $_pa(initial_page_table), %eax
+	mov %eax, %cr3
+
+	mov %cr0, %eax
+	or $(X86_CR0_PG | X86_CR0_PE), %eax
+	mov %eax, %cr0
+
+	ljmp $__BOOT_CS, $1f
+1:
+	call xen_prepare_pvh
+	mov $_pa(pvh_bootparams), %esi
+
+	/* startup_32 doesn't expect paging and PAE to be on. */
+	ljmp $__BOOT_CS, $_pa(2f)
+2:
+	mov %cr0, %eax
+	and $~X86_CR0_PG, %eax
+	mov %eax, %cr0
+	mov %cr4, %eax
+	and $~X86_CR4_PAE, %eax
+	mov %eax, %cr4
+
+	ljmp $__BOOT_CS, $_pa(startup_32)
+#endif
+END(pvh_start_xen)
+
+	.section ".init.data","aw"
+	.balign 8
+gdt:
+	.word gdt_end - gdt_start
+	.long _pa(gdt_start)
+	.word 0
+gdt_start:
+	.quad 0x0000000000000000            /* NULL descriptor */
+	.quad 0x0000000000000000            /* reserved */
+#ifdef CONFIG_X86_64
+	.quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* __KERNEL_CS */
+#else
+	.quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* __KERNEL_CS */
+#endif
+	.quad GDT_ENTRY(0xc092, 0, 0xfffff) /* __KERNEL_DS */
+gdt_end:
+
+	.balign 4
+early_stack:
+	.fill 256, 1, 0
+early_stack_end:
+
+	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
+	             _ASM_PTR (pvh_start_xen - __START_KERNEL_map))