aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/include/asm/futex.h9
-rw-r--r--arch/arm64/kvm/guest.c7
-rw-r--r--arch/arm64/kvm/hyp/entry.S23
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S1
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c17
-rw-r--r--arch/arm64/mm/hugetlbpage.c2
-rw-r--r--arch/powerpc/kvm/powerpc.c1
-rw-r--r--arch/riscv/include/asm/csr.h3
-rw-r--r--arch/riscv/include/asm/hwcap.h22
-rw-r--r--arch/riscv/include/asm/set_memory.h8
-rw-r--r--arch/riscv/kernel/cpu_ops.c4
-rw-r--r--arch/riscv/kernel/cpufeature.c83
-rw-r--r--arch/riscv/kernel/smp.c2
-rw-r--r--arch/riscv/kernel/vdso/Makefile2
-rw-r--r--arch/riscv/kernel/vdso/note.S12
-rw-r--r--arch/riscv/mm/init.c19
-rw-r--r--arch/s390/kvm/kvm-s390.c1
-rw-r--r--arch/s390/kvm/priv.c4
-rw-r--r--arch/sparc/include/asm/page_32.h12
-rw-r--r--arch/sparc/include/asm/pgalloc_32.h11
-rw-r--r--arch/sparc/include/asm/pgtable_32.h40
-rw-r--r--arch/sparc/include/asm/pgtsrmmu.h36
-rw-r--r--arch/sparc/include/asm/viking.h5
-rw-r--r--arch/sparc/kernel/head_32.S8
-rw-r--r--arch/sparc/kernel/ptrace_32.c233
-rw-r--r--arch/sparc/kernel/ptrace_64.c17
-rw-r--r--arch/sparc/kernel/sys_sparc32.c1
-rw-r--r--arch/sparc/mm/hypersparc.S3
-rw-r--r--arch/sparc/mm/init_32.c1
-rw-r--r--arch/sparc/mm/srmmu.c106
-rw-r--r--arch/sparc/mm/viking.S5
-rw-r--r--arch/x86/entry/calling.h40
-rw-r--r--arch/x86/entry/entry_64.S14
-rw-r--r--arch/x86/include/asm/ftrace.h5
-rw-r--r--arch/x86/include/asm/kvm_host.h4
-rw-r--r--arch/x86/include/asm/unwind.h2
-rw-r--r--arch/x86/kernel/apic/apic.c27
-rw-r--r--arch/x86/kernel/dumpstack_64.c3
-rw-r--r--arch/x86/kernel/unwind_frame.c3
-rw-r--r--arch/x86/kernel/unwind_orc.c113
-rw-r--r--arch/x86/kvm/ioapic.c10
-rw-r--r--arch/x86/kvm/svm/sev.c2
-rw-r--r--arch/x86/kvm/svm/svm.c2
-rw-r--r--arch/x86/kvm/vmx/nested.c2
-rw-r--r--arch/x86/kvm/vmx/vmenter.S3
-rw-r--r--arch/x86/kvm/x86.c21
-rw-r--r--arch/x86/mm/pat/set_memory.c12
47 files changed, 522 insertions, 439 deletions
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index e133da303a98..a9151884bc85 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -165,8 +165,13 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
preempt_enable();
#endif
- if (!ret)
- *oval = oldval;
+ /*
+ * Store unconditionally. If ret != 0 the extra store is the least
+ * of the worries but GCC cannot figure out that __futex_atomic_op()
+ * is either setting ret to -EFAULT or storing the old value in
+ * oldval which results in a uninitialized warning at the call site.
+ */
+ *oval = oldval;
return ret;
}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 23ebe51410f0..50a279d3ddd7 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -200,6 +200,13 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
}
memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id));
+
+ if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
+ int i;
+
+ for (i = 0; i < 16; i++)
+ *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i);
+ }
out:
return err;
}
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index d22d0534dd60..90186cf6473e 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -18,6 +18,7 @@
#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x)
#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+#define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8)
.text
.pushsection .hyp.text, "ax"
@@ -47,6 +48,16 @@
ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
.endm
+.macro save_sp_el0 ctxt, tmp
+ mrs \tmp, sp_el0
+ str \tmp, [\ctxt, #CPU_SP_EL0_OFFSET]
+.endm
+
+.macro restore_sp_el0 ctxt, tmp
+ ldr \tmp, [\ctxt, #CPU_SP_EL0_OFFSET]
+ msr sp_el0, \tmp
+.endm
+
/*
* u64 __guest_enter(struct kvm_vcpu *vcpu,
* struct kvm_cpu_context *host_ctxt);
@@ -60,6 +71,9 @@ SYM_FUNC_START(__guest_enter)
// Store the host regs
save_callee_saved_regs x1
+ // Save the host's sp_el0
+ save_sp_el0 x1, x2
+
// Now the host state is stored if we have a pending RAS SError it must
// affect the host. If any asynchronous exception is pending we defer
// the guest entry. The DSB isn't necessary before v8.2 as any SError
@@ -83,6 +97,9 @@ alternative_else_nop_endif
// when this feature is enabled for kernel code.
ptrauth_switch_to_guest x29, x0, x1, x2
+ // Restore the guest's sp_el0
+ restore_sp_el0 x29, x0
+
// Restore guest regs x0-x17
ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)]
ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)]
@@ -130,6 +147,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
// Store the guest regs x18-x29, lr
save_callee_saved_regs x1
+ // Store the guest's sp_el0
+ save_sp_el0 x1, x2
+
get_host_ctxt x2, x3
// Macro ptrauth_switch_to_guest format:
@@ -139,6 +159,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
// when this feature is enabled for kernel code.
ptrauth_switch_to_host x1, x2, x3, x4, x5
+ // Restore the hosts's sp_el0
+ restore_sp_el0 x2, x3
+
// Now restore the host regs
restore_callee_saved_regs x2
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index c2a13ab3c471..9c5cfb04170e 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -198,7 +198,6 @@ SYM_CODE_END(__hyp_panic)
.macro invalid_vector label, target = __hyp_panic
.align 2
SYM_CODE_START(\label)
-\label:
b \target
SYM_CODE_END(\label)
.endm
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 75b1925763f1..6d2df9fe0b5d 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -15,8 +15,9 @@
/*
* Non-VHE: Both host and guest must save everything.
*
- * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and pstate,
- * which are handled as part of the el2 return state) on every switch.
+ * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
+ * pstate, which are handled as part of the el2 return state) on every
+ * switch (sp_el0 is being dealt with in the assembly code).
* tpidr_el0 and tpidrro_el0 only need to be switched when going
* to host userspace or a different VCPU. EL1 registers only need to be
* switched when potentially going to run a different VCPU. The latter two
@@ -26,12 +27,6 @@
static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
-
- /*
- * The host arm64 Linux uses sp_el0 to point to 'current' and it must
- * therefore be saved/restored on every entry/exit to/from the guest.
- */
- ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
}
static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
@@ -99,12 +94,6 @@ NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe);
static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
-
- /*
- * The host arm64 Linux uses sp_el0 to point to 'current' and it must
- * therefore be saved/restored on every entry/exit to/from the guest.
- */
- write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
}
static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index bbeb6a5a6ba6..0be3355e3499 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -230,6 +230,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
ptep = (pte_t *)pudp;
} else if (sz == (CONT_PTE_SIZE)) {
pmdp = pmd_alloc(mm, pudp, addr);
+ if (!pmdp)
+ return NULL;
WARN_ON(addr & (sz - 1));
/*
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index e15166b0a16d..ad2f172c26a6 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -521,6 +521,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 8e18d2c64399..cec462e198ce 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -51,13 +51,10 @@
#define CAUSE_IRQ_FLAG (_AC(1, UL) << (__riscv_xlen - 1))
/* Interrupt causes (minus the high bit) */
-#define IRQ_U_SOFT 0
#define IRQ_S_SOFT 1
#define IRQ_M_SOFT 3
-#define IRQ_U_TIMER 4
#define IRQ_S_TIMER 5
#define IRQ_M_TIMER 7
-#define IRQ_U_EXT 8
#define IRQ_S_EXT 9
#define IRQ_M_EXT 11
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 1bb0cd04aec3..5ce50468aff1 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -8,6 +8,7 @@
#ifndef _ASM_RISCV_HWCAP_H
#define _ASM_RISCV_HWCAP_H
+#include <linux/bits.h>
#include <uapi/asm/hwcap.h>
#ifndef __ASSEMBLY__
@@ -22,6 +23,27 @@ enum {
};
extern unsigned long elf_hwcap;
+
+#define RISCV_ISA_EXT_a ('a' - 'a')
+#define RISCV_ISA_EXT_c ('c' - 'a')
+#define RISCV_ISA_EXT_d ('d' - 'a')
+#define RISCV_ISA_EXT_f ('f' - 'a')
+#define RISCV_ISA_EXT_h ('h' - 'a')
+#define RISCV_ISA_EXT_i ('i' - 'a')
+#define RISCV_ISA_EXT_m ('m' - 'a')
+#define RISCV_ISA_EXT_s ('s' - 'a')
+#define RISCV_ISA_EXT_u ('u' - 'a')
+
+#define RISCV_ISA_EXT_MAX 64
+
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
+
+#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
+
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
+#define riscv_isa_extension_available(isa_bitmap, ext) \
+ __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
+
#endif
#endif /* _ASM_RISCV_HWCAP_H */
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index c38df4771c09..4c5bae7ca01c 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -22,14 +22,6 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
#endif
-#ifdef CONFIG_STRICT_KERNEL_RWX
-void set_kernel_text_ro(void);
-void set_kernel_text_rw(void);
-#else
-static inline void set_kernel_text_ro(void) { }
-static inline void set_kernel_text_rw(void) { }
-#endif
-
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c
index c4c33bf02369..0ec22354018c 100644
--- a/arch/riscv/kernel/cpu_ops.c
+++ b/arch/riscv/kernel/cpu_ops.c
@@ -15,8 +15,8 @@
const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
-void *__cpu_up_stack_pointer[NR_CPUS];
-void *__cpu_up_task_pointer[NR_CPUS];
+void *__cpu_up_stack_pointer[NR_CPUS] __section(.data);
+void *__cpu_up_task_pointer[NR_CPUS] __section(.data);
extern const struct cpu_operations cpu_ops_sbi;
extern const struct cpu_operations cpu_ops_spinwait;
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index a5ad00043104..ac202f44a670 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -6,6 +6,7 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/bitmap.h>
#include <linux/of.h>
#include <asm/processor.h>
#include <asm/hwcap.h>
@@ -13,15 +14,57 @@
#include <asm/switch_to.h>
unsigned long elf_hwcap __read_mostly;
+
+/* Host ISA bitmap */
+static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
+
#ifdef CONFIG_FPU
bool has_fpu __read_mostly;
#endif
+/**
+ * riscv_isa_extension_base() - Get base extension word
+ *
+ * @isa_bitmap: ISA bitmap to use
+ * Return: base extension word as unsigned long value
+ *
+ * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
+ */
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap)
+{
+ if (!isa_bitmap)
+ return riscv_isa[0];
+ return isa_bitmap[0];
+}
+EXPORT_SYMBOL_GPL(riscv_isa_extension_base);
+
+/**
+ * __riscv_isa_extension_available() - Check whether given extension
+ * is available or not
+ *
+ * @isa_bitmap: ISA bitmap to use
+ * @bit: bit position of the desired extension
+ * Return: true or false
+ *
+ * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
+ */
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit)
+{
+ const unsigned long *bmap = (isa_bitmap) ? isa_bitmap : riscv_isa;
+
+ if (bit >= RISCV_ISA_EXT_MAX)
+ return false;
+
+ return test_bit(bit, bmap) ? true : false;
+}
+EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
+
void riscv_fill_hwcap(void)
{
struct device_node *node;
const char *isa;
- size_t i;
+ char print_str[BITS_PER_LONG + 1];
+ size_t i, j, isa_len;
static unsigned long isa2hwcap[256] = {0};
isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I;
@@ -33,8 +76,11 @@ void riscv_fill_hwcap(void)
elf_hwcap = 0;
+ bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX);
+
for_each_of_cpu_node(node) {
unsigned long this_hwcap = 0;
+ unsigned long this_isa = 0;
if (riscv_of_processor_hartid(node) < 0)
continue;
@@ -44,8 +90,24 @@ void riscv_fill_hwcap(void)
continue;
}
- for (i = 0; i < strlen(isa); ++i)
+ i = 0;
+ isa_len = strlen(isa);
+#if IS_ENABLED(CONFIG_32BIT)
+ if (!strncmp(isa, "rv32", 4))
+ i += 4;
+#elif IS_ENABLED(CONFIG_64BIT)
+ if (!strncmp(isa, "rv64", 4))
+ i += 4;
+#endif
+ for (; i < isa_len; ++i) {
this_hwcap |= isa2hwcap[(unsigned char)(isa[i])];
+ /*
+ * TODO: X, Y and Z extension parsing for Host ISA
+ * bitmap will be added in-future.
+ */
+ if ('a' <= isa[i] && isa[i] < 'x')
+ this_isa |= (1UL << (isa[i] - 'a'));
+ }
/*
* All "okay" hart should have same isa. Set HWCAP based on
@@ -56,6 +118,11 @@ void riscv_fill_hwcap(void)
elf_hwcap &= this_hwcap;
else
elf_hwcap = this_hwcap;
+
+ if (riscv_isa[0])
+ riscv_isa[0] &= this_isa;
+ else
+ riscv_isa[0] = this_isa;
}
/* We don't support systems with F but without D, so mask those out
@@ -65,7 +132,17 @@ void riscv_fill_hwcap(void)
elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
}
- pr_info("elf_hwcap is 0x%lx\n", elf_hwcap);
+ memset(print_str, 0, sizeof(print_str));
+ for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+ if (riscv_isa[0] & BIT_MASK(i))
+ print_str[j++] = (char)('a' + i);
+ pr_info("riscv: ISA extensions %s\n", print_str);
+
+ memset(print_str, 0, sizeof(print_str));
+ for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+ if (elf_hwcap & BIT_MASK(i))
+ print_str[j++] = (char)('a' + i);
+ pr_info("riscv: ELF capabilities %s\n", print_str);
#ifdef CONFIG_FPU
if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index e0a6293093f1..a65a8fa0c22d 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -10,6 +10,7 @@
#include <linux/cpu.h>
#include <linux/interrupt.h>
+#include <linux/module.h>
#include <linux/profile.h>
#include <linux/smp.h>
#include <linux/sched.h>
@@ -63,6 +64,7 @@ void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
for_each_cpu(cpu, in)
cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
}
+EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask);
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index a4ee3a0e7d20..4c8b2a4a6a70 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -12,7 +12,7 @@ vdso-syms += getcpu
vdso-syms += flush_icache
# Files to link into the vdso
-obj-vdso = $(patsubst %, %.o, $(vdso-syms))
+obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-dummy.o
diff --git a/arch/riscv/kernel/vdso/note.S b/arch/riscv/kernel/vdso/note.S
new file mode 100644
index 000000000000..2a956c942211
--- /dev/null
+++ b/arch/riscv/kernel/vdso/note.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/elfnote.h>
+#include <linux/version.h>
+
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index b55be44ff9bd..27a334106708 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -150,7 +150,8 @@ void __init setup_bootmem(void)
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
set_max_mapnr(PFN_DOWN(mem_size));
- max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
+ max_pfn = PFN_DOWN(memblock_end_of_DRAM());
+ max_low_pfn = max_pfn;
#ifdef CONFIG_BLK_DEV_INITRD
setup_initrd();
@@ -501,22 +502,6 @@ static inline void setup_vm_final(void)
#endif /* CONFIG_MMU */
#ifdef CONFIG_STRICT_KERNEL_RWX
-void set_kernel_text_rw(void)
-{
- unsigned long text_start = (unsigned long)_text;
- unsigned long text_end = (unsigned long)_etext;
-
- set_memory_rw(text_start, (text_end - text_start) >> PAGE_SHIFT);
-}
-
-void set_kernel_text_ro(void)
-{
- unsigned long text_start = (unsigned long)_text;
- unsigned long text_end = (unsigned long)_etext;
-
- set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
-}
-
void mark_rodata_ro(void)
{
unsigned long text_start = (unsigned long)_text;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 5dcf9ff12828..d05bb040fd42 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -545,6 +545,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_AIS:
case KVM_CAP_S390_AIS_MIGRATION:
case KVM_CAP_S390_VCPU_RESETS:
+ case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
case KVM_CAP_S390_HPAGE_1M:
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 69a824f9ef0b..893893642415 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -626,10 +626,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
* available for the guest are AQIC and TAPQ with the t bit set
* since we do not set IC.3 (FIII) we currently will only intercept
* the AQIC function code.
+ * Note: running nested under z/VM can result in intercepts for other
+ * function codes, e.g. PQAP(QCI). We do not support this and bail out.
*/
reg0 = vcpu->run->s.regs.gprs[0];
fc = (reg0 >> 24) & 0xff;
- if (WARN_ON_ONCE(fc != 0x03))
+ if (fc != 0x03)
return -EOPNOTSUPP;
/* PQAP instruction is allowed for guest kernel only */
diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h
index 478260002836..fff8861df107 100644
--- a/arch/sparc/include/asm/page_32.h
+++ b/arch/sparc/include/asm/page_32.h
@@ -54,7 +54,7 @@ extern struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS+1];
*/
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long iopte; } iopte_t;
-typedef struct { unsigned long pmdv[16]; } pmd_t;
+typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long ctxd; } ctxd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
@@ -62,7 +62,7 @@ typedef struct { unsigned long iopgprot; } iopgprot_t;
#define pte_val(x) ((x).pte)
#define iopte_val(x) ((x).iopte)
-#define pmd_val(x) ((x).pmdv[0])
+#define pmd_val(x) ((x).pmd)
#define pgd_val(x) ((x).pgd)
#define ctxd_val(x) ((x).ctxd)
#define pgprot_val(x) ((x).pgprot)
@@ -82,7 +82,7 @@ typedef struct { unsigned long iopgprot; } iopgprot_t;
*/
typedef unsigned long pte_t;
typedef unsigned long iopte_t;
-typedef struct { unsigned long pmdv[16]; } pmd_t;
+typedef unsigned long pmd_t;
typedef unsigned long pgd_t;
typedef unsigned long ctxd_t;
typedef unsigned long pgprot_t;
@@ -90,14 +90,14 @@ typedef unsigned long iopgprot_t;
#define pte_val(x) (x)
#define iopte_val(x) (x)
-#define pmd_val(x) ((x).pmdv[0])
+#define pmd_val(x) (x)
#define pgd_val(x) (x)
#define ctxd_val(x) (x)
#define pgprot_val(x) (x)
#define iopgprot_val(x) (x)
#define __pte(x) (x)
-#define __pmd(x) ((pmd_t) { { (x) }, })
+#define __pmd(x) (x)
#define __iopte(x) (x)
#define __pgd(x) (x)
#define __ctxd(x) (x)
@@ -106,7 +106,7 @@ typedef unsigned long iopgprot_t;
#endif
-typedef struct page *pgtable_t;
+typedef pte_t *pgtable_t;
#define TASK_UNMAPPED_BASE 0x50000000
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index eae0c92ec422..b772384871e9 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -50,23 +50,24 @@ static inline void free_pmd_fast(pmd_t * pmd)
#define pmd_free(mm, pmd) free_pmd_fast(pmd)
#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
-void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
-#define pmd_pgtable(pmd) pmd_page(pmd)
+#define pmd_populate(mm, pmd, pte) pmd_set(pmd, pte)
+#define pmd_pgtable(pmd) (pgtable_t)__pmd_page(pmd)
void pmd_set(pmd_t *pmdp, pte_t *ptep);
-#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
+#define pmd_populate_kernel pmd_populate
pgtable_t pte_alloc_one(struct mm_struct *mm);
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
- return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
+ return srmmu_get_nocache(SRMMU_PTE_TABLE_SIZE,
+ SRMMU_PTE_TABLE_SIZE);
}
static inline void free_pte_fast(pte_t *pte)
{
- srmmu_free_nocache(pte, PTE_SIZE);
+ srmmu_free_nocache(pte, SRMMU_PTE_TABLE_SIZE);
}
#define pte_free_kernel(mm, pte) free_pte_fast(pte)
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 0de659ae0ba4..c5625b2aa331 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -11,6 +11,16 @@
#include <linux/const.h>
+#define PMD_SHIFT 18
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+#define PMD_ALIGN(__addr) (((__addr) + ~PMD_MASK) & PMD_MASK)
+
+#define PGDIR_SHIFT 24
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+#define PGDIR_ALIGN(__addr) (((__addr) + ~PGDIR_MASK) & PGDIR_MASK)
+
#ifndef __ASSEMBLY__
#include <asm-generic/pgtable-nopud.h>
@@ -34,17 +44,10 @@ unsigned long __init bootmem_init(unsigned long *pages_avail);
#define pmd_ERROR(e) __builtin_trap()
#define pgd_ERROR(e) __builtin_trap()
-#define PMD_SHIFT 22
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
-#define PMD_ALIGN(__addr) (((__addr) + ~PMD_MASK) & PMD_MASK)
-#define PGDIR_SHIFT SRMMU_PGDIR_SHIFT
-#define PGDIR_SIZE SRMMU_PGDIR_SIZE
-#define PGDIR_MASK SRMMU_PGDIR_MASK
-#define PTRS_PER_PTE 1024
-#define PTRS_PER_PMD SRMMU_PTRS_PER_PMD
-#define PTRS_PER_PGD SRMMU_PTRS_PER_PGD
-#define USER_PTRS_PER_PGD PAGE_OFFSET / SRMMU_PGDIR_SIZE
+#define PTRS_PER_PTE 64
+#define PTRS_PER_PMD 64
+#define PTRS_PER_PGD 256
+#define USER_PTRS_PER_PGD PAGE_OFFSET / PGDIR_SIZE
#define FIRST_USER_ADDRESS 0UL
#define PTE_SIZE (PTRS_PER_PTE*4)
@@ -132,6 +135,17 @@ static inline struct page *pmd_page(pmd_t pmd)
return pfn_to_page((pmd_val(pmd) & SRMMU_PTD_PMASK) >> (PAGE_SHIFT-4));
}
+static inline unsigned long __pmd_page(pmd_t pmd)
+{
+ unsigned long v;
+
+ if (srmmu_device_memory(pmd_val(pmd)))
+ BUG();
+
+ v = pmd_val(pmd) & SRMMU_PTD_PMASK;
+ return (unsigned long)__nocache_va(v << 4);
+}
+
static inline unsigned long pud_page_vaddr(pud_t pud)
{
if (srmmu_device_memory(pud_val(pud))) {
@@ -179,9 +193,7 @@ static inline int pmd_none(pmd_t pmd)
static inline void pmd_clear(pmd_t *pmdp)
{
- int i;
- for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++)
- set_pte((pte_t *)&pmdp->pmdv[i], __pte(0));
+ set_pte((pte_t *)&pmd_val(*pmdp), __pte(0));
}
static inline int pud_none(pud_t pud)
diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h
index 32a508897501..7708d015712b 100644
--- a/arch/sparc/include/asm/pgtsrmmu.h
+++ b/arch/sparc/include/asm/pgtsrmmu.h
@@ -17,39 +17,9 @@
/* Number of contexts is implementation-dependent; 64k is the most we support */
#define SRMMU_MAX_CONTEXTS 65536
-/* PMD_SHIFT determines the size of the area a second-level page table entry can map */
-#define SRMMU_REAL_PMD_SHIFT 18
-#define SRMMU_REAL_PMD_SIZE (1UL << SRMMU_REAL_PMD_SHIFT)
-#define SRMMU_REAL_PMD_MASK (~(SRMMU_REAL_PMD_SIZE-1))
-#define SRMMU_REAL_PMD_ALIGN(__addr) (((__addr)+SRMMU_REAL_PMD_SIZE-1)&SRMMU_REAL_PMD_MASK)
-
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define SRMMU_PGDIR_SHIFT 24
-#define SRMMU_PGDIR_SIZE (1UL << SRMMU_PGDIR_SHIFT)
-#define SRMMU_PGDIR_MASK (~(SRMMU_PGDIR_SIZE-1))
-#define SRMMU_PGDIR_ALIGN(addr) (((addr)+SRMMU_PGDIR_SIZE-1)&SRMMU_PGDIR_MASK)
-
-#define SRMMU_REAL_PTRS_PER_PTE 64
-#define SRMMU_REAL_PTRS_PER_PMD 64
-#define SRMMU_PTRS_PER_PGD 256
-
-#define SRMMU_REAL_PTE_TABLE_SIZE (SRMMU_REAL_PTRS_PER_PTE*4)
-#define SRMMU_PMD_TABLE_SIZE (SRMMU_REAL_PTRS_PER_PMD*4)
-#define SRMMU_PGD_TABLE_SIZE (SRMMU_PTRS_PER_PGD*4)
-
-/*
- * To support pagetables in highmem, Linux introduces APIs which
- * return struct page* and generally manipulate page tables when
- * they are not mapped into kernel space. Our hardware page tables
- * are smaller than pages. We lump hardware tabes into big, page sized
- * software tables.
- *
- * PMD_SHIFT determines the size of the area a second-level page table entry
- * can map, and our pmd_t is 16 times larger than normal. The values which
- * were once defined here are now generic for 4c and srmmu, so they're
- * found in pgtable.h.
- */
-#define SRMMU_PTRS_PER_PMD 4
+#define SRMMU_PTE_TABLE_SIZE (PTRS_PER_PTE*4)
+#define SRMMU_PMD_TABLE_SIZE (PTRS_PER_PMD*4)
+#define SRMMU_PGD_TABLE_SIZE (PTRS_PER_PGD*4)
/* Definition of the values in the ET field of PTD's and PTE's */
#define SRMMU_ET_MASK 0x3
diff --git a/arch/sparc/include/asm/viking.h b/arch/sparc/include/asm/viking.h
index 0bbefd184221..08ffc605035f 100644
--- a/arch/sparc/include/asm/viking.h
+++ b/arch/sparc/include/asm/viking.h
@@ -10,6 +10,7 @@
#include <asm/asi.h>
#include <asm/mxcc.h>
+#include <asm/pgtable.h>
#include <asm/pgtsrmmu.h>
/* Bits in the SRMMU control register for GNU/Viking modules.
@@ -227,7 +228,7 @@ static inline unsigned long viking_hwprobe(unsigned long vaddr)
: "=r" (val)
: "r" (vaddr | 0x200), "i" (ASI_M_FLUSH_PROBE));
if ((val & SRMMU_ET_MASK) == SRMMU_ET_PTE) {
- vaddr &= ~SRMMU_PGDIR_MASK;
+ vaddr &= ~PGDIR_MASK;
vaddr >>= PAGE_SHIFT;
return val | (vaddr << 8);
}
@@ -237,7 +238,7 @@ static inline unsigned long viking_hwprobe(unsigned long vaddr)
: "=r" (val)
: "r" (vaddr | 0x100), "i" (ASI_M_FLUSH_PROBE));
if ((val & SRMMU_ET_MASK) == SRMMU_ET_PTE) {
- vaddr &= ~SRMMU_REAL_PMD_MASK;
+ vaddr &= ~PMD_MASK;
vaddr >>= PAGE_SHIFT;
return val | (vaddr << 8);
}
diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
index e55f2c075165..be30c8d4cc73 100644
--- a/arch/sparc/kernel/head_32.S
+++ b/arch/sparc/kernel/head_32.S
@@ -24,7 +24,7 @@
#include <asm/winmacro.h>
#include <asm/thread_info.h> /* TI_UWINMASK */
#include <asm/errno.h>
-#include <asm/pgtsrmmu.h> /* SRMMU_PGDIR_SHIFT */
+#include <asm/pgtable.h> /* PGDIR_SHIFT */
#include <asm/export.h>
.data
@@ -273,7 +273,7 @@ not_a_sun4:
lda [%o1] ASI_M_BYPASS, %o2 ! This is the 0x0 16MB pgd
/* Calculate to KERNBASE entry. */
- add %o1, KERNBASE >> (SRMMU_PGDIR_SHIFT - 2), %o3
+ add %o1, KERNBASE >> (PGDIR_SHIFT - 2), %o3
/* Poke the entry into the calculated address. */
sta %o2, [%o3] ASI_M_BYPASS
@@ -317,7 +317,7 @@ srmmu_not_viking:
sll %g1, 0x8, %g1 ! make phys addr for l1 tbl
lda [%g1] ASI_M_BYPASS, %g2 ! get level1 entry for 0x0
- add %g1, KERNBASE >> (SRMMU_PGDIR_SHIFT - 2), %g3
+ add %g1, KERNBASE >> (PGDIR_SHIFT - 2), %g3
sta %g2, [%g3] ASI_M_BYPASS ! place at KERNBASE entry
b go_to_highmem
nop ! wheee....
@@ -341,7 +341,7 @@ leon_remap:
sll %g1, 0x8, %g1 ! make phys addr for l1 tbl
lda [%g1] ASI_M_BYPASS, %g2 ! get level1 entry for 0x0
- add %g1, KERNBASE >> (SRMMU_PGDIR_SHIFT - 2), %g3
+ add %g1, KERNBASE >> (PGDIR_SHIFT - 2), %g3
sta %g2, [%g3] ASI_M_BYPASS ! place at KERNBASE entry
b go_to_highmem
nop ! wheee....
diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c
index 16b50afe7b52..646dd58169ec 100644
--- a/arch/sparc/kernel/ptrace_32.c
+++ b/arch/sparc/kernel/ptrace_32.c
@@ -46,82 +46,79 @@ enum sparc_regset {
REGSET_FP,
};
+static int regwindow32_get(struct task_struct *target,
+ const struct pt_regs *regs,
+ u32 *uregs)
+{
+ unsigned long reg_window = regs->u_regs[UREG_I6];
+ int size = 16 * sizeof(u32);
+
+ if (target == current) {
+ if (copy_from_user(uregs, (void __user *)reg_window, size))
+ return -EFAULT;
+ } else {
+ if (access_process_vm(target, reg_window, uregs, size,
+ FOLL_FORCE) != size)
+ return -EFAULT;
+ }
+ return 0;
+}
+
+static int regwindow32_set(struct task_struct *target,
+ const struct pt_regs *regs,
+ u32 *uregs)
+{
+ unsigned long reg_window = regs->u_regs[UREG_I6];
+ int size = 16 * sizeof(u32);
+
+ if (target == current) {
+ if (copy_to_user((void __user *)reg_window, uregs, size))
+ return -EFAULT;
+ } else {
+ if (access_process_vm(target, reg_window, uregs, size,
+ FOLL_FORCE | FOLL_WRITE) != size)
+ return -EFAULT;
+ }
+ return 0;
+}
+
static int genregs32_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
const struct pt_regs *regs = target->thread.kregs;
- unsigned long __user *reg_window;
- unsigned long *k = kbuf;
- unsigned long __user *u = ubuf;
- unsigned long reg;
+ u32 uregs[16];
+ int ret;
if (target == current)
flush_user_windows();
- pos /= sizeof(reg);
- count /= sizeof(reg);
-
- if (kbuf) {
- for (; count > 0 && pos < 16; count--)
- *k++ = regs->u_regs[pos++];
-
- reg_window = (unsigned long __user *) regs->u_regs[UREG_I6];
- reg_window -= 16;
- for (; count > 0 && pos < 32; count--) {
- if (get_user(*k++, &reg_window[pos++]))
- return -EFAULT;
- }
- } else {
- for (; count > 0 && pos < 16; count--) {
- if (put_user(regs->u_regs[pos++], u++))
- return -EFAULT;
- }
-
- reg_window = (unsigned long __user *) regs->u_regs[UREG_I6];
- reg_window -= 16;
- for (; count > 0 && pos < 32; count--) {
- if (get_user(reg, &reg_window[pos++]) ||
- put_user(reg, u++))
- return -EFAULT;
- }
- }
- while (count > 0) {
- switch (pos) {
- case 32: /* PSR */
- reg = regs->psr;
- break;
- case 33: /* PC */
- reg = regs->pc;
- break;
- case 34: /* NPC */
- reg = regs->npc;
- break;
- case 35: /* Y */
- reg = regs->y;
- break;
- case 36: /* WIM */
- case 37: /* TBR */
- reg = 0;
- break;
- default:
- goto finish;
- }
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ regs->u_regs,
+ 0, 16 * sizeof(u32));
+ if (ret || !count)
+ return ret;
- if (kbuf)
- *k++ = reg;
- else if (put_user(reg, u++))
+ if (pos < 32 * sizeof(u32)) {
+ if (regwindow32_get(target, regs, uregs))
return -EFAULT;
- pos++;
- count--;
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ uregs,
+ 16 * sizeof(u32), 32 * sizeof(u32));
+ if (ret || !count)
+ return ret;
}
-finish:
- pos *= sizeof(reg);
- count *= sizeof(reg);
- return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- 38 * sizeof(reg), -1);
+ uregs[0] = regs->psr;
+ uregs[1] = regs->pc;
+ uregs[2] = regs->npc;
+ uregs[3] = regs->y;
+ uregs[4] = 0; /* WIM */
+ uregs[5] = 0; /* TBR */
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ uregs,
+ 32 * sizeof(u32), 38 * sizeof(u32));
}
static int genregs32_set(struct task_struct *target,
@@ -130,82 +127,58 @@ static int genregs32_set(struct task_struct *target,
const void *kbuf, const void __user *ubuf)
{
struct pt_regs *regs = target->thread.kregs;
- unsigned long __user *reg_window;
- const unsigned long *k = kbuf;
- const unsigned long __user *u = ubuf;
- unsigned long reg;
+ u32 uregs[16];
+ u32 psr;
+ int ret;
if (target == current)
flush_user_windows();
- pos /= sizeof(reg);
- count /= sizeof(reg);
-
- if (kbuf) {
- for (; count > 0 && pos < 16; count--)
- regs->u_regs[pos++] = *k++;
-
- reg_window = (unsigned long __user *) regs->u_regs[UREG_I6];
- reg_window -= 16;
- for (; count > 0 && pos < 32; count--) {
- if (put_user(*k++, &reg_window[pos++]))
- return -EFAULT;
- }
- } else {
- for (; count > 0 && pos < 16; count--) {
- if (get_user(reg, u++))
- return -EFAULT;
- regs->u_regs[pos++] = reg;
- }
-
- reg_window = (unsigned long __user *) regs->u_regs[UREG_I6];
- reg_window -= 16;
- for (; count > 0 && pos < 32; count--) {
- if (get_user(reg, u++) ||
- put_user(reg, &reg_window[pos++]))
- return -EFAULT;
- }
- }
- while (count > 0) {
- unsigned long psr;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ regs->u_regs,
+ 0, 16 * sizeof(u32));
+ if (ret || !count)
+ return ret;
- if (kbuf)
- reg = *k++;
- else if (get_user(reg, u++))
+ if (pos < 32 * sizeof(u32)) {
+ if (regwindow32_get(target, regs, uregs))
return -EFAULT;
-
- switch (pos) {
- case 32: /* PSR */
- psr = regs->psr;
- psr &= ~(PSR_ICC | PSR_SYSCALL);
- psr |= (reg & (PSR_ICC | PSR_SYSCALL));
- regs->psr = psr;
- break;
- case 33: /* PC */
- regs->pc = reg;
- break;
- case 34: /* NPC */
- regs->npc = reg;
- break;
- case 35: /* Y */
- regs->y = reg;
- break;
- case 36: /* WIM */
- case 37: /* TBR */
- break;
- default:
- goto finish;
- }
-
- pos++;
- count--;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ uregs,
+ 16 * sizeof(u32), 32 * sizeof(u32));
+ if (ret)
+ return ret;
+ if (regwindow32_set(target, regs, uregs))
+ return -EFAULT;
+ if (!count)
+ return 0;
}
-finish:
- pos *= sizeof(reg);
- count *= sizeof(reg);
-
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &psr,
+ 32 * sizeof(u32), 33 * sizeof(u32));
+ if (ret)
+ return ret;
+ regs->psr = (regs->psr & ~(PSR_ICC | PSR_SYSCALL)) |
+ (psr & (PSR_ICC | PSR_SYSCALL));
+ if (!count)
+ return 0;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &regs->pc,
+ 33 * sizeof(u32), 34 * sizeof(u32));
+ if (ret || !count)
+ return ret;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &regs->npc,
+ 34 * sizeof(u32), 35 * sizeof(u32));
+ if (ret || !count)
+ return ret;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &regs->y,
+ 35 * sizeof(u32), 36 * sizeof(u32));
+ if (ret || !count)
+ return ret;
return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
- 38 * sizeof(reg), -1);
+ 36 * sizeof(u32), 38 * sizeof(u32));
}
static int fpregs32_get(struct task_struct *target,
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index c9d41a96468f..3f5930bfab06 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -572,19 +572,13 @@ static int genregs32_get(struct task_struct *target,
for (; count > 0 && pos < 32; count--) {
if (access_process_vm(target,
(unsigned long)
- &reg_window[pos],
+ &reg_window[pos++],
&reg, sizeof(reg),
FOLL_FORCE)
!= sizeof(reg))
return -EFAULT;
- if (access_process_vm(target,
- (unsigned long) u,
- &reg, sizeof(reg),
- FOLL_FORCE | FOLL_WRITE)
- != sizeof(reg))
+ if (put_user(reg, u++))
return -EFAULT;
- pos++;
- u++;
}
}
}
@@ -684,12 +678,7 @@ static int genregs32_set(struct task_struct *target,
}
} else {
for (; count > 0 && pos < 32; count--) {
- if (access_process_vm(target,
- (unsigned long)
- u,
- &reg, sizeof(reg),
- FOLL_FORCE)
- != sizeof(reg))
+ if (get_user(reg, u++))
return -EFAULT;
if (access_process_vm(target,
(unsigned long)
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index b5da3bfdc225..f84a02ab6bf9 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -22,7 +22,6 @@
#include <linux/msg.h>
#include <linux/shm.h>
#include <linux/uio.h>
-#include <linux/nfs_fs.h>
#include <linux/quota.h>
#include <linux/poll.h>
#include <linux/personality.h>
diff --git a/arch/sparc/mm/hypersparc.S b/arch/sparc/mm/hypersparc.S
index 66885a8dc50a..6c2521e85a42 100644
--- a/arch/sparc/mm/hypersparc.S
+++ b/arch/sparc/mm/hypersparc.S
@@ -10,6 +10,7 @@
#include <asm/asm-offsets.h>
#include <asm/asi.h>
#include <asm/page.h>
+#include <asm/pgtable.h>
#include <asm/pgtsrmmu.h>
#include <linux/init.h>
@@ -293,7 +294,7 @@ hypersparc_flush_tlb_range:
cmp %o3, -1
be hypersparc_flush_tlb_range_out
#endif
- sethi %hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4
+ sethi %hi(~((1 << PGDIR_SHIFT) - 1)), %o4
sta %o3, [%g1] ASI_M_MMUREGS
and %o1, %o4, %o1
add %o1, 0x200, %o1
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 906eda1158b4..3cb3dffcbcdc 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -193,6 +193,7 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
/* Reserve the kernel text/data/bss. */
size = (start_pfn << PAGE_SHIFT) - phys_base;
memblock_reserve(phys_base, size);
+ memblock_add(phys_base, size);
size = memblock_phys_mem_size() - memblock_reserved_size();
*pages_avail = (size >> PAGE_SHIFT) - high_pages;
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index b7c94de70cca..116d19a390f2 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -136,26 +136,8 @@ static void msi_set_sync(void)
void pmd_set(pmd_t *pmdp, pte_t *ptep)
{
- unsigned long ptp; /* Physical address, shifted right by 4 */
- int i;
-
- ptp = __nocache_pa(ptep) >> 4;
- for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) {
- set_pte((pte_t *)&pmdp->pmdv[i], __pte(SRMMU_ET_PTD | ptp));
- ptp += (SRMMU_REAL_PTRS_PER_PTE * sizeof(pte_t) >> 4);
- }
-}
-
-void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep)
-{
- unsigned long ptp; /* Physical address, shifted right by 4 */
- int i;
-
- ptp = page_to_pfn(ptep) << (PAGE_SHIFT-4); /* watch for overflow */
- for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) {
- set_pte((pte_t *)&pmdp->pmdv[i], __pte(SRMMU_ET_PTD | ptp));
- ptp += (SRMMU_REAL_PTRS_PER_PTE * sizeof(pte_t) >> 4);
- }
+ unsigned long ptp = __nocache_pa(ptep) >> 4;
+ set_pte((pte_t *)&pmd_val(*pmdp), __pte(SRMMU_ET_PTD | ptp));
}
/* Find an entry in the third-level page table.. */
@@ -163,7 +145,7 @@ pte_t *pte_offset_kernel(pmd_t *dir, unsigned long address)
{
void *pte;
- pte = __nocache_va((dir->pmdv[0] & SRMMU_PTD_PMASK) << 4);
+ pte = __nocache_va((pmd_val(*dir) & SRMMU_PTD_PMASK) << 4);
return (pte_t *) pte +
((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
}
@@ -175,18 +157,18 @@ pte_t *pte_offset_kernel(pmd_t *dir, unsigned long address)
*/
static void *__srmmu_get_nocache(int size, int align)
{
- int offset;
+ int offset, minsz = 1 << SRMMU_NOCACHE_BITMAP_SHIFT;
unsigned long addr;
- if (size < SRMMU_NOCACHE_BITMAP_SHIFT) {
+ if (size < minsz) {
printk(KERN_ERR "Size 0x%x too small for nocache request\n",
size);
- size = SRMMU_NOCACHE_BITMAP_SHIFT;
+ size = minsz;
}
- if (size & (SRMMU_NOCACHE_BITMAP_SHIFT - 1)) {
- printk(KERN_ERR "Size 0x%x unaligned int nocache request\n",
+ if (size & (minsz - 1)) {
+ printk(KERN_ERR "Size 0x%x unaligned in nocache request\n",
size);
- size += SRMMU_NOCACHE_BITMAP_SHIFT - 1;
+ size += minsz - 1;
}
BUG_ON(align > SRMMU_NOCACHE_ALIGN_MAX);
@@ -376,31 +358,33 @@ pgd_t *get_pgd_fast(void)
*/
pgtable_t pte_alloc_one(struct mm_struct *mm)
{
- unsigned long pte;
+ pte_t *ptep;
struct page *page;
- if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
- return NULL;
- page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
- if (!pgtable_pte_page_ctor(page)) {
- __free_page(page);
+ if ((ptep = pte_alloc_one_kernel(mm)) == 0)
return NULL;
+ page = pfn_to_page(__nocache_pa((unsigned long)ptep) >> PAGE_SHIFT);
+ spin_lock(&mm->page_table_lock);
+ if (page_ref_inc_return(page) == 2 && !pgtable_pte_page_ctor(page)) {
+ page_ref_dec(page);
+ ptep = NULL;
}
- return page;
+ spin_unlock(&mm->page_table_lock);
+
+ return ptep;
}
-void pte_free(struct mm_struct *mm, pgtable_t pte)
+void pte_free(struct mm_struct *mm, pgtable_t ptep)
{
- unsigned long p;
+ struct page *page;
- pgtable_pte_page_dtor(pte);
- p = (unsigned long)page_address(pte); /* Cached address (for test) */
- if (p == 0)
- BUG();
- p = page_to_pfn(pte) << PAGE_SHIFT; /* Physical address */
+ page = pfn_to_page(__nocache_pa((unsigned long)ptep) >> PAGE_SHIFT);
+ spin_lock(&mm->page_table_lock);
+ if (page_ref_dec_return(page) == 1)
+ pgtable_pte_page_dtor(page);
+ spin_unlock(&mm->page_table_lock);
- /* free non cached virtual address*/
- srmmu_free_nocache(__nocache_va(p), PTE_SIZE);
+ srmmu_free_nocache(ptep, SRMMU_PTE_TABLE_SIZE);
}
/* context handling - a dynamically sized pool is used */
@@ -822,13 +806,13 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
what = 0;
addr = start - PAGE_SIZE;
- if (!(start & ~(SRMMU_REAL_PMD_MASK))) {
- if (srmmu_probe(addr + SRMMU_REAL_PMD_SIZE) == probed)
+ if (!(start & ~(PMD_MASK))) {
+ if (srmmu_probe(addr + PMD_SIZE) == probed)
what = 1;
}
- if (!(start & ~(SRMMU_PGDIR_MASK))) {
- if (srmmu_probe(addr + SRMMU_PGDIR_SIZE) == probed)
+ if (!(start & ~(PGDIR_MASK))) {
+ if (srmmu_probe(addr + PGDIR_SIZE) == probed)
what = 2;
}
@@ -837,7 +821,7 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
pudp = pud_offset(p4dp, start);
if (what == 2) {
*(pgd_t *)__nocache_fix(pgdp) = __pgd(probed);
- start += SRMMU_PGDIR_SIZE;
+ start += PGDIR_SIZE;
continue;
}
if (pud_none(*(pud_t *)__nocache_fix(pudp))) {
@@ -849,6 +833,11 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
pud_set(__nocache_fix(pudp), pmdp);
}
pmdp = pmd_offset(__nocache_fix(pgdp), start);
+ if (what == 1) {
+ *(pmd_t *)__nocache_fix(pmdp) = __pmd(probed);
+ start += PMD_SIZE;
+ continue;
+ }
if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
if (ptep == NULL)
@@ -856,19 +845,6 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
memset(__nocache_fix(ptep), 0, PTE_SIZE);
pmd_set(__nocache_fix(pmdp), ptep);
}
- if (what == 1) {
- /* We bend the rule where all 16 PTPs in a pmd_t point
- * inside the same PTE page, and we leak a perfectly
- * good hardware PTE piece. Alternatives seem worse.
- */
- unsigned int x; /* Index of HW PMD in soft cluster */
- unsigned long *val;
- x = (start >> PMD_SHIFT) & 15;
- val = &pmdp->pmdv[x];
- *(unsigned long *)__nocache_fix(val) = probed;
- start += SRMMU_REAL_PMD_SIZE;
- continue;
- }
ptep = pte_offset_kernel(__nocache_fix(pmdp), start);
*(pte_t *)__nocache_fix(ptep) = __pte(probed);
start += PAGE_SIZE;
@@ -890,9 +866,9 @@ static void __init do_large_mapping(unsigned long vaddr, unsigned long phys_base
/* Map sp_bank entry SP_ENTRY, starting at virtual address VBASE. */
static unsigned long __init map_spbank(unsigned long vbase, int sp_entry)
{
- unsigned long pstart = (sp_banks[sp_entry].base_addr & SRMMU_PGDIR_MASK);
- unsigned long vstart = (vbase & SRMMU_PGDIR_MASK);
- unsigned long vend = SRMMU_PGDIR_ALIGN(vbase + sp_banks[sp_entry].num_bytes);
+ unsigned long pstart = (sp_banks[sp_entry].base_addr & PGDIR_MASK);
+ unsigned long vstart = (vbase & PGDIR_MASK);
+ unsigned long vend = PGDIR_ALIGN(vbase + sp_banks[sp_entry].num_bytes);
/* Map "low" memory only */
const unsigned long min_vaddr = PAGE_OFFSET;
const unsigned long max_vaddr = PAGE_OFFSET + SRMMU_MAXMEM;
@@ -905,7 +881,7 @@ static unsigned long __init map_spbank(unsigned long vbase, int sp_entry)
while (vstart < vend) {
do_large_mapping(vstart, pstart);
- vstart += SRMMU_PGDIR_SIZE; pstart += SRMMU_PGDIR_SIZE;
+ vstart += PGDIR_SIZE; pstart += PGDIR_SIZE;
}
return vstart;
}
diff --git a/arch/sparc/mm/viking.S b/arch/sparc/mm/viking.S
index adaef6e7b8cf..48f062de7a7f 100644
--- a/arch/sparc/mm/viking.S
+++ b/arch/sparc/mm/viking.S
@@ -13,6 +13,7 @@
#include <asm/asi.h>
#include <asm/mxcc.h>
#include <asm/page.h>
+#include <asm/pgtable.h>
#include <asm/pgtsrmmu.h>
#include <asm/viking.h>
@@ -157,7 +158,7 @@ viking_flush_tlb_range:
cmp %o3, -1
be 2f
#endif
- sethi %hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4
+ sethi %hi(~((1 << PGDIR_SHIFT) - 1)), %o4
sta %o3, [%g1] ASI_M_MMUREGS
and %o1, %o4, %o1
add %o1, 0x200, %o1
@@ -243,7 +244,7 @@ sun4dsmp_flush_tlb_range:
ld [%o0 + VMA_VM_MM], %o0
ld [%o0 + AOFF_mm_context], %o3
lda [%g1] ASI_M_MMUREGS, %g5
- sethi %hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4
+ sethi %hi(~((1 << PGDIR_SHIFT) - 1)), %o4
sta %o3, [%g1] ASI_M_MMUREGS
and %o1, %o4, %o1
add %o1, 0x200, %o1
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 0789e13ece90..1c7f13bb6728 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -98,13 +98,6 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
- /*
- * Push registers and sanitize registers of values that a
- * speculation attack might otherwise want to exploit. The
- * lower registers are likely clobbered well before they
- * could be put to use in a speculative execution gadget.
- * Interleave XOR with PUSH for better uop scheduling:
- */
.if \save_ret
pushq %rsi /* pt_regs->si */
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
@@ -114,34 +107,43 @@ For 32-bit we have the following conventions - kernel is built with
pushq %rsi /* pt_regs->si */
.endif
pushq \rdx /* pt_regs->dx */
- xorl %edx, %edx /* nospec dx */
pushq %rcx /* pt_regs->cx */
- xorl %ecx, %ecx /* nospec cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
- xorl %r8d, %r8d /* nospec r8 */
pushq %r9 /* pt_regs->r9 */
- xorl %r9d, %r9d /* nospec r9 */
pushq %r10 /* pt_regs->r10 */
- xorl %r10d, %r10d /* nospec r10 */
pushq %r11 /* pt_regs->r11 */
- xorl %r11d, %r11d /* nospec r11*/
pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx*/
pushq %rbp /* pt_regs->rbp */
- xorl %ebp, %ebp /* nospec rbp*/
pushq %r12 /* pt_regs->r12 */
- xorl %r12d, %r12d /* nospec r12*/
pushq %r13 /* pt_regs->r13 */
- xorl %r13d, %r13d /* nospec r13*/
pushq %r14 /* pt_regs->r14 */
- xorl %r14d, %r14d /* nospec r14*/
pushq %r15 /* pt_regs->r15 */
- xorl %r15d, %r15d /* nospec r15*/
UNWIND_HINT_REGS
+
.if \save_ret
pushq %rsi /* return address on top of stack */
.endif
+
+ /*
+ * Sanitize registers of values that a speculation attack might
+ * otherwise want to exploit. The lower registers are likely clobbered
+ * well before they could be put to use in a speculative execution
+ * gadget.
+ */
+ xorl %edx, %edx /* nospec dx */
+ xorl %ecx, %ecx /* nospec cx */
+ xorl %r8d, %r8d /* nospec r8 */
+ xorl %r9d, %r9d /* nospec r9 */
+ xorl %r10d, %r10d /* nospec r10 */
+ xorl %r11d, %r11d /* nospec r11 */
+ xorl %ebx, %ebx /* nospec rbx */
+ xorl %ebp, %ebp /* nospec rbp */
+ xorl %r12d, %r12d /* nospec r12 */
+ xorl %r13d, %r13d /* nospec r13 */
+ xorl %r14d, %r14d /* nospec r14 */
+ xorl %r15d, %r15d /* nospec r15 */
+
.endm
.macro POP_REGS pop_rdi=1 skip_r11rcx=0
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 0e9504fabe52..3063aa9090f9 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -249,7 +249,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
*/
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
- UNWIND_HINT_EMPTY
POP_REGS pop_rdi=0 skip_r11rcx=1
/*
@@ -258,6 +257,7 @@ syscall_return_via_sysret:
*/
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+ UNWIND_HINT_EMPTY
pushq RSP-RDI(%rdi) /* RSP */
pushq (%rdi) /* RDI */
@@ -279,8 +279,7 @@ SYM_CODE_END(entry_SYSCALL_64)
* %rdi: prev task
* %rsi: next task
*/
-SYM_CODE_START(__switch_to_asm)
- UNWIND_HINT_FUNC
+SYM_FUNC_START(__switch_to_asm)
/*
* Save callee-saved registers
* This must match the order in inactive_task_frame
@@ -321,7 +320,7 @@ SYM_CODE_START(__switch_to_asm)
popq %rbp
jmp __switch_to
-SYM_CODE_END(__switch_to_asm)
+SYM_FUNC_END(__switch_to_asm)
/*
* A newly forked process directly context switches into this address.
@@ -512,7 +511,7 @@ SYM_CODE_END(spurious_entries_start)
* +----------------------------------------------------+
*/
SYM_CODE_START(interrupt_entry)
- UNWIND_HINT_FUNC
+ UNWIND_HINT_IRET_REGS offset=16
ASM_CLAC
cld
@@ -544,9 +543,9 @@ SYM_CODE_START(interrupt_entry)
pushq 5*8(%rdi) /* regs->eflags */
pushq 4*8(%rdi) /* regs->cs */
pushq 3*8(%rdi) /* regs->ip */
+ UNWIND_HINT_IRET_REGS
pushq 2*8(%rdi) /* regs->orig_ax */
pushq 8(%rdi) /* return address */
- UNWIND_HINT_FUNC
movq (%rdi), %rdi
jmp 2f
@@ -637,6 +636,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
*/
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+ UNWIND_HINT_EMPTY
/* Copy the IRET frame to the trampoline stack. */
pushq 6*8(%rdi) /* SS */
@@ -1739,7 +1739,7 @@ SYM_CODE_START(rewind_stack_do_exit)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
leaq -PTREGS_SIZE(%rax), %rsp
- UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
+ UNWIND_HINT_REGS
call do_exit
SYM_CODE_END(rewind_stack_do_exit)
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 85be2f506272..70b96cae5b42 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -61,11 +61,12 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
{
/*
* Compare the symbol name with the system call name. Skip the
- * "__x64_sys", "__ia32_sys" or simple "sys" prefix.
+ * "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix.
*/
return !strcmp(sym + 3, name + 3) ||
(!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
- (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3));
+ (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) ||
+ (!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3));
}
#ifndef COMPILE_OFFSETS
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 42a2d0d3984a..0dea9f122bb9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1663,8 +1663,8 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
{
/* We can only post Fixed and LowPrio IRQs */
- return (irq->delivery_mode == dest_Fixed ||
- irq->delivery_mode == dest_LowestPrio);
+ return (irq->delivery_mode == APIC_DM_FIXED ||
+ irq->delivery_mode == APIC_DM_LOWEST);
}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 499578f7e6d7..70fc159ebe69 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -19,7 +19,7 @@ struct unwind_state {
#if defined(CONFIG_UNWINDER_ORC)
bool signal, full_regs;
unsigned long sp, bp, ip;
- struct pt_regs *regs;
+ struct pt_regs *regs, *prev_regs;
#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
bool got_irq;
unsigned long *bp, *orig_sp, ip;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 81b9c63dae1b..e53dda210cd7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -352,8 +352,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
* According to Intel, MFENCE can do the serialization here.
*/
asm volatile("mfence" : : : "memory");
-
- printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
return;
}
@@ -546,7 +544,7 @@ static struct clock_event_device lapic_clockevent = {
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-static u32 hsx_deadline_rev(void)
+static __init u32 hsx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x3a; /* EP */
@@ -556,7 +554,7 @@ static u32 hsx_deadline_rev(void)
return ~0U;
}
-static u32 bdx_deadline_rev(void)
+static __init u32 bdx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x00000011;
@@ -568,7 +566,7 @@ static u32 bdx_deadline_rev(void)
return ~0U;
}
-static u32 skx_deadline_rev(void)
+static __init u32 skx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x03: return 0x01000136;
@@ -581,7 +579,7 @@ static u32 skx_deadline_rev(void)
return ~0U;
}
-static const struct x86_cpu_id deadline_match[] = {
+static const struct x86_cpu_id deadline_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev),
@@ -603,18 +601,19 @@ static const struct x86_cpu_id deadline_match[] = {
{},
};
-static void apic_check_deadline_errata(void)
+static __init bool apic_validate_deadline_timer(void)
{
const struct x86_cpu_id *m;
u32 rev;
- if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
- boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return;
+ if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return false;
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return true;
m = x86_match_cpu(deadline_match);
if (!m)
- return;
+ return true;
/*
* Function pointers will have the MSB set due to address layout,
@@ -626,11 +625,12 @@ static void apic_check_deadline_errata(void)
rev = (u32)m->driver_data;
if (boot_cpu_data.microcode >= rev)
- return;
+ return true;
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
"please update microcode to version: 0x%x (or later)\n", rev);
+ return false;
}
/*
@@ -2092,7 +2092,8 @@ void __init init_apic_mappings(void)
{
unsigned int new_apicid;
- apic_check_deadline_errata();
+ if (apic_validate_deadline_timer())
+ pr_debug("TSC deadline timer available\n");
if (x2apic_mode) {
boot_cpu_physical_apicid = read_apic_id();
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 87b97897a881..460ae7f66818 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -183,7 +183,8 @@ recursion_check:
*/
if (visit_mask) {
if (*visit_mask & (1UL << info->type)) {
- printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
+ if (task == current)
+ printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
goto unknown;
}
*visit_mask |= 1UL << info->type;
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index a224b5ab103f..54226110bc7f 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -344,6 +344,9 @@ bad_address:
if (IS_ENABLED(CONFIG_X86_32))
goto the_end;
+ if (state->task != current)
+ goto the_end;
+
if (state->regs) {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index e9cc182aa97e..5b0bd8581fe6 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -8,19 +8,21 @@
#include <asm/orc_lookup.h>
#define orc_warn(fmt, ...) \
- printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
+ printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
+
+#define orc_warn_current(args...) \
+({ \
+ if (state->task == current) \
+ orc_warn(args); \
+})
extern int __start_orc_unwind_ip[];
extern int __stop_orc_unwind_ip[];
extern struct orc_entry __start_orc_unwind[];
extern struct orc_entry __stop_orc_unwind[];
-static DEFINE_MUTEX(sort_mutex);
-int *cur_orc_ip_table = __start_orc_unwind_ip;
-struct orc_entry *cur_orc_table = __start_orc_unwind;
-
-unsigned int lookup_num_blocks;
-bool orc_init;
+static bool orc_init __ro_after_init;
+static unsigned int lookup_num_blocks __ro_after_init;
static inline unsigned long orc_ip(const int *ip)
{
@@ -142,9 +144,6 @@ static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
- if (!orc_init)
- return NULL;
-
if (ip == 0)
return &null_orc_entry;
@@ -189,6 +188,10 @@ static struct orc_entry *orc_find(unsigned long ip)
#ifdef CONFIG_MODULES
+static DEFINE_MUTEX(sort_mutex);
+static int *cur_orc_ip_table = __start_orc_unwind_ip;
+static struct orc_entry *cur_orc_table = __start_orc_unwind;
+
static void orc_sort_swap(void *_a, void *_b, int size)
{
struct orc_entry *orc_a, *orc_b;
@@ -381,9 +384,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
return true;
}
+/*
+ * If state->regs is non-NULL, and points to a full pt_regs, just get the reg
+ * value from state->regs.
+ *
+ * Otherwise, if state->regs just points to IRET regs, and the previous frame
+ * had full regs, it's safe to get the value from the previous regs. This can
+ * happen when early/late IRQ entry code gets interrupted by an NMI.
+ */
+static bool get_reg(struct unwind_state *state, unsigned int reg_off,
+ unsigned long *val)
+{
+ unsigned int reg = reg_off/8;
+
+ if (!state->regs)
+ return false;
+
+ if (state->full_regs) {
+ *val = ((unsigned long *)state->regs)[reg];
+ return true;
+ }
+
+ if (state->prev_regs) {
+ *val = ((unsigned long *)state->prev_regs)[reg];
+ return true;
+ }
+
+ return false;
+}
+
bool unwind_next_frame(struct unwind_state *state)
{
- unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp;
+ unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
enum stack_type prev_type = state->stack_info.type;
struct orc_entry *orc;
bool indirect = false;
@@ -445,43 +477,39 @@ bool unwind_next_frame(struct unwind_state *state)
break;
case ORC_REG_R10:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R10 at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) {
+ orc_warn_current("missing R10 value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->r10;
break;
case ORC_REG_R13:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R13 at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) {
+ orc_warn_current("missing R13 value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->r13;
break;
case ORC_REG_DI:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DI at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) {
+ orc_warn_current("missing RDI value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->di;
break;
case ORC_REG_DX:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DX at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) {
+ orc_warn_current("missing DX value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->dx;
break;
default:
- orc_warn("unknown SP base reg %d for ip %pB\n",
+ orc_warn("unknown SP base reg %d at %pB\n",
orc->sp_reg, (void *)state->ip);
goto err;
}
@@ -504,44 +532,48 @@ bool unwind_next_frame(struct unwind_state *state)
state->sp = sp;
state->regs = NULL;
+ state->prev_regs = NULL;
state->signal = false;
break;
case ORC_TYPE_REGS:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
- orc_warn("can't dereference registers at %p for ip %pB\n",
- (void *)sp, (void *)orig_ip);
+ orc_warn_current("can't access registers at %pB\n",
+ (void *)orig_ip);
goto err;
}
state->regs = (struct pt_regs *)sp;
+ state->prev_regs = NULL;
state->full_regs = true;
state->signal = true;
break;
case ORC_TYPE_REGS_IRET:
if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
- orc_warn("can't dereference iret registers at %p for ip %pB\n",
- (void *)sp, (void *)orig_ip);
+ orc_warn_current("can't access iret registers at %pB\n",
+ (void *)orig_ip);
goto err;
}
+ if (state->full_regs)
+ state->prev_regs = state->regs;
state->regs = (void *)sp - IRET_FRAME_OFFSET;
state->full_regs = false;
state->signal = true;
break;
default:
- orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
+ orc_warn("unknown .orc_unwind entry type %d at %pB\n",
orc->type, (void *)orig_ip);
- break;
+ goto err;
}
/* Find BP: */
switch (orc->bp_reg) {
case ORC_REG_UNDEFINED:
- if (state->regs && state->full_regs)
- state->bp = state->regs->bp;
+ if (get_reg(state, offsetof(struct pt_regs, bp), &tmp))
+ state->bp = tmp;
break;
case ORC_REG_PREV_SP:
@@ -564,8 +596,8 @@ bool unwind_next_frame(struct unwind_state *state)
if (state->stack_info.type == prev_type &&
on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
state->sp <= prev_sp) {
- orc_warn("stack going in the wrong direction? ip=%pB\n",
- (void *)orig_ip);
+ orc_warn_current("stack going in the wrong direction? at %pB\n",
+ (void *)orig_ip);
goto err;
}
@@ -585,6 +617,9 @@ EXPORT_SYMBOL_GPL(unwind_next_frame);
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long *first_frame)
{
+ if (!orc_init)
+ goto done;
+
memset(state, 0, sizeof(*state));
state->task = task;
@@ -651,7 +686,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
/* Otherwise, skip ahead to the user-specified starting frame: */
while (!unwind_done(state) &&
(!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
- state->sp <= (unsigned long)first_frame))
+ state->sp < (unsigned long)first_frame))
unwind_next_frame(state);
return;
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 750ff0b29404..d057376bd3d3 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -225,12 +225,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
}
/*
- * AMD SVM AVIC accelerate EOI write and do not trap,
- * in-kernel IOAPIC will not be able to receive the EOI.
- * In this case, we do lazy update of the pending EOI when
- * trying to set IOAPIC irq.
+ * AMD SVM AVIC accelerate EOI write iff the interrupt is edge
+ * triggered, in which case the in-kernel IOAPIC will not be able
+ * to receive the EOI. In this case, we do a lazy update of the
+ * pending EOI when trying to set IOAPIC irq.
*/
- if (kvm_apicv_activated(ioapic->kvm))
+ if (edge && kvm_apicv_activated(ioapic->kvm))
ioapic_lazy_update_eoi(ioapic, irq);
/*
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index cf912b4aaba8..89f7f3aebd31 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -345,7 +345,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
return NULL;
/* Pin the user virtual address. */
- npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages);
+ npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
if (npinned != npages) {
pr_err("SEV: Failure locking %lu pages.\n", npages);
goto err;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2f379bacbb26..38f6aeefeb55 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1752,6 +1752,8 @@ static int db_interception(struct vcpu_svm *svm)
if (svm->vcpu.guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ kvm_run->debug.arch.dr6 = svm->vmcb->save.dr6;
+ kvm_run->debug.arch.dr7 = svm->vmcb->save.dr7;
kvm_run->debug.arch.pc =
svm->vmcb->save.cs.base + svm->vmcb->save.rip;
kvm_run->debug.arch.exception = DB_VECTOR;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index fd78ffbde644..e44f33c82332 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5165,7 +5165,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
*/
break;
default:
- BUG_ON(1);
+ BUG();
break;
}
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 87f3f24fef37..51d1a82742fd 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -82,6 +82,9 @@ SYM_FUNC_START(vmx_vmexit)
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+ /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
+ or $1, %_ASM_AX
+
pop %_ASM_AX
.Lvmexit_skip_rsb:
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c5835f9cb9ad..d786c7d27ce5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -926,19 +926,6 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
__reserved_bits; \
})
-static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c)
-{
- u64 reserved_bits = __cr4_reserved_bits(cpu_has, c);
-
- if (kvm_cpu_cap_has(X86_FEATURE_LA57))
- reserved_bits &= ~X86_CR4_LA57;
-
- if (kvm_cpu_cap_has(X86_FEATURE_UMIP))
- reserved_bits &= ~X86_CR4_UMIP;
-
- return reserved_bits;
-}
-
static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
if (cr4 & cr4_reserved_bits)
@@ -3385,6 +3372,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
+ case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
@@ -9675,7 +9663,9 @@ int kvm_arch_hardware_setup(void *opaque)
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
supported_xss = 0;
- cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);
+#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
+ cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
+#undef __kvm_cpu_cap_has
if (kvm_has_tsc_control) {
/*
@@ -9707,7 +9697,8 @@ int kvm_arch_check_processor_compat(void *opaque)
WARN_ON(!irqs_disabled());
- if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits)
+ if (__cr4_reserved_bits(cpu_has, c) !=
+ __cr4_reserved_bits(cpu_has, &boot_cpu_data))
return -EIO;
return ops->check_processor_compatibility();
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 59eca6a94ce7..b8c55a2e402d 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -43,7 +43,8 @@ struct cpa_data {
unsigned long pfn;
unsigned int flags;
unsigned int force_split : 1,
- force_static_prot : 1;
+ force_static_prot : 1,
+ force_flush_all : 1;
struct page **pages;
};
@@ -355,10 +356,10 @@ static void cpa_flush(struct cpa_data *data, int cache)
return;
}
- if (cpa->numpages <= tlb_single_page_flush_ceiling)
- on_each_cpu(__cpa_flush_tlb, cpa, 1);
- else
+ if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling)
flush_tlb_all();
+ else
+ on_each_cpu(__cpa_flush_tlb, cpa, 1);
if (!cache)
return;
@@ -1598,6 +1599,8 @@ static int cpa_process_alias(struct cpa_data *cpa)
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
alias_cpa.curpage = 0;
+ cpa->force_flush_all = 1;
+
ret = __change_page_attr_set_clr(&alias_cpa, 0);
if (ret)
return ret;
@@ -1618,6 +1621,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
alias_cpa.curpage = 0;
+ cpa->force_flush_all = 1;
/*
* The high mapping range is imprecise, so ignore the
* return value.