Diffstat (limited to 'arch')
344 files changed, 4912 insertions, 6271 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 405c85ab86f2..bd9f095d69fa 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -835,6 +835,36 @@ config CFI_CLANG https://clang.llvm.org/docs/ControlFlowIntegrity.html +config CFI_ICALL_NORMALIZE_INTEGERS + bool "Normalize CFI tags for integers" + depends on CFI_CLANG + depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS_CLANG + help + This option normalizes the CFI tags for integer types so that all + integer types of the same size and signedness receive the same CFI + tag. + + The option is separate from CONFIG_RUST because it affects the ABI. + When working with build systems that care about the ABI, it is + convenient to be able to turn on this flag first, before Rust is + turned on. + + This option is necessary for using CFI with Rust. If unsure, say N. + +config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_CLANG + def_bool y + depends on $(cc-option,-fsanitize=kcfi -fsanitize-cfi-icall-experimental-normalize-integers) + # With GCOV/KASAN we need this fix: https://github.com/llvm/llvm-project/pull/104826 + depends on CLANG_VERSION >= 190103 || (!GCOV_KERNEL && !KASAN_GENERIC && !KASAN_SW_TAGS) + +config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC + def_bool y + depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS_CLANG + depends on RUSTC_VERSION >= 107900 + # With GCOV/KASAN we need this fix: https://github.com/rust-lang/rust/pull/129373 + depends on (RUSTC_LLVM_VERSION >= 190103 && RUSTC_VERSION >= 108200) || \ + (!GCOV_KERNEL && !KASAN_GENERIC && !KASAN_SW_TAGS) + config CFI_PERMISSIVE bool "Use CFI in permissive mode" depends on CFI_CLANG diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h index 91d4a4d9258c..ae1b96479d0c 100644 --- a/arch/alpha/include/asm/cmpxchg.h +++ b/arch/alpha/include/asm/cmpxchg.h @@ -3,17 +3,232 @@ #define _ALPHA_CMPXCHG_H /* - * Atomic exchange routines. + * Atomic exchange. + * Since it can be used to implement critical sections + * it must clobber "memory" (also for interrupts in UP). */ -#define ____xchg(type, args...) __arch_xchg ## type ## _local(args) -#define ____cmpxchg(type, args...) 
__cmpxchg ## type ## _local(args) -#include <asm/xchg.h> +static inline unsigned long +____xchg_u8(volatile char *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " insbl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extbl %2,%4,%0\n" + " mskbl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +____xchg_u16(volatile short *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " inswl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extwl %2,%4,%0\n" + " mskwl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +____xchg_u32(volatile int *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldl_l %0,%4\n" + " bis $31,%3,%1\n" + " stl_c %1,%2\n" + " beq %1,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +static inline unsigned long +____xchg_u64(volatile long *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldq_l %0,%4\n" + " bis $31,%3,%1\n" + " stq_c %1,%2\n" + " beq %1,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid xchg(). */ +extern void __xchg_called_with_bad_pointer(void); + +static __always_inline unsigned long +____xchg(volatile void *ptr, unsigned long x, int size) +{ + return + size == 1 ? ____xchg_u8(ptr, x) : + size == 2 ? ____xchg_u16(ptr, x) : + size == 4 ? ____xchg_u32(ptr, x) : + size == 8 ? ____xchg_u64(ptr, x) : + (__xchg_called_with_bad_pointer(), x); +} + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. 
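
A minimal caller sketch of the contract described above, with hypothetical names (v, expected, new — illustrative, not from this patch); success is detected by comparing the returned value against the expected old value:

	/* illustrative use of the cmpxchg() contract, not part of this patch */
	unsigned long prev = cmpxchg(&v, expected, new);
	if (prev == expected) {
		/* the store happened: v now holds new */
	} else {
		/* v did not match expected; prev holds the value found */
	}
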
+ */ + +static inline unsigned long +____cmpxchg_u8(volatile char *m, unsigned char old, unsigned char new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " insbl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extbl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskbl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg_u16(volatile short *m, unsigned short old, unsigned short new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " inswl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extwl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskwl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg_u32(volatile int *m, int old, int new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldl_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stl_c %1,%2\n" + " beq %1,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg_u64(volatile long *m, unsigned long old, unsigned long new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldq_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stq_c %1,%2\n" + " beq %1,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid cmpxchg(). */ +extern void __cmpxchg_called_with_bad_pointer(void); + +static __always_inline unsigned long +____cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, + int size) +{ + return + size == 1 ? ____cmpxchg_u8(ptr, old, new) : + size == 2 ? ____cmpxchg_u16(ptr, old, new) : + size == 4 ? ____cmpxchg_u32(ptr, old, new) : + size == 8 ? ____cmpxchg_u64(ptr, old, new) : + (__cmpxchg_called_with_bad_pointer(), old); +} #define xchg_local(ptr, x) \ ({ \ __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __arch_xchg_local((ptr), (unsigned long)_x_,\ + (__typeof__(*(ptr))) ____xchg((ptr), (unsigned long)_x_, \ sizeof(*(ptr))); \ }) @@ -21,7 +236,7 @@ ({ \ __typeof__(*(ptr)) _o_ = (o); \ __typeof__(*(ptr)) _n_ = (n); \ - (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \ + (__typeof__(*(ptr))) ____cmpxchg((ptr), (unsigned long)_o_, \ (unsigned long)_n_, \ sizeof(*(ptr))); \ }) @@ -32,12 +247,6 @@ cmpxchg_local((ptr), (o), (n)); \ }) -#undef ____xchg -#undef ____cmpxchg -#define ____xchg(type, args...) __arch_xchg ##type(args) -#define ____cmpxchg(type, args...) __cmpxchg ##type(args) -#include <asm/xchg.h> - /* * The leading and the trailing memory barriers guarantee that these * operations are fully ordered. 
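
A minimal sketch of the ordering pattern this comment describes, assuming a hypothetical wrapper function: full ordering is obtained by bracketing the unordered primitive with memory barriers, which is exactly what the arch_xchg()/arch_cmpxchg() macros in the hunks below do:

	/* sketch: fully ordered exchange built from the unordered ____xchg() */
	static inline unsigned long xchg_fully_ordered(volatile void *ptr,
						       unsigned long x, int size)
	{
		unsigned long ret;

		smp_mb();	/* order earlier accesses before the exchange */
		ret = ____xchg(ptr, x, size);
		smp_mb();	/* order the exchange before later accesses */
		return ret;
	}
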
@@ -48,7 +257,7 @@ __typeof__(*(ptr)) _x_ = (x); \ smp_mb(); \ __ret = (__typeof__(*(ptr))) \ - __arch_xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ + ____xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ smp_mb(); \ __ret; \ }) @@ -59,7 +268,7 @@ __typeof__(*(ptr)) _o_ = (o); \ __typeof__(*(ptr)) _n_ = (n); \ smp_mb(); \ - __ret = (__typeof__(*(ptr))) __cmpxchg((ptr), \ + __ret = (__typeof__(*(ptr))) ____cmpxchg((ptr), \ (unsigned long)_o_, (unsigned long)_n_, sizeof(*(ptr)));\ smp_mb(); \ __ret; \ @@ -71,6 +280,4 @@ arch_cmpxchg((ptr), (o), (n)); \ }) -#undef ____cmpxchg - #endif /* _ALPHA_CMPXCHG_H */ diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h deleted file mode 100644 index 7adb80c6746a..000000000000 --- a/arch/alpha/include/asm/xchg.h +++ /dev/null @@ -1,246 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ALPHA_CMPXCHG_H -#error Do not include xchg.h directly! -#else -/* - * xchg/xchg_local and cmpxchg/cmpxchg_local share the same code - * except that local version do not have the expensive memory barrier. - * So this file is included twice from asm/cmpxchg.h. - */ - -/* - * Atomic exchange. - * Since it can be used to implement critical sections - * it must clobber "memory" (also for interrupts in UP). - */ - -static inline unsigned long -____xchg(_u8, volatile char *m, unsigned long val) -{ - unsigned long ret, tmp, addr64; - - __asm__ __volatile__( - " andnot %4,7,%3\n" - " insbl %1,%4,%1\n" - "1: ldq_l %2,0(%3)\n" - " extbl %2,%4,%0\n" - " mskbl %2,%4,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%3)\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) - : "r" ((long)m), "1" (val) : "memory"); - - return ret; -} - -static inline unsigned long -____xchg(_u16, volatile short *m, unsigned long val) -{ - unsigned long ret, tmp, addr64; - - __asm__ __volatile__( - " andnot %4,7,%3\n" - " inswl %1,%4,%1\n" - "1: ldq_l %2,0(%3)\n" - " extwl %2,%4,%0\n" - " mskwl %2,%4,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%3)\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) - : "r" ((long)m), "1" (val) : "memory"); - - return ret; -} - -static inline unsigned long -____xchg(_u32, volatile int *m, unsigned long val) -{ - unsigned long dummy; - - __asm__ __volatile__( - "1: ldl_l %0,%4\n" - " bis $31,%3,%1\n" - " stl_c %1,%2\n" - " beq %1,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (val), "=&r" (dummy), "=m" (*m) - : "rI" (val), "m" (*m) : "memory"); - - return val; -} - -static inline unsigned long -____xchg(_u64, volatile long *m, unsigned long val) -{ - unsigned long dummy; - - __asm__ __volatile__( - "1: ldq_l %0,%4\n" - " bis $31,%3,%1\n" - " stq_c %1,%2\n" - " beq %1,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (val), "=&r" (dummy), "=m" (*m) - : "rI" (val), "m" (*m) : "memory"); - - return val; -} - -/* This function doesn't exist, so you'll get a linker error - if something tries to do an invalid xchg(). */ -extern void __xchg_called_with_bad_pointer(void); - -static __always_inline unsigned long -____xchg(, volatile void *ptr, unsigned long x, int size) -{ - switch (size) { - case 1: - return ____xchg(_u8, ptr, x); - case 2: - return ____xchg(_u16, ptr, x); - case 4: - return ____xchg(_u32, ptr, x); - case 8: - return ____xchg(_u64, ptr, x); - } - __xchg_called_with_bad_pointer(); - return x; -} - -/* - * Atomic compare and exchange. 
Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. - */ - -static inline unsigned long -____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new) -{ - unsigned long prev, tmp, cmp, addr64; - - __asm__ __volatile__( - " andnot %5,7,%4\n" - " insbl %1,%5,%1\n" - "1: ldq_l %2,0(%4)\n" - " extbl %2,%5,%0\n" - " cmpeq %0,%6,%3\n" - " beq %3,2f\n" - " mskbl %2,%5,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%4)\n" - " beq %2,3f\n" - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) - : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); - - return prev; -} - -static inline unsigned long -____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new) -{ - unsigned long prev, tmp, cmp, addr64; - - __asm__ __volatile__( - " andnot %5,7,%4\n" - " inswl %1,%5,%1\n" - "1: ldq_l %2,0(%4)\n" - " extwl %2,%5,%0\n" - " cmpeq %0,%6,%3\n" - " beq %3,2f\n" - " mskwl %2,%5,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%4)\n" - " beq %2,3f\n" - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) - : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); - - return prev; -} - -static inline unsigned long -____cmpxchg(_u32, volatile int *m, int old, int new) -{ - unsigned long prev, cmp; - - __asm__ __volatile__( - "1: ldl_l %0,%5\n" - " cmpeq %0,%3,%1\n" - " beq %1,2f\n" - " mov %4,%1\n" - " stl_c %1,%2\n" - " beq %1,3f\n" - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r"(prev), "=&r"(cmp), "=m"(*m) - : "r"((long) old), "r"(new), "m"(*m) : "memory"); - - return prev; -} - -static inline unsigned long -____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new) -{ - unsigned long prev, cmp; - - __asm__ __volatile__( - "1: ldq_l %0,%5\n" - " cmpeq %0,%3,%1\n" - " beq %1,2f\n" - " mov %4,%1\n" - " stq_c %1,%2\n" - " beq %1,3f\n" - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r"(prev), "=&r"(cmp), "=m"(*m) - : "r"((long) old), "r"(new), "m"(*m) : "memory"); - - return prev; -} - -/* This function doesn't exist, so you'll get a linker error - if something tries to do an invalid cmpxchg(). 
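
The idiom described here relies on the compiler constant-folding sizeof(): every supported size resolves to a real helper and the call to the undefined extern is eliminated as dead code, so only an invalid size leaves a reference behind for the linker to reject. A standalone sketch of the same trick, with hypothetical names:

	/* never defined anywhere; referencing it is a link-time error */
	extern void __bad_size(void);

	static __always_inline unsigned long load_dispatch(const void *p, int size)
	{
		switch (size) {
		case 4:
			return *(const unsigned int *)p;
		case 8:
			return *(const unsigned long *)p;
		}
		__bad_size();	/* unreachable for valid sizes */
		return 0;
	}
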
*/ -extern void __cmpxchg_called_with_bad_pointer(void); - -static __always_inline unsigned long -____cmpxchg(, volatile void *ptr, unsigned long old, unsigned long new, - int size) -{ - switch (size) { - case 1: - return ____cmpxchg(_u8, ptr, old, new); - case 2: - return ____cmpxchg(_u16, ptr, old, new); - case 4: - return ____cmpxchg(_u32, ptr, old, new); - case 8: - return ____cmpxchg(_u64, ptr, old, new); - } - __cmpxchg_called_with_bad_pointer(); - return old; -} - -#endif diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 6afae65e9a8b..a9a38c80c4a7 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -22,7 +22,7 @@ #include <asm/gentrap.h> #include <linux/uaccess.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/sysinfo.h> #include <asm/hwrpb.h> #include <asm/mmu_context.h> diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index 4fdb7350636c..f57cb5a6b624 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -9,7 +9,7 @@ #include <linux/types.h> #include <asm/byteorder.h> #include <asm/page.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #ifdef CONFIG_ISA_ARCV2 #include <asm/barrier.h> diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 9febf5bc3de6..4ae2db59d494 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -14,6 +14,7 @@ typedef struct { unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ } mm_context_t; +struct pt_regs; extern void do_tlb_overlap_fault(unsigned long, unsigned long, struct pt_regs *); #endif diff --git a/arch/arc/include/asm/unaligned.h b/arch/arc/include/asm/unaligned.h deleted file mode 100644 index cf5a02382e0e..000000000000 --- a/arch/arc/include/asm/unaligned.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -#ifndef _ASM_ARC_UNALIGNED_H -#define _ASM_ARC_UNALIGNED_H - -/* ARC700 can't handle unaligned Data accesses. 
*/ - -#include <asm-generic/unaligned.h> -#include <asm/ptrace.h> - -#ifdef CONFIG_ARC_EMUL_UNALIGNED -int misaligned_fixup(unsigned long address, struct pt_regs *regs, - struct callee_regs *cregs); -#else -static inline int -misaligned_fixup(unsigned long address, struct pt_regs *regs, - struct callee_regs *cregs) -{ - /* Not fixed */ - return 1; -} -#endif - -#endif /* _ASM_ARC_UNALIGNED_H */ diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index a19751e824fb..8d2ea2cbd98b 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -18,8 +18,9 @@ #include <linux/kgdb.h> #include <asm/entry.h> #include <asm/setup.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/kprobes.h> +#include "unaligned.h" void die(const char *str, struct pt_regs *regs, unsigned long address) { diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c index 99a9b92ed98d..d2f5ceaaed1b 100644 --- a/arch/arc/kernel/unaligned.c +++ b/arch/arc/kernel/unaligned.c @@ -12,6 +12,7 @@ #include <linux/ptrace.h> #include <linux/uaccess.h> #include <asm/disasm.h> +#include "unaligned.h" #ifdef CONFIG_CPU_BIG_ENDIAN #define BE 1 diff --git a/arch/arc/kernel/unaligned.h b/arch/arc/kernel/unaligned.h new file mode 100644 index 000000000000..5244453bb85f --- /dev/null +++ b/arch/arc/kernel/unaligned.h @@ -0,0 +1,16 @@ +struct pt_regs; +struct callee_regs; + +#ifdef CONFIG_ARC_EMUL_UNALIGNED +int misaligned_fixup(unsigned long address, struct pt_regs *regs, + struct callee_regs *cregs); +#else +static inline int +misaligned_fixup(unsigned long address, struct pt_regs *regs, + struct callee_regs *cregs) +{ + /* Not fixed */ + return 1; +} +#endif + diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 9270d0a713c3..d8969dab12d4 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -19,7 +19,7 @@ #include <linux/uaccess.h> #include <linux/ptrace.h> #include <asm/sections.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/unwind.h> extern char __start_unwind[], __end_unwind[]; diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 71afdd98ddf2..aafebf145738 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -183,7 +183,6 @@ machine-$(CONFIG_ARCH_CLPS711X) += clps711x machine-$(CONFIG_ARCH_DAVINCI) += davinci machine-$(CONFIG_ARCH_DIGICOLOR) += digicolor machine-$(CONFIG_ARCH_DOVE) += dove -machine-$(CONFIG_ARCH_EP93XX) += ep93xx machine-$(CONFIG_ARCH_EXYNOS) += exynos machine-$(CONFIG_ARCH_FOOTBRIDGE) += footbridge machine-$(CONFIG_ARCH_GEMINI) += gemini diff --git a/arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts b/arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts index 72d26d130efa..85f54fa595aa 100644 --- a/arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts +++ b/arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts @@ -77,7 +77,7 @@ }; &hdmi { - hpd-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; + hpd-gpios = <&expgpio 0 GPIO_ACTIVE_LOW>; power-domains = <&power RPI_POWER_DOMAIN_HDMI>; status = "okay"; }; diff --git a/arch/arm/boot/dts/cirrus/Makefile b/arch/arm/boot/dts/cirrus/Makefile index e944d3e2129d..e6015983e464 100644 --- a/arch/arm/boot/dts/cirrus/Makefile +++ b/arch/arm/boot/dts/cirrus/Makefile @@ -3,3 +3,7 @@ dtb-$(CONFIG_ARCH_CLPS711X) += \ ep7211-edb7211.dtb dtb-$(CONFIG_ARCH_CLPS711X) += \ ep7211-edb7211.dtb +dtb-$(CONFIG_ARCH_EP93XX) += \ + ep93xx-edb9302.dtb \ + ep93xx-bk3.dtb \ + ep93xx-ts7250.dtb diff --git a/arch/arm/boot/dts/cirrus/ep93xx-bk3.dts b/arch/arm/boot/dts/cirrus/ep93xx-bk3.dts new 
file mode 100644 index 000000000000..40bc9b2a6ba8 --- /dev/null +++ b/arch/arm/boot/dts/cirrus/ep93xx-bk3.dts @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Device Tree file for Liebherr controller BK3.1 based on Cirrus EP9302 SoC + */ +/dts-v1/; +#include "ep93xx.dtsi" + +/ { + model = "Liebherr controller BK3.1"; + compatible = "liebherr,bk3", "cirrus,ep9301"; + #address-cells = <1>; + #size-cells = <1>; + + chosen { + }; + + memory@0 { + device_type = "memory"; + /* should be set from ATAGS */ + reg = <0x00000000 0x02000000>, + <0x000530c0 0x01fdd000>; + }; + + leds { + compatible = "gpio-leds"; + led-0 { + label = "grled"; + gpios = <&gpio4 0 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "heartbeat"; + function = LED_FUNCTION_HEARTBEAT; + }; + + led-1 { + label = "rdled"; + gpios = <&gpio4 1 GPIO_ACTIVE_HIGH>; + function = LED_FUNCTION_FAULT; + }; + }; +}; + +&ebi { + nand-controller@60000000 { + compatible = "technologic,ts7200-nand"; + reg = <0x60000000 0x8000000>; + #address-cells = <1>; + #size-cells = <0>; + + nand@0 { + reg = <0>; + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "System"; + reg = <0x00000000 0x01e00000>; + read-only; + }; + + partition@1e00000 { + label = "Data"; + reg = <0x01e00000 0x05f20000>; + }; + + partition@7d20000 { + label = "RedBoot"; + reg = <0x07d20000 0x002e0000>; + read-only; + }; + }; + }; + }; +}; + +ð0 { + phy-handle = <&phy0>; +}; + +&i2s { + dmas = <&dma0 0 1>, <&dma0 0 2>; + dma-names = "tx", "rx"; + pinctrl-names = "default"; + pinctrl-0 = <&i2s_on_ac97_pins>; + status = "okay"; +}; + +&gpio1 { + /* PWM */ + gpio-ranges = <&syscon 6 163 1>; +}; + +&gpio4 { + gpio-ranges = <&syscon 0 97 2>; + status = "okay"; +}; + +&gpio6 { + gpio-ranges = <&syscon 0 87 2>; + status = "okay"; +}; + +&gpio7 { + gpio-ranges = <&syscon 2 199 4>; + status = "okay"; +}; + +&mdio0 { + phy0: ethernet-phy@1 { + reg = <1>; + device_type = "ethernet-phy"; + }; +}; + +&uart0 { + status = "okay"; +}; + +&uart1 { + status = "okay"; +}; + +&usb0 { + status = "okay"; +}; diff --git a/arch/arm/boot/dts/cirrus/ep93xx-edb9302.dts b/arch/arm/boot/dts/cirrus/ep93xx-edb9302.dts new file mode 100644 index 000000000000..312b2be1c638 --- /dev/null +++ b/arch/arm/boot/dts/cirrus/ep93xx-edb9302.dts @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +/* + * Device Tree file for Cirrus Logic EDB9302 board based on EP9302 SoC + */ +/dts-v1/; +#include "ep93xx.dtsi" + +/ { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cirrus,edb9302", "cirrus,ep9301"; + model = "cirrus,edb9302"; + + chosen { + }; + + memory@0 { + device_type = "memory"; + /* should be set from ATAGS */ + reg = <0x0000000 0x800000>, + <0x1000000 0x800000>, + <0x4000000 0x800000>, + <0x5000000 0x800000>; + }; + + sound { + compatible = "audio-graph-card2"; + label = "EDB93XX"; + links = <&i2s_port>; + }; + + leds { + compatible = "gpio-leds"; + led-0 { + label = "grled"; + gpios = <&gpio4 0 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "heartbeat"; + function = LED_FUNCTION_HEARTBEAT; + }; + + led-1 { + label = "rdled"; + gpios = <&gpio4 1 GPIO_ACTIVE_HIGH>; + function = LED_FUNCTION_FAULT; + }; + }; +}; + +&adc { + status = "okay"; +}; + +&ebi { + flash@60000000 { + compatible = "cfi-flash"; + reg = <0x60000000 0x1000000>; + bank-width = <2>; + }; +}; + +ð0 { + phy-handle = <&phy0>; +}; + +&gpio0 { + gpio-ranges = <&syscon 0 153 1>, + <&syscon 1 152 1>, + <&syscon 2 151 1>, + <&syscon 3 148 1>, + 
<&syscon 4 147 1>, + <&syscon 5 146 1>, + <&syscon 6 145 1>, + <&syscon 7 144 1>; +}; + +&gpio1 { + gpio-ranges = <&syscon 0 143 1>, + <&syscon 1 142 1>, + <&syscon 2 141 1>, + <&syscon 3 140 1>, + <&syscon 4 165 1>, + <&syscon 5 164 1>, + <&syscon 6 163 1>, + <&syscon 7 160 1>; +}; + +&gpio2 { + gpio-ranges = <&syscon 0 115 1>; +}; + +/* edb9302 doesn't have GPIO Port D present */ +&gpio3 { + status = "disabled"; +}; + +&gpio4 { + gpio-ranges = <&syscon 0 97 2>; +}; + +&gpio5 { + gpio-ranges = <&syscon 1 170 1>, + <&syscon 2 169 1>, + <&syscon 3 168 1>; +}; + +&gpio6 { + gpio-ranges = <&syscon 0 87 2>; +}; + +&gpio7 { + gpio-ranges = <&syscon 2 199 4>; +}; + +&i2s { + pinctrl-names = "default"; + pinctrl-0 = <&i2s_on_ac97_pins>; + status = "okay"; + i2s_port: port { + i2s_ep: endpoint { + system-clock-direction-out; + frame-master; + bitclock-master; + mclk-fs = <256>; + dai-format = "i2s"; + convert-channels = <2>; + convert-sample-format = "s32_le"; + remote-endpoint = <&codec_ep>; + }; + }; +}; + +&mdio0 { + phy0: ethernet-phy@1 { + reg = <1>; + device_type = "ethernet-phy"; + }; +}; + +&spi0 { + cs-gpios = <&gpio0 6 GPIO_ACTIVE_LOW + &gpio0 7 GPIO_ACTIVE_LOW>; + dmas = <&dma1 10 2>, <&dma1 10 1>; + dma-names = "rx", "tx"; + status = "okay"; + + cs4271: codec@0 { + compatible = "cirrus,cs4271"; + reg = <0>; + #sound-dai-cells = <0>; + spi-max-frequency = <6000000>; + spi-cpol; + spi-cpha; + reset-gpios = <&gpio0 1 GPIO_ACTIVE_LOW>; + port { + codec_ep: endpoint { + remote-endpoint = <&i2s_ep>; + }; + }; + }; + + at25f1024: eeprom@1 { + compatible = "atmel,at25"; + reg = <1>; + address-width = <8>; + size = <0x20000>; + pagesize = <256>; + spi-max-frequency = <20000000>; + }; +}; + +&uart0 { + status = "okay"; +}; + +&uart1 { + status = "okay"; +}; + +&usb0 { + status = "okay"; +}; diff --git a/arch/arm/boot/dts/cirrus/ep93xx-ts7250.dts b/arch/arm/boot/dts/cirrus/ep93xx-ts7250.dts new file mode 100644 index 000000000000..9e03f93d9fc8 --- /dev/null +++ b/arch/arm/boot/dts/cirrus/ep93xx-ts7250.dts @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Device Tree file for Technologic Systems ts7250 board based on Cirrus EP9302 SoC + */ +/dts-v1/; +#include "ep93xx.dtsi" + +/ { + compatible = "technologic,ts7250", "cirrus,ep9301"; + model = "TS-7250 SBC"; + #address-cells = <1>; + #size-cells = <1>; + + chosen { + }; + + memory@0 { + device_type = "memory"; + /* should be set from ATAGS */ + reg = <0x00000000 0x02000000>, + <0x000530c0 0x01fdd000>; + }; + + leds { + compatible = "gpio-leds"; + led-0 { + label = "grled"; + gpios = <&gpio4 0 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "heartbeat"; + function = LED_FUNCTION_HEARTBEAT; + }; + + led-1 { + label = "rdled"; + gpios = <&gpio4 1 GPIO_ACTIVE_HIGH>; + function = LED_FUNCTION_FAULT; + }; + }; +}; + +&ebi { + nand-controller@60000000 { + compatible = "technologic,ts7200-nand"; + reg = <0x60000000 0x8000000>; + #address-cells = <1>; + #size-cells = <0>; + + nand@0 { + reg = <0>; + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "TS-BOOTROM"; + reg = <0x00000000 0x00020000>; + read-only; + }; + + partition@20000 { + label = "Linux"; + reg = <0x00020000 0x07d00000>; + }; + + partition@7d20000 { + label = "RedBoot"; + reg = <0x07d20000 0x002e0000>; + read-only; + }; + }; + }; + }; + + rtc@10800000 { + compatible = "st,m48t86"; + reg = <0x10800000 0x1>, + <0x11700000 0x1>; + }; + + watchdog@23800000 { + compatible = "technologic,ts7200-wdt"; + reg = 
<0x23800000 0x01>, + <0x23c00000 0x01>; + timeout-sec = <30>; + }; +}; + +ð0 { + phy-handle = <&phy0>; +}; + +&gpio1 { + /* PWM */ + gpio-ranges = <&syscon 6 163 1>; +}; + +/* ts7250 doesn't have GPIO Port D present */ +&gpio3 { + status = "disabled"; +}; + +&gpio4 { + gpio-ranges = <&syscon 0 97 2>; +}; + +&gpio6 { + gpio-ranges = <&syscon 0 87 2>; +}; + +&gpio7 { + gpio-ranges = <&syscon 2 199 4>; +}; + +&spi0 { + cs-gpios = <&gpio5 2 GPIO_ACTIVE_HIGH>; + dmas = <&dma1 10 2>, <&dma1 10 1>; + dma-names = "rx", "tx"; + status = "okay"; + + tmp122: temperature-sensor@0 { + compatible = "ti,tmp122"; + reg = <0>; + spi-max-frequency = <2000000>; + }; +}; + +&mdio0 { + phy0: ethernet-phy@1 { + reg = <1>; + device_type = "ethernet-phy"; + }; +}; + +&uart0 { + status = "okay"; +}; + +&uart1 { + status = "okay"; +}; + +&usb0 { + status = "okay"; +}; diff --git a/arch/arm/boot/dts/cirrus/ep93xx.dtsi b/arch/arm/boot/dts/cirrus/ep93xx.dtsi new file mode 100644 index 000000000000..0dd1eee346ca --- /dev/null +++ b/arch/arm/boot/dts/cirrus/ep93xx.dtsi @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Device Tree file for Cirrus Logic systems EP93XX SoC + */ +#include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/leds/common.h> +#include <dt-bindings/input/input.h> +#include <dt-bindings/clock/cirrus,ep9301-syscon.h> +/ { + soc: soc { + compatible = "simple-bus"; + ranges; + #address-cells = <1>; + #size-cells = <1>; + + syscon: syscon@80930000 { + compatible = "cirrus,ep9301-syscon", "syscon"; + reg = <0x80930000 0x1000>; + + #clock-cells = <1>; + clocks = <&xtali>; + + spi_default_pins: pins-spi { + function = "spi"; + groups = "ssp"; + }; + + ac97_default_pins: pins-ac97 { + function = "ac97"; + groups = "ac97"; + }; + + i2s_on_ssp_pins: pins-i2sonssp { + function = "i2s"; + groups = "i2s_on_ssp"; + }; + + i2s_on_ac97_pins: pins-i2sonac97 { + function = "i2s"; + groups = "i2s_on_ac97"; + }; + + gpio1_default_pins: pins-gpio1 { + function = "gpio"; + groups = "gpio1agrp"; + }; + + pwm1_default_pins: pins-pwm1 { + function = "pwm"; + groups = "pwm1"; + }; + + gpio2_default_pins: pins-gpio2 { + function = "gpio"; + groups = "gpio2agrp"; + }; + + gpio3_default_pins: pins-gpio3 { + function = "gpio"; + groups = "gpio3agrp"; + }; + + keypad_default_pins: pins-keypad { + function = "keypad"; + groups = "keypadgrp"; + }; + + gpio4_default_pins: pins-gpio4 { + function = "gpio"; + groups = "gpio4agrp"; + }; + + gpio6_default_pins: pins-gpio6 { + function = "gpio"; + groups = "gpio6agrp"; + }; + + gpio7_default_pins: pins-gpio7 { + function = "gpio"; + groups = "gpio7agrp"; + }; + + ide_default_pins: pins-ide { + function = "pata"; + groups = "idegrp"; + }; + + lcd_on_dram0_pins: pins-rasteronsdram0 { + function = "lcd"; + groups = "rasteronsdram0grp"; + }; + + lcd_on_dram3_pins: pins-rasteronsdram3 { + function = "lcd"; + groups = "rasteronsdram3grp"; + }; + }; + + adc: adc@80900000 { + compatible = "cirrus,ep9301-adc"; + reg = <0x80900000 0x28>; + clocks = <&syscon EP93XX_CLK_ADC>; + interrupt-parent = <&vic0>; + interrupts = <30>; + status = "disabled"; + }; + + /* + * The EP93XX expansion bus is a set of up to 7 each up to 16MB + * windows in the 256MB space from 0x50000000 to 0x5fffffff. + * But since we don't require to setup it in any way, we can + * represent it as a simple-bus. 
+ */ + ebi: bus@80080000 { + compatible = "simple-bus"; + reg = <0x80080000 0x20>; + native-endian; + #address-cells = <1>; + #size-cells = <1>; + ranges; + }; + + dma0: dma-controller@80000000 { + compatible = "cirrus,ep9301-dma-m2p"; + reg = <0x80000000 0x0040>, + <0x80000040 0x0040>, + <0x80000080 0x0040>, + <0x800000c0 0x0040>, + <0x80000240 0x0040>, + <0x80000200 0x0040>, + <0x800002c0 0x0040>, + <0x80000280 0x0040>, + <0x80000340 0x0040>, + <0x80000300 0x0040>; + clocks = <&syscon EP93XX_CLK_M2P0>, + <&syscon EP93XX_CLK_M2P1>, + <&syscon EP93XX_CLK_M2P2>, + <&syscon EP93XX_CLK_M2P3>, + <&syscon EP93XX_CLK_M2P4>, + <&syscon EP93XX_CLK_M2P5>, + <&syscon EP93XX_CLK_M2P6>, + <&syscon EP93XX_CLK_M2P7>, + <&syscon EP93XX_CLK_M2P8>, + <&syscon EP93XX_CLK_M2P9>; + clock-names = "m2p0", "m2p1", + "m2p2", "m2p3", + "m2p4", "m2p5", + "m2p6", "m2p7", + "m2p8", "m2p9"; + interrupt-parent = <&vic0>; + interrupts = <7>, <8>, <9>, <10>, <11>, + <12>, <13>, <14>, <15>, <16>; + #dma-cells = <2>; + }; + + dma1: dma-controller@80000100 { + compatible = "cirrus,ep9301-dma-m2m"; + reg = <0x80000100 0x0040>, + <0x80000140 0x0040>; + clocks = <&syscon EP93XX_CLK_M2M0>, + <&syscon EP93XX_CLK_M2M1>; + clock-names = "m2m0", "m2m1"; + interrupt-parent = <&vic0>; + interrupts = <17>, <18>; + #dma-cells = <2>; + }; + + eth0: ethernet@80010000 { + compatible = "cirrus,ep9301-eth"; + reg = <0x80010000 0x10000>; + interrupt-parent = <&vic1>; + interrupts = <7>; + mdio0: mdio { + #address-cells = <1>; + #size-cells = <0>; + }; + }; + + gpio0: gpio@80840000 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840000 0x04>, + <0x80840010 0x04>, + <0x80840090 0x1c>; + reg-names = "data", "dir", "intr"; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&vic1>; + interrupts = <27>; + }; + + gpio1: gpio@80840004 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840004 0x04>, + <0x80840014 0x04>, + <0x808400ac 0x1c>; + reg-names = "data", "dir", "intr"; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&vic1>; + interrupts = <27>; + }; + + gpio2: gpio@80840008 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840008 0x04>, + <0x80840018 0x04>; + reg-names = "data", "dir"; + gpio-controller; + #gpio-cells = <2>; + pinctrl-names = "default"; + pinctrl-0 = <&gpio2_default_pins>; + }; + + gpio3: gpio@8084000c { + compatible = "cirrus,ep9301-gpio"; + reg = <0x8084000c 0x04>, + <0x8084001c 0x04>; + reg-names = "data", "dir"; + gpio-controller; + #gpio-cells = <2>; + pinctrl-names = "default"; + pinctrl-0 = <&gpio3_default_pins>; + }; + + gpio4: gpio@80840020 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840020 0x04>, + <0x80840024 0x04>; + reg-names = "data", "dir"; + gpio-controller; + #gpio-cells = <2>; + pinctrl-names = "default"; + pinctrl-0 = <&gpio4_default_pins>; + }; + + gpio5: gpio@80840030 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840030 0x04>, + <0x80840034 0x04>, + <0x8084004c 0x1c>; + reg-names = "data", "dir", "intr"; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + interrupts-extended = <&vic0 19>, <&vic0 20>, + <&vic0 21>, <&vic0 22>, + <&vic1 15>, <&vic1 16>, + <&vic1 17>, <&vic1 18>; + }; + + gpio6: gpio@80840038 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840038 0x04>, + <0x8084003c 0x04>; + reg-names = "data", "dir"; + gpio-controller; + #gpio-cells = <2>; + pinctrl-names = "default"; + pinctrl-0 = 
<&gpio6_default_pins>; + }; + + gpio7: gpio@80840040 { + compatible = "cirrus,ep9301-gpio"; + reg = <0x80840040 0x04>, + <0x80840044 0x04>; + reg-names = "data", "dir"; + gpio-controller; + #gpio-cells = <2>; + pinctrl-names = "default"; + pinctrl-0 = <&gpio7_default_pins>; + }; + + i2s: i2s@80820000 { + compatible = "cirrus,ep9301-i2s"; + reg = <0x80820000 0x100>; + #sound-dai-cells = <0>; + interrupt-parent = <&vic1>; + interrupts = <28>; + clocks = <&syscon EP93XX_CLK_I2S_MCLK>, + <&syscon EP93XX_CLK_I2S_SCLK>, + <&syscon EP93XX_CLK_I2S_LRCLK>; + clock-names = "mclk", "sclk", "lrclk"; + dmas = <&dma0 0 1>, <&dma0 0 2>; + dma-names = "tx", "rx"; + status = "disabled"; + }; + + ide: ide@800a0000 { + compatible = "cirrus,ep9312-pata"; + reg = <0x800a0000 0x38>; + interrupt-parent = <&vic1>; + interrupts = <8>; + pinctrl-names = "default"; + pinctrl-0 = <&ide_default_pins>; + status = "disabled"; + }; + + vic0: interrupt-controller@800b0000 { + compatible = "arm,pl192-vic"; + reg = <0x800b0000 0x1000>; + interrupt-controller; + #interrupt-cells = <1>; + valid-mask = <0x7ffffffc>; + valid-wakeup-mask = <0x0>; + }; + + vic1: interrupt-controller@800c0000 { + compatible = "arm,pl192-vic"; + reg = <0x800c0000 0x1000>; + interrupt-controller; + #interrupt-cells = <1>; + valid-mask = <0x1fffffff>; + valid-wakeup-mask = <0x0>; + }; + + keypad: keypad@800f0000 { + compatible = "cirrus,ep9307-keypad"; + reg = <0x800f0000 0x0c>; + interrupt-parent = <&vic0>; + interrupts = <29>; + clocks = <&syscon EP93XX_CLK_KEYPAD>; + pinctrl-names = "default"; + pinctrl-0 = <&keypad_default_pins>; + linux,keymap = <KEY_UP>, + <KEY_DOWN>, + <KEY_VOLUMEDOWN>, + <KEY_HOME>, + <KEY_RIGHT>, + <KEY_LEFT>, + <KEY_ENTER>, + <KEY_VOLUMEUP>, + <KEY_F6>, + <KEY_F8>, + <KEY_F9>, + <KEY_F10>, + <KEY_F1>, + <KEY_F2>, + <KEY_F3>, + <KEY_POWER>; + }; + + pwm0: pwm@80910000 { + compatible = "cirrus,ep9301-pwm"; + reg = <0x80910000 0x10>; + clocks = <&syscon EP93XX_CLK_PWM>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm1: pwm@80910020 { + compatible = "cirrus,ep9301-pwm"; + reg = <0x80910020 0x10>; + clocks = <&syscon EP93XX_CLK_PWM>; + #pwm-cells = <3>; + pinctrl-names = "default"; + pinctrl-0 = <&pwm1_default_pins>; + status = "disabled"; + }; + + rtc0: rtc@80920000 { + compatible = "cirrus,ep9301-rtc"; + reg = <0x80920000 0x100>; + }; + + spi0: spi@808a0000 { + compatible = "cirrus,ep9301-spi"; + reg = <0x808a0000 0x18>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <&vic1>; + interrupts = <21>; + clocks = <&syscon EP93XX_CLK_SPI>; + pinctrl-names = "default"; + pinctrl-0 = <&spi_default_pins>; + status = "disabled"; + }; + + timer: timer@80810000 { + compatible = "cirrus,ep9301-timer"; + reg = <0x80810000 0x100>; + interrupt-parent = <&vic1>; + interrupts = <19>; + }; + + uart0: serial@808c0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x808c0000 0x1000>; + arm,primecell-periphid = <0x00041010>; + clocks = <&syscon EP93XX_CLK_UART1>, <&syscon EP93XX_CLK_UART>; + clock-names = "uartclk", "apb_pclk"; + interrupt-parent = <&vic1>; + interrupts = <20>; + status = "disabled"; + }; + + uart1: uart@808d0000 { + compatible = "arm,primecell"; + reg = <0x808d0000 0x1000>; + arm,primecell-periphid = <0x00041010>; + clocks = <&syscon EP93XX_CLK_UART2>, <&syscon EP93XX_CLK_UART>; + clock-names = "apb:uart2", "apb_pclk"; + interrupt-parent = <&vic1>; + interrupts = <22>; + status = "disabled"; + }; + + uart2: uart@808b0000 { + compatible = "arm,primecell"; + reg = <0x808b0000 0x1000>; + 
arm,primecell-periphid = <0x00041010>; + clocks = <&syscon EP93XX_CLK_UART3>, <&syscon EP93XX_CLK_UART>; + clock-names = "apb:uart3", "apb_pclk"; + interrupt-parent = <&vic1>; + interrupts = <23>; + status = "disabled"; + }; + + usb0: usb@80020000 { + compatible = "generic-ohci"; + reg = <0x80020000 0x10000>; + interrupt-parent = <&vic1>; + interrupts = <24>; + clocks = <&syscon EP93XX_CLK_USB>; + status = "disabled"; + }; + + watchdog0: watchdog@80940000 { + compatible = "cirrus,ep9301-wdt"; + reg = <0x80940000 0x08>; + }; + }; + + xtali: oscillator { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <14745600>; + clock-output-names = "xtali"; + }; +}; diff --git a/arch/arm/boot/dts/rockchip/rk3036-kylin.dts b/arch/arm/boot/dts/rockchip/rk3036-kylin.dts index e32c73d32f0a..2f84e2805712 100644 --- a/arch/arm/boot/dts/rockchip/rk3036-kylin.dts +++ b/arch/arm/boot/dts/rockchip/rk3036-kylin.dts @@ -325,8 +325,8 @@ &i2c2 { status = "okay"; - rt5616: rt5616@1b { - compatible = "rt5616"; + rt5616: audio-codec@1b { + compatible = "realtek,rt5616"; reg = <0x1b>; clocks = <&cru SCLK_I2S_OUT>; clock-names = "mclk"; diff --git a/arch/arm/boot/dts/rockchip/rk3036.dtsi b/arch/arm/boot/dts/rockchip/rk3036.dtsi index 96279d1e02fe..63b9912be06a 100644 --- a/arch/arm/boot/dts/rockchip/rk3036.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3036.dtsi @@ -384,12 +384,13 @@ }; }; - acodec: acodec-ana@20030000 { - compatible = "rk3036-codec"; + acodec: audio-codec@20030000 { + compatible = "rockchip,rk3036-codec"; reg = <0x20030000 0x4000>; - rockchip,grf = <&grf>; clock-names = "acodec_pclk"; clocks = <&cru PCLK_ACODEC>; + rockchip,grf = <&grf>; + #sound-dai-cells = <0>; status = "disabled"; }; @@ -399,7 +400,6 @@ interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru PCLK_HDMI>; clock-names = "pclk"; - rockchip,grf = <&grf>; pinctrl-names = "default"; pinctrl-0 = <&hdmi_ctl>; #sound-dai-cells = <0>; @@ -553,11 +553,11 @@ }; spi: spi@20074000 { - compatible = "rockchip,rockchip-spi"; + compatible = "rockchip,rk3036-spi"; reg = <0x20074000 0x1000>; interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&cru PCLK_SPI>, <&cru SCLK_SPI>; - clock-names = "apb-pclk","spi_pclk"; + clocks = <&cru SCLK_SPI>, <&cru PCLK_SPI>; + clock-names = "spiclk", "apb_pclk"; dmas = <&pdma 8>, <&pdma 9>; dma-names = "tx", "rx"; pinctrl-names = "default"; diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index f5b66f4cf45d..21df5e7f51f9 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -8,7 +8,7 @@ #include <asm/hwcap.h> #include <asm/neon.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/aes.h> #include <crypto/ctr.h> #include <crypto/internal/simd.h> diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c index 4ff18044af07..20b4dff13e3a 100644 --- a/arch/arm/crypto/crc32-ce-glue.c +++ b/arch/arm/crypto/crc32-ce-glue.c @@ -18,7 +18,7 @@ #include <asm/hwcap.h> #include <asm/neon.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #define PMULL_MIN_LEN 64L /* minimum size of buffer * for crc32_pmull_le_16 */ diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c index 3ddf05b4234d..3af997082534 100644 --- a/arch/arm/crypto/ghash-ce-glue.c +++ b/arch/arm/crypto/ghash-ce-glue.c @@ -9,7 +9,7 @@ #include <asm/hwcap.h> #include <asm/neon.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include 
<crypto/aes.h> #include <crypto/gcm.h> #include <crypto/b128ops.h> diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c index 8482e302c45a..4464ffbf8fd1 100644 --- a/arch/arm/crypto/poly1305-glue.c +++ b/arch/arm/crypto/poly1305-glue.c @@ -8,7 +8,7 @@ #include <asm/hwcap.h> #include <asm/neon.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/algapi.h> #include <crypto/internal/hash.h> #include <crypto/internal/poly1305.h> diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c index c62ce89dd3e0..aeac45bfbf9f 100644 --- a/arch/arm/crypto/sha2-ce-glue.c +++ b/arch/arm/crypto/sha2-ce-glue.c @@ -16,7 +16,7 @@ #include <asm/hwcap.h> #include <asm/simd.h> #include <asm/neon.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "sha256_glue.h" diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 6c9c16d767cf..f90be312418e 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -12,7 +12,7 @@ #include <linux/string.h> #include <asm/page.h> #include <asm/domain.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/unified.h> #include <asm/pgtable.h> #include <asm/proc-fns.h> diff --git a/arch/arm/mach-ep93xx/Kconfig b/arch/arm/mach-ep93xx/Kconfig index 703f3d232a60..812b71dcf60e 100644 --- a/arch/arm/mach-ep93xx/Kconfig +++ b/arch/arm/mach-ep93xx/Kconfig @@ -3,27 +3,27 @@ menuconfig ARCH_EP93XX bool "EP93xx-based" depends on ATAGS depends on ARCH_MULTI_V4T + # CONFIG_ARCH_MULTI_V7 is not set depends on CPU_LITTLE_ENDIAN + select ARCH_HAS_RESET_CONTROLLER select ARCH_SPARSEMEM_ENABLE select ARM_AMBA select ARM_VIC + select ARM_APPENDED_DTB # Old Redboot bootloaders deployed + select ARM_ATAG_DTB_COMPAT # we need this to update dt memory node + select COMMON_CLK_EP93XX + select EP93XX_TIMER select CLKSRC_MMIO select CPU_ARM920T select GPIOLIB + select PINCTRL + select PINCTRL_EP93XX help This enables support for the Cirrus EP93xx series of CPUs. if ARCH_EP93XX -menu "Cirrus EP93xx Implementation Options" - -config EP93XX_SOC_COMMON - bool - default y - select SOC_BUS - select LEDS_GPIO_REGISTER - -comment "EP93xx Platforms" +# menu "EP93xx Platforms" config MACH_BK3 bool "Support Liebherr BK3.1" @@ -103,6 +103,6 @@ config MACH_VISION_EP9307 Say 'Y' here if you want your kernel to support the Vision Engraving Systems EP9307 SoM. -endmenu +# endmenu endif diff --git a/arch/arm/mach-ep93xx/Makefile b/arch/arm/mach-ep93xx/Makefile deleted file mode 100644 index 62e37403df14..000000000000 --- a/arch/arm/mach-ep93xx/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the linux kernel. -# -obj-y := core.o clock.o timer-ep93xx.o - -obj-$(CONFIG_EP93XX_DMA) += dma.o - -obj-$(CONFIG_MACH_EDB93XX) += edb93xx.o -obj-$(CONFIG_MACH_TS72XX) += ts72xx.o -obj-$(CONFIG_MACH_VISION_EP9307)+= vision_ep9307.o diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c deleted file mode 100644 index 85a496ddc619..000000000000 --- a/arch/arm/mach-ep93xx/clock.c +++ /dev/null @@ -1,733 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * arch/arm/mach-ep93xx/clock.c - * Clock control for Cirrus EP93xx chips. 
- * - * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org> - */ - -#define pr_fmt(fmt) "ep93xx " KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/clk.h> -#include <linux/err.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/io.h> -#include <linux/spinlock.h> -#include <linux/clkdev.h> -#include <linux/clk-provider.h> -#include <linux/soc/cirrus/ep93xx.h> - -#include "hardware.h" - -#include <asm/div64.h> - -#include "soc.h" - -static DEFINE_SPINLOCK(clk_lock); - -static char fclk_divisors[] = { 1, 2, 4, 8, 16, 1, 1, 1 }; -static char hclk_divisors[] = { 1, 2, 4, 5, 6, 8, 16, 32 }; -static char pclk_divisors[] = { 1, 2, 4, 8 }; - -static char adc_divisors[] = { 16, 4 }; -static char sclk_divisors[] = { 2, 4 }; -static char lrclk_divisors[] = { 32, 64, 128 }; - -static const char * const mux_parents[] = { - "xtali", - "pll1", - "pll2" -}; - -/* - * PLL rate = 14.7456 MHz * (X1FBD + 1) * (X2FBD + 1) / (X2IPD + 1) / 2^PS - */ -static unsigned long calc_pll_rate(unsigned long long rate, u32 config_word) -{ - int i; - - rate *= ((config_word >> 11) & 0x1f) + 1; /* X1FBD */ - rate *= ((config_word >> 5) & 0x3f) + 1; /* X2FBD */ - do_div(rate, (config_word & 0x1f) + 1); /* X2IPD */ - for (i = 0; i < ((config_word >> 16) & 3); i++) /* PS */ - rate >>= 1; - - return (unsigned long)rate; -} - -struct clk_psc { - struct clk_hw hw; - void __iomem *reg; - u8 bit_idx; - u32 mask; - u8 shift; - u8 width; - char *div; - u8 num_div; - spinlock_t *lock; -}; - -#define to_clk_psc(_hw) container_of(_hw, struct clk_psc, hw) - -static int ep93xx_clk_is_enabled(struct clk_hw *hw) -{ - struct clk_psc *psc = to_clk_psc(hw); - u32 val = readl(psc->reg); - - return (val & BIT(psc->bit_idx)) ? 1 : 0; -} - -static int ep93xx_clk_enable(struct clk_hw *hw) -{ - struct clk_psc *psc = to_clk_psc(hw); - unsigned long flags = 0; - u32 val; - - if (psc->lock) - spin_lock_irqsave(psc->lock, flags); - - val = __raw_readl(psc->reg); - val |= BIT(psc->bit_idx); - - ep93xx_syscon_swlocked_write(val, psc->reg); - - if (psc->lock) - spin_unlock_irqrestore(psc->lock, flags); - - return 0; -} - -static void ep93xx_clk_disable(struct clk_hw *hw) -{ - struct clk_psc *psc = to_clk_psc(hw); - unsigned long flags = 0; - u32 val; - - if (psc->lock) - spin_lock_irqsave(psc->lock, flags); - - val = __raw_readl(psc->reg); - val &= ~BIT(psc->bit_idx); - - ep93xx_syscon_swlocked_write(val, psc->reg); - - if (psc->lock) - spin_unlock_irqrestore(psc->lock, flags); -} - -static const struct clk_ops clk_ep93xx_gate_ops = { - .enable = ep93xx_clk_enable, - .disable = ep93xx_clk_disable, - .is_enabled = ep93xx_clk_is_enabled, -}; - -static struct clk_hw *ep93xx_clk_register_gate(const char *name, - const char *parent_name, - void __iomem *reg, - u8 bit_idx) -{ - struct clk_init_data init; - struct clk_psc *psc; - struct clk *clk; - - psc = kzalloc(sizeof(*psc), GFP_KERNEL); - if (!psc) - return ERR_PTR(-ENOMEM); - - init.name = name; - init.ops = &clk_ep93xx_gate_ops; - init.flags = CLK_SET_RATE_PARENT; - init.parent_names = (parent_name ? &parent_name : NULL); - init.num_parents = (parent_name ? 
1 : 0); - - psc->reg = reg; - psc->bit_idx = bit_idx; - psc->hw.init = &init; - psc->lock = &clk_lock; - - clk = clk_register(NULL, &psc->hw); - if (IS_ERR(clk)) { - kfree(psc); - return ERR_CAST(clk); - } - - return &psc->hw; -} - -static u8 ep93xx_mux_get_parent(struct clk_hw *hw) -{ - struct clk_psc *psc = to_clk_psc(hw); - u32 val = __raw_readl(psc->reg); - - if (!(val & EP93XX_SYSCON_CLKDIV_ESEL)) - return 0; - - if (!(val & EP93XX_SYSCON_CLKDIV_PSEL)) - return 1; - - return 2; -} - -static int ep93xx_mux_set_parent_lock(struct clk_hw *hw, u8 index) -{ - struct clk_psc *psc = to_clk_psc(hw); - unsigned long flags = 0; - u32 val; - - if (index >= ARRAY_SIZE(mux_parents)) - return -EINVAL; - - if (psc->lock) - spin_lock_irqsave(psc->lock, flags); - - val = __raw_readl(psc->reg); - val &= ~(EP93XX_SYSCON_CLKDIV_ESEL | EP93XX_SYSCON_CLKDIV_PSEL); - - - if (index != 0) { - val |= EP93XX_SYSCON_CLKDIV_ESEL; - val |= (index - 1) ? EP93XX_SYSCON_CLKDIV_PSEL : 0; - } - - ep93xx_syscon_swlocked_write(val, psc->reg); - - if (psc->lock) - spin_unlock_irqrestore(psc->lock, flags); - - return 0; -} - -static bool is_best(unsigned long rate, unsigned long now, - unsigned long best) -{ - return abs(rate - now) < abs(rate - best); -} - -static int ep93xx_mux_determine_rate(struct clk_hw *hw, - struct clk_rate_request *req) -{ - unsigned long rate = req->rate; - struct clk *best_parent = NULL; - unsigned long __parent_rate; - unsigned long best_rate = 0, actual_rate, mclk_rate; - unsigned long best_parent_rate; - int __div = 0, __pdiv = 0; - int i; - - /* - * Try the two pll's and the external clock - * Because the valid predividers are 2, 2.5 and 3, we multiply - * all the clocks by 2 to avoid floating point math. - * - * This is based on the algorithm in the ep93xx raster guide: - * http://be-a-maverick.com/en/pubs/appNote/AN269REV1.pdf - * - */ - for (i = 0; i < ARRAY_SIZE(mux_parents); i++) { - struct clk *parent = clk_get_sys(mux_parents[i], NULL); - - __parent_rate = clk_get_rate(parent); - mclk_rate = __parent_rate * 2; - - /* Try each predivider value */ - for (__pdiv = 4; __pdiv <= 6; __pdiv++) { - __div = mclk_rate / (rate * __pdiv); - if (__div < 2 || __div > 127) - continue; - - actual_rate = mclk_rate / (__pdiv * __div); - if (is_best(rate, actual_rate, best_rate)) { - best_rate = actual_rate; - best_parent_rate = __parent_rate; - best_parent = parent; - } - } - } - - if (!best_parent) - return -EINVAL; - - req->best_parent_rate = best_parent_rate; - req->best_parent_hw = __clk_get_hw(best_parent); - req->rate = best_rate; - - return 0; -} - -static unsigned long ep93xx_ddiv_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) -{ - struct clk_psc *psc = to_clk_psc(hw); - unsigned long rate = 0; - u32 val = __raw_readl(psc->reg); - int __pdiv = ((val >> EP93XX_SYSCON_CLKDIV_PDIV_SHIFT) & 0x03); - int __div = val & 0x7f; - - if (__div > 0) - rate = (parent_rate * 2) / ((__pdiv + 3) * __div); - - return rate; -} - -static int ep93xx_ddiv_set_rate(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate) -{ - struct clk_psc *psc = to_clk_psc(hw); - int pdiv = 0, div = 0; - unsigned long best_rate = 0, actual_rate, mclk_rate; - int __div = 0, __pdiv = 0; - u32 val; - - mclk_rate = parent_rate * 2; - - for (__pdiv = 4; __pdiv <= 6; __pdiv++) { - __div = mclk_rate / (rate * __pdiv); - if (__div < 2 || __div > 127) - continue; - - actual_rate = mclk_rate / (__pdiv * __div); - if (is_best(rate, actual_rate, best_rate)) { - pdiv = __pdiv - 3; - div = __div; - best_rate = 
actual_rate; - } - } - - if (!best_rate) - return -EINVAL; - - val = __raw_readl(psc->reg); - - /* Clear old dividers */ - val &= ~0x37f; - - /* Set the new pdiv and div bits for the new clock rate */ - val |= (pdiv << EP93XX_SYSCON_CLKDIV_PDIV_SHIFT) | div; - ep93xx_syscon_swlocked_write(val, psc->reg); - - return 0; -} - -static const struct clk_ops clk_ddiv_ops = { - .enable = ep93xx_clk_enable, - .disable = ep93xx_clk_disable, - .is_enabled = ep93xx_clk_is_enabled, - .get_parent = ep93xx_mux_get_parent, - .set_parent = ep93xx_mux_set_parent_lock, - .determine_rate = ep93xx_mux_determine_rate, - .recalc_rate = ep93xx_ddiv_recalc_rate, - .set_rate = ep93xx_ddiv_set_rate, -}; - -static struct clk_hw *clk_hw_register_ddiv(const char *name, - void __iomem *reg, - u8 bit_idx) -{ - struct clk_init_data init; - struct clk_psc *psc; - struct clk *clk; - - psc = kzalloc(sizeof(*psc), GFP_KERNEL); - if (!psc) - return ERR_PTR(-ENOMEM); - - init.name = name; - init.ops = &clk_ddiv_ops; - init.flags = 0; - init.parent_names = mux_parents; - init.num_parents = ARRAY_SIZE(mux_parents); - - psc->reg = reg; - psc->bit_idx = bit_idx; - psc->lock = &clk_lock; - psc->hw.init = &init; - - clk = clk_register(NULL, &psc->hw); - if (IS_ERR(clk)) { - kfree(psc); - return ERR_CAST(clk); - } - return &psc->hw; -} - -static unsigned long ep93xx_div_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) -{ - struct clk_psc *psc = to_clk_psc(hw); - u32 val = __raw_readl(psc->reg); - u8 index = (val & psc->mask) >> psc->shift; - - if (index > psc->num_div) - return 0; - - return DIV_ROUND_UP_ULL(parent_rate, psc->div[index]); -} - -static long ep93xx_div_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) -{ - struct clk_psc *psc = to_clk_psc(hw); - unsigned long best = 0, now, maxdiv; - int i; - - maxdiv = psc->div[psc->num_div - 1]; - - for (i = 0; i < psc->num_div; i++) { - if ((rate * psc->div[i]) == *parent_rate) - return DIV_ROUND_UP_ULL((u64)*parent_rate, psc->div[i]); - - now = DIV_ROUND_UP_ULL((u64)*parent_rate, psc->div[i]); - - if (is_best(rate, now, best)) - best = now; - } - - if (!best) - best = DIV_ROUND_UP_ULL(*parent_rate, maxdiv); - - return best; -} - -static int ep93xx_div_set_rate(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate) -{ - struct clk_psc *psc = to_clk_psc(hw); - u32 val = __raw_readl(psc->reg) & ~psc->mask; - int i; - - for (i = 0; i < psc->num_div; i++) - if (rate == parent_rate / psc->div[i]) { - val |= i << psc->shift; - break; - } - - if (i == psc->num_div) - return -EINVAL; - - ep93xx_syscon_swlocked_write(val, psc->reg); - - return 0; -} - -static const struct clk_ops ep93xx_div_ops = { - .enable = ep93xx_clk_enable, - .disable = ep93xx_clk_disable, - .is_enabled = ep93xx_clk_is_enabled, - .recalc_rate = ep93xx_div_recalc_rate, - .round_rate = ep93xx_div_round_rate, - .set_rate = ep93xx_div_set_rate, -}; - -static struct clk_hw *clk_hw_register_div(const char *name, - const char *parent_name, - void __iomem *reg, - u8 enable_bit, - u8 shift, - u8 width, - char *clk_divisors, - u8 num_div) -{ - struct clk_init_data init; - struct clk_psc *psc; - struct clk *clk; - - psc = kzalloc(sizeof(*psc), GFP_KERNEL); - if (!psc) - return ERR_PTR(-ENOMEM); - - init.name = name; - init.ops = &ep93xx_div_ops; - init.flags = 0; - init.parent_names = (parent_name ? 
&parent_name : NULL); - init.num_parents = 1; - - psc->reg = reg; - psc->bit_idx = enable_bit; - psc->mask = GENMASK(shift + width - 1, shift); - psc->shift = shift; - psc->div = clk_divisors; - psc->num_div = num_div; - psc->lock = &clk_lock; - psc->hw.init = &init; - - clk = clk_register(NULL, &psc->hw); - if (IS_ERR(clk)) { - kfree(psc); - return ERR_CAST(clk); - } - return &psc->hw; -} - -struct ep93xx_gate { - unsigned int bit; - const char *dev_id; - const char *con_id; -}; - -static struct ep93xx_gate ep93xx_uarts[] = { - {EP93XX_SYSCON_DEVCFG_U1EN, "apb:uart1", NULL}, - {EP93XX_SYSCON_DEVCFG_U2EN, "apb:uart2", NULL}, - {EP93XX_SYSCON_DEVCFG_U3EN, "apb:uart3", NULL}, -}; - -static void __init ep93xx_uart_clock_init(void) -{ - unsigned int i; - struct clk_hw *hw; - u32 value; - unsigned int clk_uart_div; - - value = __raw_readl(EP93XX_SYSCON_PWRCNT); - if (value & EP93XX_SYSCON_PWRCNT_UARTBAUD) - clk_uart_div = 1; - else - clk_uart_div = 2; - - hw = clk_hw_register_fixed_factor(NULL, "uart", "xtali", 0, 1, clk_uart_div); - - /* parenting uart gate clocks to uart clock */ - for (i = 0; i < ARRAY_SIZE(ep93xx_uarts); i++) { - hw = ep93xx_clk_register_gate(ep93xx_uarts[i].dev_id, - "uart", - EP93XX_SYSCON_DEVCFG, - ep93xx_uarts[i].bit); - - clk_hw_register_clkdev(hw, NULL, ep93xx_uarts[i].dev_id); - } -} - -static struct ep93xx_gate ep93xx_dmas[] = { - {EP93XX_SYSCON_PWRCNT_DMA_M2P0, NULL, "m2p0"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P1, NULL, "m2p1"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P2, NULL, "m2p2"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P3, NULL, "m2p3"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P4, NULL, "m2p4"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P5, NULL, "m2p5"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P6, NULL, "m2p6"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P7, NULL, "m2p7"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P8, NULL, "m2p8"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2P9, NULL, "m2p9"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2M0, NULL, "m2m0"}, - {EP93XX_SYSCON_PWRCNT_DMA_M2M1, NULL, "m2m1"}, -}; - -static void __init ep93xx_dma_clock_init(void) -{ - unsigned int i; - struct clk_hw *hw; - int ret; - - for (i = 0; i < ARRAY_SIZE(ep93xx_dmas); i++) { - hw = clk_hw_register_gate(NULL, ep93xx_dmas[i].con_id, - "hclk", 0, - EP93XX_SYSCON_PWRCNT, - ep93xx_dmas[i].bit, - 0, - &clk_lock); - - ret = clk_hw_register_clkdev(hw, ep93xx_dmas[i].con_id, NULL); - if (ret) - pr_err("%s: failed to register lookup %s\n", - __func__, ep93xx_dmas[i].con_id); - } -} - -static int __init ep93xx_clock_init(void) -{ - u32 value; - struct clk_hw *hw; - unsigned long clk_pll1_rate; - unsigned long clk_f_rate; - unsigned long clk_h_rate; - unsigned long clk_p_rate; - unsigned long clk_pll2_rate; - unsigned int clk_f_div; - unsigned int clk_h_div; - unsigned int clk_p_div; - unsigned int clk_usb_div; - unsigned long clk_spi_div; - - hw = clk_hw_register_fixed_rate(NULL, "xtali", NULL, 0, EP93XX_EXT_CLK_RATE); - clk_hw_register_clkdev(hw, NULL, "xtali"); - - /* Determine the bootloader configured pll1 rate */ - value = __raw_readl(EP93XX_SYSCON_CLKSET1); - if (!(value & EP93XX_SYSCON_CLKSET1_NBYP1)) - clk_pll1_rate = EP93XX_EXT_CLK_RATE; - else - clk_pll1_rate = calc_pll_rate(EP93XX_EXT_CLK_RATE, value); - - hw = clk_hw_register_fixed_rate(NULL, "pll1", "xtali", 0, clk_pll1_rate); - clk_hw_register_clkdev(hw, NULL, "pll1"); - - /* Initialize the pll1 derived clocks */ - clk_f_div = fclk_divisors[(value >> 25) & 0x7]; - clk_h_div = hclk_divisors[(value >> 20) & 0x7]; - clk_p_div = pclk_divisors[(value >> 18) & 0x3]; - - hw = clk_hw_register_fixed_factor(NULL, "fclk", "pll1", 0, 1, 
clk_f_div); - clk_f_rate = clk_get_rate(hw->clk); - hw = clk_hw_register_fixed_factor(NULL, "hclk", "pll1", 0, 1, clk_h_div); - clk_h_rate = clk_get_rate(hw->clk); - hw = clk_hw_register_fixed_factor(NULL, "pclk", "hclk", 0, 1, clk_p_div); - clk_p_rate = clk_get_rate(hw->clk); - - clk_hw_register_clkdev(hw, "apb_pclk", NULL); - - ep93xx_dma_clock_init(); - - /* Determine the bootloader configured pll2 rate */ - value = __raw_readl(EP93XX_SYSCON_CLKSET2); - if (!(value & EP93XX_SYSCON_CLKSET2_NBYP2)) - clk_pll2_rate = EP93XX_EXT_CLK_RATE; - else if (value & EP93XX_SYSCON_CLKSET2_PLL2_EN) - clk_pll2_rate = calc_pll_rate(EP93XX_EXT_CLK_RATE, value); - else - clk_pll2_rate = 0; - - hw = clk_hw_register_fixed_rate(NULL, "pll2", "xtali", 0, clk_pll2_rate); - clk_hw_register_clkdev(hw, NULL, "pll2"); - - /* Initialize the pll2 derived clocks */ - /* - * These four bits set the divide ratio between the PLL2 - * output and the USB clock. - * 0000 - Divide by 1 - * 0001 - Divide by 2 - * 0010 - Divide by 3 - * 0011 - Divide by 4 - * 0100 - Divide by 5 - * 0101 - Divide by 6 - * 0110 - Divide by 7 - * 0111 - Divide by 8 - * 1000 - Divide by 9 - * 1001 - Divide by 10 - * 1010 - Divide by 11 - * 1011 - Divide by 12 - * 1100 - Divide by 13 - * 1101 - Divide by 14 - * 1110 - Divide by 15 - * 1111 - Divide by 1 - * On power-on-reset these bits are reset to 0000b. - */ - clk_usb_div = (((value >> 28) & 0xf) + 1); - hw = clk_hw_register_fixed_factor(NULL, "usb_clk", "pll2", 0, 1, clk_usb_div); - hw = clk_hw_register_gate(NULL, "ohci-platform", - "usb_clk", 0, - EP93XX_SYSCON_PWRCNT, - EP93XX_SYSCON_PWRCNT_USH_EN, - 0, - &clk_lock); - clk_hw_register_clkdev(hw, NULL, "ohci-platform"); - - /* - * EP93xx SSP clock rate was doubled in version E2. For more information - * see: - * http://www.cirrus.com/en/pubs/appNote/AN273REV4.pdf - */ - clk_spi_div = 1; - if (ep93xx_chip_revision() < EP93XX_CHIP_REV_E2) - clk_spi_div = 2; - hw = clk_hw_register_fixed_factor(NULL, "ep93xx-spi.0", "xtali", 0, 1, clk_spi_div); - clk_hw_register_clkdev(hw, NULL, "ep93xx-spi.0"); - - /* pwm clock */ - hw = clk_hw_register_fixed_factor(NULL, "pwm_clk", "xtali", 0, 1, 1); - clk_hw_register_clkdev(hw, "pwm_clk", NULL); - - pr_info("PLL1 running at %ld MHz, PLL2 at %ld MHz\n", - clk_pll1_rate / 1000000, clk_pll2_rate / 1000000); - pr_info("FCLK %ld MHz, HCLK %ld MHz, PCLK %ld MHz\n", - clk_f_rate / 1000000, clk_h_rate / 1000000, - clk_p_rate / 1000000); - - ep93xx_uart_clock_init(); - - /* touchscreen/adc clock */ - hw = clk_hw_register_div("ep93xx-adc", - "xtali", - EP93XX_SYSCON_KEYTCHCLKDIV, - EP93XX_SYSCON_KEYTCHCLKDIV_TSEN, - EP93XX_SYSCON_KEYTCHCLKDIV_ADIV, - 1, - adc_divisors, - ARRAY_SIZE(adc_divisors)); - - clk_hw_register_clkdev(hw, NULL, "ep93xx-adc"); - - /* keypad clock */ - hw = clk_hw_register_div("ep93xx-keypad", - "xtali", - EP93XX_SYSCON_KEYTCHCLKDIV, - EP93XX_SYSCON_KEYTCHCLKDIV_KEN, - EP93XX_SYSCON_KEYTCHCLKDIV_KDIV, - 1, - adc_divisors, - ARRAY_SIZE(adc_divisors)); - - clk_hw_register_clkdev(hw, NULL, "ep93xx-keypad"); - - /* On reset PDIV and VDIV is set to zero, while PDIV zero - * means clock disable, VDIV shouldn't be zero. - * So i set both dividers to minimum. - */ - /* ENA - Enable CLK divider. 
*/ - /* PDIV - 00 - Disable clock */ - /* VDIV - at least 2 */ - /* Check and enable video clk registers */ - value = __raw_readl(EP93XX_SYSCON_VIDCLKDIV); - value |= (1 << EP93XX_SYSCON_CLKDIV_PDIV_SHIFT) | 2; - ep93xx_syscon_swlocked_write(value, EP93XX_SYSCON_VIDCLKDIV); - - /* check and enable i2s clk registers */ - value = __raw_readl(EP93XX_SYSCON_I2SCLKDIV); - value |= (1 << EP93XX_SYSCON_CLKDIV_PDIV_SHIFT) | 2; - ep93xx_syscon_swlocked_write(value, EP93XX_SYSCON_I2SCLKDIV); - - /* video clk */ - hw = clk_hw_register_ddiv("ep93xx-fb", - EP93XX_SYSCON_VIDCLKDIV, - EP93XX_SYSCON_CLKDIV_ENABLE); - - clk_hw_register_clkdev(hw, NULL, "ep93xx-fb"); - - /* i2s clk */ - hw = clk_hw_register_ddiv("mclk", - EP93XX_SYSCON_I2SCLKDIV, - EP93XX_SYSCON_CLKDIV_ENABLE); - - clk_hw_register_clkdev(hw, "mclk", "ep93xx-i2s"); - - /* i2s sclk */ -#define EP93XX_I2SCLKDIV_SDIV_SHIFT 16 -#define EP93XX_I2SCLKDIV_SDIV_WIDTH 1 - hw = clk_hw_register_div("sclk", - "mclk", - EP93XX_SYSCON_I2SCLKDIV, - EP93XX_SYSCON_I2SCLKDIV_SENA, - EP93XX_I2SCLKDIV_SDIV_SHIFT, - EP93XX_I2SCLKDIV_SDIV_WIDTH, - sclk_divisors, - ARRAY_SIZE(sclk_divisors)); - - clk_hw_register_clkdev(hw, "sclk", "ep93xx-i2s"); - - /* i2s lrclk */ -#define EP93XX_I2SCLKDIV_LRDIV32_SHIFT 17 -#define EP93XX_I2SCLKDIV_LRDIV32_WIDTH 3 - hw = clk_hw_register_div("lrclk", - "sclk", - EP93XX_SYSCON_I2SCLKDIV, - EP93XX_SYSCON_I2SCLKDIV_SENA, - EP93XX_I2SCLKDIV_LRDIV32_SHIFT, - EP93XX_I2SCLKDIV_LRDIV32_WIDTH, - lrclk_divisors, - ARRAY_SIZE(lrclk_divisors)); - - clk_hw_register_clkdev(hw, "lrclk", "ep93xx-i2s"); - - return 0; -} -postcore_initcall(ep93xx_clock_init); diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c deleted file mode 100644 index 8b1ec60a9a46..000000000000 --- a/arch/arm/mach-ep93xx/core.c +++ /dev/null @@ -1,1018 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * arch/arm/mach-ep93xx/core.c - * Core routines for Cirrus EP93xx chips. - * - * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org> - * Copyright (C) 2007 Herbert Valerio Riedel <hvr@gnu.org> - * - * Thanks go to Michael Burian and Ray Lehtiniemi for their key - * role in the ep93xx linux community. 
- */ - -#define pr_fmt(fmt) "ep93xx " KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/platform_device.h> -#include <linux/interrupt.h> -#include <linux/dma-mapping.h> -#include <linux/sys_soc.h> -#include <linux/irq.h> -#include <linux/io.h> -#include <linux/gpio.h> -#include <linux/leds.h> -#include <linux/uaccess.h> -#include <linux/termios.h> -#include <linux/amba/bus.h> -#include <linux/amba/serial.h> -#include <linux/mtd/physmap.h> -#include <linux/i2c.h> -#include <linux/gpio/machine.h> -#include <linux/spi/spi.h> -#include <linux/export.h> -#include <linux/irqchip/arm-vic.h> -#include <linux/reboot.h> -#include <linux/usb/ohci_pdriver.h> -#include <linux/random.h> - -#include "hardware.h" -#include <linux/platform_data/video-ep93xx.h> -#include <linux/platform_data/keypad-ep93xx.h> -#include <linux/platform_data/spi-ep93xx.h> -#include <linux/soc/cirrus/ep93xx.h> - -#include "gpio-ep93xx.h" - -#include <asm/mach/arch.h> -#include <asm/mach/map.h> - -#include "soc.h" -#include "irqs.h" - -/************************************************************************* - * Static I/O mappings that are needed for all EP93xx platforms - *************************************************************************/ -static struct map_desc ep93xx_io_desc[] __initdata = { - { - .virtual = EP93XX_AHB_VIRT_BASE, - .pfn = __phys_to_pfn(EP93XX_AHB_PHYS_BASE), - .length = EP93XX_AHB_SIZE, - .type = MT_DEVICE, - }, { - .virtual = EP93XX_APB_VIRT_BASE, - .pfn = __phys_to_pfn(EP93XX_APB_PHYS_BASE), - .length = EP93XX_APB_SIZE, - .type = MT_DEVICE, - }, -}; - -void __init ep93xx_map_io(void) -{ - iotable_init(ep93xx_io_desc, ARRAY_SIZE(ep93xx_io_desc)); -} - -/************************************************************************* - * EP93xx IRQ handling - *************************************************************************/ -void __init ep93xx_init_irq(void) -{ - vic_init(EP93XX_VIC1_BASE, IRQ_EP93XX_VIC0, EP93XX_VIC1_VALID_IRQ_MASK, 0); - vic_init(EP93XX_VIC2_BASE, IRQ_EP93XX_VIC1, EP93XX_VIC2_VALID_IRQ_MASK, 0); -} - - -/************************************************************************* - * EP93xx System Controller Software Locked register handling - *************************************************************************/ - -/* - * syscon_swlock prevents anything else from writing to the syscon - * block while a software locked register is being written. - */ -static DEFINE_SPINLOCK(syscon_swlock); - -void ep93xx_syscon_swlocked_write(unsigned int val, void __iomem *reg) -{ - unsigned long flags; - - spin_lock_irqsave(&syscon_swlock, flags); - - __raw_writel(0xaa, EP93XX_SYSCON_SWLOCK); - __raw_writel(val, reg); - - spin_unlock_irqrestore(&syscon_swlock, flags); -} - -void ep93xx_devcfg_set_clear(unsigned int set_bits, unsigned int clear_bits) -{ - unsigned long flags; - unsigned int val; - - spin_lock_irqsave(&syscon_swlock, flags); - - val = __raw_readl(EP93XX_SYSCON_DEVCFG); - val &= ~clear_bits; - val |= set_bits; - __raw_writel(0xaa, EP93XX_SYSCON_SWLOCK); - __raw_writel(val, EP93XX_SYSCON_DEVCFG); - - spin_unlock_irqrestore(&syscon_swlock, flags); -} - -/** - * ep93xx_chip_revision() - returns the EP93xx chip revision - * - * See "platform.h" for more information. 
- */ -unsigned int ep93xx_chip_revision(void) -{ - unsigned int v; - - v = __raw_readl(EP93XX_SYSCON_SYSCFG); - v &= EP93XX_SYSCON_SYSCFG_REV_MASK; - v >>= EP93XX_SYSCON_SYSCFG_REV_SHIFT; - return v; -} -EXPORT_SYMBOL_GPL(ep93xx_chip_revision); - -/************************************************************************* - * EP93xx GPIO - *************************************************************************/ -static struct resource ep93xx_gpio_resource[] = { - DEFINE_RES_MEM(EP93XX_GPIO_PHYS_BASE, 0xcc), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO_AB), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO0MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO1MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO2MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO3MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO4MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO5MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO6MUX), - DEFINE_RES_IRQ(IRQ_EP93XX_GPIO7MUX), -}; - -static struct platform_device ep93xx_gpio_device = { - .name = "gpio-ep93xx", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_gpio_resource), - .resource = ep93xx_gpio_resource, -}; - -/************************************************************************* - * EP93xx peripheral handling - *************************************************************************/ -#define EP93XX_UART_MCR_OFFSET (0x0100) - -static void ep93xx_uart_set_mctrl(struct amba_device *dev, - void __iomem *base, unsigned int mctrl) -{ - unsigned int mcr; - - mcr = 0; - if (mctrl & TIOCM_RTS) - mcr |= 2; - if (mctrl & TIOCM_DTR) - mcr |= 1; - - __raw_writel(mcr, base + EP93XX_UART_MCR_OFFSET); -} - -static struct amba_pl010_data ep93xx_uart_data = { - .set_mctrl = ep93xx_uart_set_mctrl, -}; - -static AMBA_APB_DEVICE(uart1, "apb:uart1", 0x00041010, EP93XX_UART1_PHYS_BASE, - { IRQ_EP93XX_UART1 }, &ep93xx_uart_data); - -static AMBA_APB_DEVICE(uart2, "apb:uart2", 0x00041010, EP93XX_UART2_PHYS_BASE, - { IRQ_EP93XX_UART2 }, NULL); - -static AMBA_APB_DEVICE(uart3, "apb:uart3", 0x00041010, EP93XX_UART3_PHYS_BASE, - { IRQ_EP93XX_UART3 }, &ep93xx_uart_data); - -static struct resource ep93xx_rtc_resource[] = { - DEFINE_RES_MEM(EP93XX_RTC_PHYS_BASE, 0x10c), -}; - -static struct platform_device ep93xx_rtc_device = { - .name = "ep93xx-rtc", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_rtc_resource), - .resource = ep93xx_rtc_resource, -}; - -/************************************************************************* - * EP93xx OHCI USB Host - *************************************************************************/ - -static struct clk *ep93xx_ohci_host_clock; - -static int ep93xx_ohci_power_on(struct platform_device *pdev) -{ - if (!ep93xx_ohci_host_clock) { - ep93xx_ohci_host_clock = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(ep93xx_ohci_host_clock)) - return PTR_ERR(ep93xx_ohci_host_clock); - } - - return clk_prepare_enable(ep93xx_ohci_host_clock); -} - -static void ep93xx_ohci_power_off(struct platform_device *pdev) -{ - clk_disable(ep93xx_ohci_host_clock); -} - -static struct usb_ohci_pdata ep93xx_ohci_pdata = { - .power_on = ep93xx_ohci_power_on, - .power_off = ep93xx_ohci_power_off, - .power_suspend = ep93xx_ohci_power_off, -}; - -static struct resource ep93xx_ohci_resources[] = { - DEFINE_RES_MEM(EP93XX_USB_PHYS_BASE, 0x1000), - DEFINE_RES_IRQ(IRQ_EP93XX_USB), -}; - -static u64 ep93xx_ohci_dma_mask = DMA_BIT_MASK(32); - -static struct platform_device ep93xx_ohci_device = { - .name = "ohci-platform", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_ohci_resources), - .resource = ep93xx_ohci_resources, - .dev = { - .dma_mask = &ep93xx_ohci_dma_mask, - 
.coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &ep93xx_ohci_pdata, - }, -}; - -/************************************************************************* - * EP93xx physmap'ed flash - *************************************************************************/ -static struct physmap_flash_data ep93xx_flash_data; - -static struct resource ep93xx_flash_resource = { - .flags = IORESOURCE_MEM, -}; - -static struct platform_device ep93xx_flash = { - .name = "physmap-flash", - .id = 0, - .dev = { - .platform_data = &ep93xx_flash_data, - }, - .num_resources = 1, - .resource = &ep93xx_flash_resource, -}; - -/** - * ep93xx_register_flash() - Register the external flash device. - * @width: bank width in octets - * @start: resource start address - * @size: resource size - */ -void __init ep93xx_register_flash(unsigned int width, - resource_size_t start, resource_size_t size) -{ - ep93xx_flash_data.width = width; - - ep93xx_flash_resource.start = start; - ep93xx_flash_resource.end = start + size - 1; - - platform_device_register(&ep93xx_flash); -} - - -/************************************************************************* - * EP93xx ethernet peripheral handling - *************************************************************************/ -static struct ep93xx_eth_data ep93xx_eth_data; - -static struct resource ep93xx_eth_resource[] = { - DEFINE_RES_MEM(EP93XX_ETHERNET_PHYS_BASE, 0x10000), - DEFINE_RES_IRQ(IRQ_EP93XX_ETHERNET), -}; - -static u64 ep93xx_eth_dma_mask = DMA_BIT_MASK(32); - -static struct platform_device ep93xx_eth_device = { - .name = "ep93xx-eth", - .id = -1, - .dev = { - .platform_data = &ep93xx_eth_data, - .coherent_dma_mask = DMA_BIT_MASK(32), - .dma_mask = &ep93xx_eth_dma_mask, - }, - .num_resources = ARRAY_SIZE(ep93xx_eth_resource), - .resource = ep93xx_eth_resource, -}; - -/** - * ep93xx_register_eth - Register the built-in ethernet platform device. - * @data: platform specific ethernet configuration (__initdata) - * @copy_addr: flag indicating that the MAC address should be copied - * from the IndAd registers (as programmed by the bootloader) - */ -void __init ep93xx_register_eth(struct ep93xx_eth_data *data, int copy_addr) -{ - if (copy_addr) - memcpy_fromio(data->dev_addr, EP93XX_ETHERNET_BASE + 0x50, 6); - - ep93xx_eth_data = *data; - platform_device_register(&ep93xx_eth_device); -} - - -/************************************************************************* - * EP93xx i2c peripheral handling - *************************************************************************/ - -/* All EP93xx devices use the same two GPIO pins for I2C bit-banging */ -static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = { - .dev_id = "i2c-gpio.0", - .table = { - /* Use local offsets on gpiochip/port "G" */ - GPIO_LOOKUP_IDX("G", 1, NULL, 0, - GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), - GPIO_LOOKUP_IDX("G", 0, NULL, 1, - GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), - { } - }, -}; - -static struct platform_device ep93xx_i2c_device = { - .name = "i2c-gpio", - .id = 0, - .dev = { - .platform_data = NULL, - }, -}; - -/** - * ep93xx_register_i2c - Register the i2c platform device. - * @devices: platform specific i2c bus device information (__initdata) - * @num: the number of devices on the i2c bus - */ -void __init ep93xx_register_i2c(struct i2c_board_info *devices, int num) -{ - /* - * FIXME: this just sets the two pins as non-opendrain, as no - * platforms tries to do that anyway. 
Flag the applicable lines - * as open drain in the GPIO_LOOKUP above and the driver or - * gpiolib will handle open drain/open drain emulation as need - * be. Right now i2c-gpio emulates open drain which is not - * optimal. - */ - __raw_writel((0 << 1) | (0 << 0), - EP93XX_GPIO_EEDRIVE); - - i2c_register_board_info(0, devices, num); - gpiod_add_lookup_table(&ep93xx_i2c_gpiod_table); - platform_device_register(&ep93xx_i2c_device); -} - -/************************************************************************* - * EP93xx SPI peripheral handling - *************************************************************************/ -static struct ep93xx_spi_info ep93xx_spi_master_data; - -static struct resource ep93xx_spi_resources[] = { - DEFINE_RES_MEM(EP93XX_SPI_PHYS_BASE, 0x18), - DEFINE_RES_IRQ(IRQ_EP93XX_SSP), -}; - -static u64 ep93xx_spi_dma_mask = DMA_BIT_MASK(32); - -static struct platform_device ep93xx_spi_device = { - .name = "ep93xx-spi", - .id = 0, - .dev = { - .platform_data = &ep93xx_spi_master_data, - .coherent_dma_mask = DMA_BIT_MASK(32), - .dma_mask = &ep93xx_spi_dma_mask, - }, - .num_resources = ARRAY_SIZE(ep93xx_spi_resources), - .resource = ep93xx_spi_resources, -}; - -/** - * ep93xx_register_spi() - registers spi platform device - * @info: ep93xx board specific spi master info (__initdata) - * @devices: SPI devices to register (__initdata) - * @num: number of SPI devices to register - * - * This function registers platform device for the EP93xx SPI controller and - * also makes sure that SPI pins are muxed so that I2S is not using those pins. - */ -void __init ep93xx_register_spi(struct ep93xx_spi_info *info, - struct spi_board_info *devices, int num) -{ - /* - * When SPI is used, we need to make sure that I2S is muxed off from - * SPI pins. 
- */ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_I2SONSSP); - - ep93xx_spi_master_data = *info; - spi_register_board_info(devices, num); - platform_device_register(&ep93xx_spi_device); -} - -/************************************************************************* - * EP93xx LEDs - *************************************************************************/ -static const struct gpio_led ep93xx_led_pins[] __initconst = { - { - .name = "platform:grled", - }, { - .name = "platform:rdled", - }, -}; - -static const struct gpio_led_platform_data ep93xx_led_data __initconst = { - .num_leds = ARRAY_SIZE(ep93xx_led_pins), - .leds = ep93xx_led_pins, -}; - -static struct gpiod_lookup_table ep93xx_leds_gpio_table = { - .dev_id = "leds-gpio", - .table = { - /* Use local offsets on gpiochip/port "E" */ - GPIO_LOOKUP_IDX("E", 0, NULL, 0, GPIO_ACTIVE_HIGH), - GPIO_LOOKUP_IDX("E", 1, NULL, 1, GPIO_ACTIVE_HIGH), - { } - }, -}; - -/************************************************************************* - * EP93xx pwm peripheral handling - *************************************************************************/ -static struct resource ep93xx_pwm0_resource[] = { - DEFINE_RES_MEM(EP93XX_PWM_PHYS_BASE, 0x10), -}; - -static struct platform_device ep93xx_pwm0_device = { - .name = "ep93xx-pwm", - .id = 0, - .num_resources = ARRAY_SIZE(ep93xx_pwm0_resource), - .resource = ep93xx_pwm0_resource, -}; - -static struct resource ep93xx_pwm1_resource[] = { - DEFINE_RES_MEM(EP93XX_PWM_PHYS_BASE + 0x20, 0x10), -}; - -static struct platform_device ep93xx_pwm1_device = { - .name = "ep93xx-pwm", - .id = 1, - .num_resources = ARRAY_SIZE(ep93xx_pwm1_resource), - .resource = ep93xx_pwm1_resource, -}; - -void __init ep93xx_register_pwm(int pwm0, int pwm1) -{ - if (pwm0) - platform_device_register(&ep93xx_pwm0_device); - - /* NOTE: EP9307 does not have PWMOUT1 (pin EGPIO14) */ - if (pwm1) - platform_device_register(&ep93xx_pwm1_device); -} - -int ep93xx_pwm_acquire_gpio(struct platform_device *pdev) -{ - int err; - - if (pdev->id == 0) { - err = 0; - } else if (pdev->id == 1) { - err = gpio_request(EP93XX_GPIO_LINE_EGPIO14, - dev_name(&pdev->dev)); - if (err) - return err; - err = gpio_direction_output(EP93XX_GPIO_LINE_EGPIO14, 0); - if (err) - goto fail; - - /* PWM 1 output on EGPIO[14] */ - ep93xx_devcfg_set_bits(EP93XX_SYSCON_DEVCFG_PONG); - } else { - err = -ENODEV; - } - - return err; - -fail: - gpio_free(EP93XX_GPIO_LINE_EGPIO14); - return err; -} -EXPORT_SYMBOL(ep93xx_pwm_acquire_gpio); - -void ep93xx_pwm_release_gpio(struct platform_device *pdev) -{ - if (pdev->id == 1) { - gpio_direction_input(EP93XX_GPIO_LINE_EGPIO14); - gpio_free(EP93XX_GPIO_LINE_EGPIO14); - - /* EGPIO[14] used for GPIO */ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_PONG); - } -} -EXPORT_SYMBOL(ep93xx_pwm_release_gpio); - - -/************************************************************************* - * EP93xx video peripheral handling - *************************************************************************/ -static struct ep93xxfb_mach_info ep93xxfb_data; - -static struct resource ep93xx_fb_resource[] = { - DEFINE_RES_MEM(EP93XX_RASTER_PHYS_BASE, 0x800), -}; - -static struct platform_device ep93xx_fb_device = { - .name = "ep93xx-fb", - .id = -1, - .dev = { - .platform_data = &ep93xxfb_data, - .coherent_dma_mask = DMA_BIT_MASK(32), - .dma_mask = &ep93xx_fb_device.dev.coherent_dma_mask, - }, - .num_resources = ARRAY_SIZE(ep93xx_fb_resource), - .resource = ep93xx_fb_resource, -}; - -/* The backlight use a single register in the framebuffer's 
register space */ -#define EP93XX_RASTER_REG_BRIGHTNESS 0x20 - -static struct resource ep93xx_bl_resources[] = { - DEFINE_RES_MEM(EP93XX_RASTER_PHYS_BASE + - EP93XX_RASTER_REG_BRIGHTNESS, 0x04), -}; - -static struct platform_device ep93xx_bl_device = { - .name = "ep93xx-bl", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_bl_resources), - .resource = ep93xx_bl_resources, -}; - -/** - * ep93xx_register_fb - Register the framebuffer platform device. - * @data: platform specific framebuffer configuration (__initdata) - */ -void __init ep93xx_register_fb(struct ep93xxfb_mach_info *data) -{ - ep93xxfb_data = *data; - platform_device_register(&ep93xx_fb_device); - platform_device_register(&ep93xx_bl_device); -} - - -/************************************************************************* - * EP93xx matrix keypad peripheral handling - *************************************************************************/ -static struct ep93xx_keypad_platform_data ep93xx_keypad_data; - -static struct resource ep93xx_keypad_resource[] = { - DEFINE_RES_MEM(EP93XX_KEY_MATRIX_PHYS_BASE, 0x0c), - DEFINE_RES_IRQ(IRQ_EP93XX_KEY), -}; - -static struct platform_device ep93xx_keypad_device = { - .name = "ep93xx-keypad", - .id = -1, - .dev = { - .platform_data = &ep93xx_keypad_data, - }, - .num_resources = ARRAY_SIZE(ep93xx_keypad_resource), - .resource = ep93xx_keypad_resource, -}; - -/** - * ep93xx_register_keypad - Register the keypad platform device. - * @data: platform specific keypad configuration (__initdata) - */ -void __init ep93xx_register_keypad(struct ep93xx_keypad_platform_data *data) -{ - ep93xx_keypad_data = *data; - platform_device_register(&ep93xx_keypad_device); -} - -int ep93xx_keypad_acquire_gpio(struct platform_device *pdev) -{ - int err; - int i; - - for (i = 0; i < 8; i++) { - err = gpio_request(EP93XX_GPIO_LINE_C(i), dev_name(&pdev->dev)); - if (err) - goto fail_gpio_c; - err = gpio_request(EP93XX_GPIO_LINE_D(i), dev_name(&pdev->dev)); - if (err) - goto fail_gpio_d; - } - - /* Enable the keypad controller; GPIO ports C and D used for keypad */ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_KEYS | - EP93XX_SYSCON_DEVCFG_GONK); - - return 0; - -fail_gpio_d: - gpio_free(EP93XX_GPIO_LINE_C(i)); -fail_gpio_c: - for (--i; i >= 0; --i) { - gpio_free(EP93XX_GPIO_LINE_C(i)); - gpio_free(EP93XX_GPIO_LINE_D(i)); - } - return err; -} -EXPORT_SYMBOL(ep93xx_keypad_acquire_gpio); - -void ep93xx_keypad_release_gpio(struct platform_device *pdev) -{ - int i; - - for (i = 0; i < 8; i++) { - gpio_free(EP93XX_GPIO_LINE_C(i)); - gpio_free(EP93XX_GPIO_LINE_D(i)); - } - - /* Disable the keypad controller; GPIO ports C and D used for GPIO */ - ep93xx_devcfg_set_bits(EP93XX_SYSCON_DEVCFG_KEYS | - EP93XX_SYSCON_DEVCFG_GONK); -} -EXPORT_SYMBOL(ep93xx_keypad_release_gpio); - -/************************************************************************* - * EP93xx I2S audio peripheral handling - *************************************************************************/ -static struct resource ep93xx_i2s_resource[] = { - DEFINE_RES_MEM(EP93XX_I2S_PHYS_BASE, 0x100), - DEFINE_RES_IRQ(IRQ_EP93XX_SAI), -}; - -static struct platform_device ep93xx_i2s_device = { - .name = "ep93xx-i2s", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_i2s_resource), - .resource = ep93xx_i2s_resource, -}; - -static struct platform_device ep93xx_pcm_device = { - .name = "ep93xx-pcm-audio", - .id = -1, -}; - -void __init ep93xx_register_i2s(void) -{ - platform_device_register(&ep93xx_i2s_device); - platform_device_register(&ep93xx_pcm_device); -} - 
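[Editor's note: the GPIO acquire/release helpers in this hunk (keypad above, IDE further below) all share one goto-unwind idiom. A minimal sketch, assuming the gpio-ep93xx.h line macros; example_acquire_bank() is a hypothetical name for illustration, not a function in this patch:

#include <linux/gpio.h>
#include <linux/platform_device.h>

static int example_acquire_bank(struct platform_device *pdev)
{
	int err, i;

	/* Request one whole 8-line port, stopping at the first failure */
	for (i = 0; i < 8; i++) {
		err = gpio_request(EP93XX_GPIO_LINE_C(i), dev_name(&pdev->dev));
		if (err)
			goto fail;
	}
	return 0;

fail:
	/* Release exactly the lines already requested, in reverse order */
	while (--i >= 0)
		gpio_free(EP93XX_GPIO_LINE_C(i));
	return err;
}

The two-port keypad variant above is the same pattern with an extra label so a partially completed loop iteration frees its port-C line before the main unwind.]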
-#define EP93XX_SYSCON_DEVCFG_I2S_MASK (EP93XX_SYSCON_DEVCFG_I2SONSSP | \ - EP93XX_SYSCON_DEVCFG_I2SONAC97) - -#define EP93XX_I2SCLKDIV_MASK (EP93XX_SYSCON_I2SCLKDIV_ORIDE | \ - EP93XX_SYSCON_I2SCLKDIV_SPOL) - -int ep93xx_i2s_acquire(void) -{ - unsigned val; - - ep93xx_devcfg_set_clear(EP93XX_SYSCON_DEVCFG_I2SONAC97, - EP93XX_SYSCON_DEVCFG_I2S_MASK); - - /* - * This is potentially racy with the clock api for i2s_mclk, sclk and - * lrclk. Since the i2s driver is the only user of those clocks we - * rely on it to prevent parallel use of this function and the - * clock api for the i2s clocks. - */ - val = __raw_readl(EP93XX_SYSCON_I2SCLKDIV); - val &= ~EP93XX_I2SCLKDIV_MASK; - val |= EP93XX_SYSCON_I2SCLKDIV_ORIDE | EP93XX_SYSCON_I2SCLKDIV_SPOL; - ep93xx_syscon_swlocked_write(val, EP93XX_SYSCON_I2SCLKDIV); - - return 0; -} -EXPORT_SYMBOL(ep93xx_i2s_acquire); - -void ep93xx_i2s_release(void) -{ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_I2S_MASK); -} -EXPORT_SYMBOL(ep93xx_i2s_release); - -/************************************************************************* - * EP93xx AC97 audio peripheral handling - *************************************************************************/ -static struct resource ep93xx_ac97_resources[] = { - DEFINE_RES_MEM(EP93XX_AAC_PHYS_BASE, 0xac), - DEFINE_RES_IRQ(IRQ_EP93XX_AACINTR), -}; - -static struct platform_device ep93xx_ac97_device = { - .name = "ep93xx-ac97", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_ac97_resources), - .resource = ep93xx_ac97_resources, -}; - -void __init ep93xx_register_ac97(void) -{ - /* - * Make sure that the AC97 pins are not used by I2S. - */ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_I2SONAC97); - - platform_device_register(&ep93xx_ac97_device); - platform_device_register(&ep93xx_pcm_device); -} - -/************************************************************************* - * EP93xx Watchdog - *************************************************************************/ -static struct resource ep93xx_wdt_resources[] = { - DEFINE_RES_MEM(EP93XX_WATCHDOG_PHYS_BASE, 0x08), -}; - -static struct platform_device ep93xx_wdt_device = { - .name = "ep93xx-wdt", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_wdt_resources), - .resource = ep93xx_wdt_resources, -}; - -/************************************************************************* - * EP93xx IDE - *************************************************************************/ -static struct resource ep93xx_ide_resources[] = { - DEFINE_RES_MEM(EP93XX_IDE_PHYS_BASE, 0x38), - DEFINE_RES_IRQ(IRQ_EP93XX_EXT3), -}; - -static struct platform_device ep93xx_ide_device = { - .name = "ep93xx-ide", - .id = -1, - .dev = { - .dma_mask = &ep93xx_ide_device.dev.coherent_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - }, - .num_resources = ARRAY_SIZE(ep93xx_ide_resources), - .resource = ep93xx_ide_resources, -}; - -void __init ep93xx_register_ide(void) -{ - platform_device_register(&ep93xx_ide_device); -} - -int ep93xx_ide_acquire_gpio(struct platform_device *pdev) -{ - int err; - int i; - - err = gpio_request(EP93XX_GPIO_LINE_EGPIO2, dev_name(&pdev->dev)); - if (err) - return err; - err = gpio_request(EP93XX_GPIO_LINE_EGPIO15, dev_name(&pdev->dev)); - if (err) - goto fail_egpio15; - for (i = 2; i < 8; i++) { - err = gpio_request(EP93XX_GPIO_LINE_E(i), dev_name(&pdev->dev)); - if (err) - goto fail_gpio_e; - } - for (i = 4; i < 8; i++) { - err = gpio_request(EP93XX_GPIO_LINE_G(i), dev_name(&pdev->dev)); - if (err) - goto fail_gpio_g; - } - for (i = 0; i < 8; i++) { - err = 
gpio_request(EP93XX_GPIO_LINE_H(i), dev_name(&pdev->dev)); - if (err) - goto fail_gpio_h; - } - - /* GPIO ports E[7:2], G[7:4] and H used by IDE */ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_EONIDE | - EP93XX_SYSCON_DEVCFG_GONIDE | - EP93XX_SYSCON_DEVCFG_HONIDE); - return 0; - -fail_gpio_h: - for (--i; i >= 0; --i) - gpio_free(EP93XX_GPIO_LINE_H(i)); - i = 8; -fail_gpio_g: - for (--i; i >= 4; --i) - gpio_free(EP93XX_GPIO_LINE_G(i)); - i = 8; -fail_gpio_e: - for (--i; i >= 2; --i) - gpio_free(EP93XX_GPIO_LINE_E(i)); - gpio_free(EP93XX_GPIO_LINE_EGPIO15); -fail_egpio15: - gpio_free(EP93XX_GPIO_LINE_EGPIO2); - return err; -} -EXPORT_SYMBOL(ep93xx_ide_acquire_gpio); - -void ep93xx_ide_release_gpio(struct platform_device *pdev) -{ - int i; - - for (i = 2; i < 8; i++) - gpio_free(EP93XX_GPIO_LINE_E(i)); - for (i = 4; i < 8; i++) - gpio_free(EP93XX_GPIO_LINE_G(i)); - for (i = 0; i < 8; i++) - gpio_free(EP93XX_GPIO_LINE_H(i)); - gpio_free(EP93XX_GPIO_LINE_EGPIO15); - gpio_free(EP93XX_GPIO_LINE_EGPIO2); - - - /* GPIO ports E[7:2], G[7:4] and H used by GPIO */ - ep93xx_devcfg_set_bits(EP93XX_SYSCON_DEVCFG_EONIDE | - EP93XX_SYSCON_DEVCFG_GONIDE | - EP93XX_SYSCON_DEVCFG_HONIDE); -} -EXPORT_SYMBOL(ep93xx_ide_release_gpio); - -/************************************************************************* - * EP93xx ADC - *************************************************************************/ -static struct resource ep93xx_adc_resources[] = { - DEFINE_RES_MEM(EP93XX_ADC_PHYS_BASE, 0x28), - DEFINE_RES_IRQ(IRQ_EP93XX_TOUCH), -}; - -static struct platform_device ep93xx_adc_device = { - .name = "ep93xx-adc", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_adc_resources), - .resource = ep93xx_adc_resources, -}; - -void __init ep93xx_register_adc(void) -{ - /* Power up ADC, deactivate Touch Screen Controller */ - ep93xx_devcfg_set_clear(EP93XX_SYSCON_DEVCFG_TIN, - EP93XX_SYSCON_DEVCFG_ADCPD); - - platform_device_register(&ep93xx_adc_device); -} - -/************************************************************************* - * EP93xx Security peripheral - *************************************************************************/ - -/* - * The Maverick Key is 256 bits of micro fuses blown at the factory during - * manufacturing to uniquely identify a part. 
- * - * See: http://arm.cirrus.com/forum/viewtopic.php?t=486&highlight=maverick+key - */ -#define EP93XX_SECURITY_REG(x) (EP93XX_SECURITY_BASE + (x)) -#define EP93XX_SECURITY_SECFLG EP93XX_SECURITY_REG(0x2400) -#define EP93XX_SECURITY_FUSEFLG EP93XX_SECURITY_REG(0x2410) -#define EP93XX_SECURITY_UNIQID EP93XX_SECURITY_REG(0x2440) -#define EP93XX_SECURITY_UNIQCHK EP93XX_SECURITY_REG(0x2450) -#define EP93XX_SECURITY_UNIQVAL EP93XX_SECURITY_REG(0x2460) -#define EP93XX_SECURITY_SECID1 EP93XX_SECURITY_REG(0x2500) -#define EP93XX_SECURITY_SECID2 EP93XX_SECURITY_REG(0x2504) -#define EP93XX_SECURITY_SECCHK1 EP93XX_SECURITY_REG(0x2520) -#define EP93XX_SECURITY_SECCHK2 EP93XX_SECURITY_REG(0x2524) -#define EP93XX_SECURITY_UNIQID2 EP93XX_SECURITY_REG(0x2700) -#define EP93XX_SECURITY_UNIQID3 EP93XX_SECURITY_REG(0x2704) -#define EP93XX_SECURITY_UNIQID4 EP93XX_SECURITY_REG(0x2708) -#define EP93XX_SECURITY_UNIQID5 EP93XX_SECURITY_REG(0x270c) - -static char ep93xx_soc_id[33]; - -static const char __init *ep93xx_get_soc_id(void) -{ - unsigned int id, id2, id3, id4, id5; - - if (__raw_readl(EP93XX_SECURITY_UNIQVAL) != 1) - return "bad Hamming code"; - - id = __raw_readl(EP93XX_SECURITY_UNIQID); - id2 = __raw_readl(EP93XX_SECURITY_UNIQID2); - id3 = __raw_readl(EP93XX_SECURITY_UNIQID3); - id4 = __raw_readl(EP93XX_SECURITY_UNIQID4); - id5 = __raw_readl(EP93XX_SECURITY_UNIQID5); - - if (id != id2) - return "invalid"; - - /* Toss the unique ID into the entropy pool */ - add_device_randomness(&id2, 4); - add_device_randomness(&id3, 4); - add_device_randomness(&id4, 4); - add_device_randomness(&id5, 4); - - snprintf(ep93xx_soc_id, sizeof(ep93xx_soc_id), - "%08x%08x%08x%08x", id2, id3, id4, id5); - - return ep93xx_soc_id; -} - -static const char __init *ep93xx_get_soc_rev(void) -{ - int rev = ep93xx_chip_revision(); - - switch (rev) { - case EP93XX_CHIP_REV_D0: - return "D0"; - case EP93XX_CHIP_REV_D1: - return "D1"; - case EP93XX_CHIP_REV_E0: - return "E0"; - case EP93XX_CHIP_REV_E1: - return "E1"; - case EP93XX_CHIP_REV_E2: - return "E2"; - default: - return "unknown"; - } -} - -static const char __init *ep93xx_get_machine_name(void) -{ - return kasprintf(GFP_KERNEL,"%s", machine_desc->name); -} - -static struct device __init *ep93xx_init_soc(void) -{ - struct soc_device_attribute *soc_dev_attr; - struct soc_device *soc_dev; - - soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL); - if (!soc_dev_attr) - return NULL; - - soc_dev_attr->machine = ep93xx_get_machine_name(); - soc_dev_attr->family = "Cirrus Logic EP93xx"; - soc_dev_attr->revision = ep93xx_get_soc_rev(); - soc_dev_attr->soc_id = ep93xx_get_soc_id(); - - soc_dev = soc_device_register(soc_dev_attr); - if (IS_ERR(soc_dev)) { - kfree(soc_dev_attr->machine); - kfree(soc_dev_attr); - return NULL; - } - - return soc_device_to_device(soc_dev); -} - -struct device __init *ep93xx_init_devices(void) -{ - struct device *parent; - - /* Disallow access to MaverickCrunch initially */ - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_CPENA); - - /* Default all ports to GPIO */ - ep93xx_devcfg_set_bits(EP93XX_SYSCON_DEVCFG_KEYS | - EP93XX_SYSCON_DEVCFG_GONK | - EP93XX_SYSCON_DEVCFG_EONIDE | - EP93XX_SYSCON_DEVCFG_GONIDE | - EP93XX_SYSCON_DEVCFG_HONIDE); - - parent = ep93xx_init_soc(); - - /* Get the GPIO working early, other devices need it */ - platform_device_register(&ep93xx_gpio_device); - - amba_device_register(&uart1_device, &iomem_resource); - amba_device_register(&uart2_device, &iomem_resource); - amba_device_register(&uart3_device, &iomem_resource); - - 
platform_device_register(&ep93xx_rtc_device); - platform_device_register(&ep93xx_ohci_device); - platform_device_register(&ep93xx_wdt_device); - - gpiod_add_lookup_table(&ep93xx_leds_gpio_table); - gpio_led_register_device(-1, &ep93xx_led_data); - - return parent; -} - -void ep93xx_restart(enum reboot_mode mode, const char *cmd) -{ - /* - * Set then clear the SWRST bit to initiate a software reset - */ - ep93xx_devcfg_set_bits(EP93XX_SYSCON_DEVCFG_SWRST); - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_SWRST); - - while (1) - ; -} diff --git a/arch/arm/mach-ep93xx/dma.c b/arch/arm/mach-ep93xx/dma.c deleted file mode 100644 index 74515acab8ef..000000000000 --- a/arch/arm/mach-ep93xx/dma.c +++ /dev/null @@ -1,114 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * arch/arm/mach-ep93xx/dma.c - * - * Platform support code for the EP93xx dmaengine driver. - * - * Copyright (C) 2011 Mika Westerberg - * - * This work is based on the original dma-m2p implementation with - * following copyrights: - * - * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org> - * Copyright (C) 2006 Applied Data Systems - * Copyright (C) 2009 Ryan Mallon <rmallon@gmail.com> - */ - -#include <linux/dmaengine.h> -#include <linux/dma-mapping.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/kernel.h> -#include <linux/platform_device.h> - -#include <linux/platform_data/dma-ep93xx.h> -#include "hardware.h" - -#include "soc.h" - -#define DMA_CHANNEL(_name, _base, _irq) \ - { .name = (_name), .base = (_base), .irq = (_irq) } - -/* - * DMA M2P channels. - * - * On the EP93xx chip the following peripherals my be allocated to the 10 - * Memory to Internal Peripheral (M2P) channels (5 transmit + 5 receive). - * - * I2S contains 3 Tx and 3 Rx DMA Channels - * AAC contains 3 Tx and 3 Rx DMA Channels - * UART1 contains 1 Tx and 1 Rx DMA Channels - * UART2 contains 1 Tx and 1 Rx DMA Channels - * UART3 contains 1 Tx and 1 Rx DMA Channels - * IrDA contains 1 Tx and 1 Rx DMA Channels - * - * Registers are mapped statically in ep93xx_map_io(). - */ -static struct ep93xx_dma_chan_data ep93xx_dma_m2p_channels[] = { - DMA_CHANNEL("m2p0", EP93XX_DMA_BASE + 0x0000, IRQ_EP93XX_DMAM2P0), - DMA_CHANNEL("m2p1", EP93XX_DMA_BASE + 0x0040, IRQ_EP93XX_DMAM2P1), - DMA_CHANNEL("m2p2", EP93XX_DMA_BASE + 0x0080, IRQ_EP93XX_DMAM2P2), - DMA_CHANNEL("m2p3", EP93XX_DMA_BASE + 0x00c0, IRQ_EP93XX_DMAM2P3), - DMA_CHANNEL("m2p4", EP93XX_DMA_BASE + 0x0240, IRQ_EP93XX_DMAM2P4), - DMA_CHANNEL("m2p5", EP93XX_DMA_BASE + 0x0200, IRQ_EP93XX_DMAM2P5), - DMA_CHANNEL("m2p6", EP93XX_DMA_BASE + 0x02c0, IRQ_EP93XX_DMAM2P6), - DMA_CHANNEL("m2p7", EP93XX_DMA_BASE + 0x0280, IRQ_EP93XX_DMAM2P7), - DMA_CHANNEL("m2p8", EP93XX_DMA_BASE + 0x0340, IRQ_EP93XX_DMAM2P8), - DMA_CHANNEL("m2p9", EP93XX_DMA_BASE + 0x0300, IRQ_EP93XX_DMAM2P9), -}; - -static struct ep93xx_dma_platform_data ep93xx_dma_m2p_data = { - .channels = ep93xx_dma_m2p_channels, - .num_channels = ARRAY_SIZE(ep93xx_dma_m2p_channels), -}; - -static u64 ep93xx_dma_m2p_mask = DMA_BIT_MASK(32); - -static struct platform_device ep93xx_dma_m2p_device = { - .name = "ep93xx-dma-m2p", - .id = -1, - .dev = { - .platform_data = &ep93xx_dma_m2p_data, - .dma_mask = &ep93xx_dma_m2p_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - }, -}; - -/* - * DMA M2M channels. - * - * There are 2 M2M channels which support memcpy/memset and in addition simple - * hardware requests from/to SSP and IDE. We do not implement an external - * hardware requests. 
- * - * Registers are mapped statically in ep93xx_map_io(). - */ -static struct ep93xx_dma_chan_data ep93xx_dma_m2m_channels[] = { - DMA_CHANNEL("m2m0", EP93XX_DMA_BASE + 0x0100, IRQ_EP93XX_DMAM2M0), - DMA_CHANNEL("m2m1", EP93XX_DMA_BASE + 0x0140, IRQ_EP93XX_DMAM2M1), -}; - -static struct ep93xx_dma_platform_data ep93xx_dma_m2m_data = { - .channels = ep93xx_dma_m2m_channels, - .num_channels = ARRAY_SIZE(ep93xx_dma_m2m_channels), -}; - -static u64 ep93xx_dma_m2m_mask = DMA_BIT_MASK(32); - -static struct platform_device ep93xx_dma_m2m_device = { - .name = "ep93xx-dma-m2m", - .id = -1, - .dev = { - .platform_data = &ep93xx_dma_m2m_data, - .dma_mask = &ep93xx_dma_m2m_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - }, -}; - -static int __init ep93xx_dma_init(void) -{ - platform_device_register(&ep93xx_dma_m2p_device); - platform_device_register(&ep93xx_dma_m2m_device); - return 0; -} -arch_initcall(ep93xx_dma_init); diff --git a/arch/arm/mach-ep93xx/edb93xx.c b/arch/arm/mach-ep93xx/edb93xx.c deleted file mode 100644 index dbdb822a0100..000000000000 --- a/arch/arm/mach-ep93xx/edb93xx.c +++ /dev/null @@ -1,368 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * arch/arm/mach-ep93xx/edb93xx.c - * Cirrus Logic EDB93xx Development Board support. - * - * EDB93XX, EDB9301, EDB9307A - * Copyright (C) 2008-2009 H Hartley Sweeten <hsweeten@visionengravers.com> - * - * EDB9302 - * Copyright (C) 2006 George Kashperko <george@chas.com.ua> - * - * EDB9302A, EDB9315, EDB9315A - * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org> - * - * EDB9307 - * Copyright (C) 2007 Herbert Valerio Riedel <hvr@gnu.org> - * - * EDB9312 - * Copyright (C) 2006 Infosys Technologies Limited - * Toufeeq Hussain <toufeeq_hussain@infosys.com> - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/platform_device.h> -#include <linux/i2c.h> -#include <linux/spi/spi.h> -#include <linux/gpio/machine.h> - -#include <sound/cs4271.h> - -#include "hardware.h" -#include <linux/platform_data/video-ep93xx.h> -#include <linux/platform_data/spi-ep93xx.h> -#include "gpio-ep93xx.h" - -#include <asm/mach-types.h> -#include <asm/mach/arch.h> - -#include "soc.h" - -static void __init edb93xx_register_flash(void) -{ - if (machine_is_edb9307() || machine_is_edb9312() || - machine_is_edb9315()) { - ep93xx_register_flash(4, EP93XX_CS6_PHYS_BASE, SZ_32M); - } else { - ep93xx_register_flash(2, EP93XX_CS6_PHYS_BASE, SZ_16M); - } -} - -static struct ep93xx_eth_data __initdata edb93xx_eth_data = { - .phy_id = 1, -}; - - -/************************************************************************* - * EDB93xx i2c peripheral handling - *************************************************************************/ - -static struct i2c_board_info __initdata edb93xxa_i2c_board_info[] = { - { - I2C_BOARD_INFO("isl1208", 0x6f), - }, -}; - -static struct i2c_board_info __initdata edb93xx_i2c_board_info[] = { - { - I2C_BOARD_INFO("ds1337", 0x68), - }, -}; - -static void __init edb93xx_register_i2c(void) -{ - if (machine_is_edb9302a() || machine_is_edb9307a() || - machine_is_edb9315a()) { - ep93xx_register_i2c(edb93xxa_i2c_board_info, - ARRAY_SIZE(edb93xxa_i2c_board_info)); - } else if (machine_is_edb9302() || machine_is_edb9307() - || machine_is_edb9312() || machine_is_edb9315()) { - ep93xx_register_i2c(edb93xx_i2c_board_info, - ARRAY_SIZE(edb93xx_i2c_board_info)); - } -} - - -/************************************************************************* - * EDB93xx SPI peripheral handling - 
*************************************************************************/ -static struct cs4271_platform_data edb93xx_cs4271_data = { - /* Intentionally left blank */ -}; - -static struct spi_board_info edb93xx_spi_board_info[] __initdata = { - { - .modalias = "cs4271", - .platform_data = &edb93xx_cs4271_data, - .max_speed_hz = 6000000, - .bus_num = 0, - .chip_select = 0, - .mode = SPI_MODE_3, - }, -}; - -static struct gpiod_lookup_table edb93xx_spi_cs_gpio_table = { - .dev_id = "spi0", - .table = { - GPIO_LOOKUP("A", 6, "cs", GPIO_ACTIVE_LOW), - { }, - }, -}; - -static struct ep93xx_spi_info edb93xx_spi_info __initdata = { - /* Intentionally left blank */ -}; - -static struct gpiod_lookup_table edb93xx_cs4272_edb9301_gpio_table = { - .dev_id = "spi0.0", /* CS0 on SPI0 */ - .table = { - GPIO_LOOKUP("A", 1, "reset", GPIO_ACTIVE_LOW), - { }, - }, -}; - -static struct gpiod_lookup_table edb93xx_cs4272_edb9302_gpio_table = { - .dev_id = "spi0.0", /* CS0 on SPI0 */ - .table = { - GPIO_LOOKUP("H", 2, "reset", GPIO_ACTIVE_LOW), - { }, - }, -}; - -static struct gpiod_lookup_table edb93xx_cs4272_edb9315_gpio_table = { - .dev_id = "spi0.0", /* CS0 on SPI0 */ - .table = { - GPIO_LOOKUP("B", 6, "reset", GPIO_ACTIVE_LOW), - { }, - }, -}; - -static void __init edb93xx_register_spi(void) -{ - if (machine_is_edb9301() || machine_is_edb9302()) - gpiod_add_lookup_table(&edb93xx_cs4272_edb9301_gpio_table); - else if (machine_is_edb9302a() || machine_is_edb9307a()) - gpiod_add_lookup_table(&edb93xx_cs4272_edb9302_gpio_table); - else if (machine_is_edb9315a()) - gpiod_add_lookup_table(&edb93xx_cs4272_edb9315_gpio_table); - - gpiod_add_lookup_table(&edb93xx_spi_cs_gpio_table); - ep93xx_register_spi(&edb93xx_spi_info, edb93xx_spi_board_info, - ARRAY_SIZE(edb93xx_spi_board_info)); -} - - -/************************************************************************* - * EDB93xx I2S - *************************************************************************/ -static struct platform_device edb93xx_audio_device = { - .name = "edb93xx-audio", - .id = -1, -}; - -static int __init edb93xx_has_audio(void) -{ - return (machine_is_edb9301() || machine_is_edb9302() || - machine_is_edb9302a() || machine_is_edb9307a() || - machine_is_edb9315a()); -} - -static void __init edb93xx_register_i2s(void) -{ - if (edb93xx_has_audio()) { - ep93xx_register_i2s(); - platform_device_register(&edb93xx_audio_device); - } -} - - -/************************************************************************* - * EDB93xx pwm - *************************************************************************/ -static void __init edb93xx_register_pwm(void) -{ - if (machine_is_edb9301() || - machine_is_edb9302() || machine_is_edb9302a()) { - /* EP9301 and EP9302 only have pwm.1 (EGPIO14) */ - ep93xx_register_pwm(0, 1); - } else if (machine_is_edb9307() || machine_is_edb9307a()) { - /* EP9307 only has pwm.0 (PWMOUT) */ - ep93xx_register_pwm(1, 0); - } else { - /* EP9312 and EP9315 have both */ - ep93xx_register_pwm(1, 1); - } -} - - -/************************************************************************* - * EDB93xx framebuffer - *************************************************************************/ -static struct ep93xxfb_mach_info __initdata edb93xxfb_info = { - .flags = 0, -}; - -static int __init edb93xx_has_fb(void) -{ - /* These platforms have an ep93xx with video capability */ - return machine_is_edb9307() || machine_is_edb9307a() || - machine_is_edb9312() || machine_is_edb9315() || - machine_is_edb9315a(); -} - -static void __init 
edb93xx_register_fb(void) -{ - if (!edb93xx_has_fb()) - return; - - if (machine_is_edb9307a() || machine_is_edb9315a()) - edb93xxfb_info.flags |= EP93XXFB_USE_SDCSN0; - else - edb93xxfb_info.flags |= EP93XXFB_USE_SDCSN3; - - ep93xx_register_fb(&edb93xxfb_info); -} - - -/************************************************************************* - * EDB93xx IDE - *************************************************************************/ -static int __init edb93xx_has_ide(void) -{ - /* - * Although EDB9312 and EDB9315 do have IDE capability, they have - * INTRQ line wired as pull-up, which makes using IDE interface - * problematic. - */ - return machine_is_edb9312() || machine_is_edb9315() || - machine_is_edb9315a(); -} - -static void __init edb93xx_register_ide(void) -{ - if (!edb93xx_has_ide()) - return; - - ep93xx_register_ide(); -} - - -static void __init edb93xx_init_machine(void) -{ - ep93xx_init_devices(); - edb93xx_register_flash(); - ep93xx_register_eth(&edb93xx_eth_data, 1); - edb93xx_register_i2c(); - edb93xx_register_spi(); - edb93xx_register_i2s(); - edb93xx_register_pwm(); - edb93xx_register_fb(); - edb93xx_register_ide(); - ep93xx_register_adc(); -} - - -#ifdef CONFIG_MACH_EDB9301 -MACHINE_START(EDB9301, "Cirrus Logic EDB9301 Evaluation Board") - /* Maintainer: H Hartley Sweeten <hsweeten@visionengravers.com> */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_EDB9302 -MACHINE_START(EDB9302, "Cirrus Logic EDB9302 Evaluation Board") - /* Maintainer: George Kashperko <george@chas.com.ua> */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_EDB9302A -MACHINE_START(EDB9302A, "Cirrus Logic EDB9302A Evaluation Board") - /* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_EDB9307 -MACHINE_START(EDB9307, "Cirrus Logic EDB9307 Evaluation Board") - /* Maintainer: Herbert Valerio Riedel <hvr@gnu.org> */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_EDB9307A -MACHINE_START(EDB9307A, "Cirrus Logic EDB9307A Evaluation Board") - /* Maintainer: H Hartley Sweeten <hsweeten@visionengravers.com> */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_EDB9312 -MACHINE_START(EDB9312, "Cirrus Logic EDB9312 Evaluation Board") - /* Maintainer: Toufeeq Hussain <toufeeq_hussain@infosys.com> */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - 
-#ifdef CONFIG_MACH_EDB9315 -MACHINE_START(EDB9315, "Cirrus Logic EDB9315 Evaluation Board") - /* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_EDB9315A -MACHINE_START(EDB9315A, "Cirrus Logic EDB9315A Evaluation Board") - /* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ep93xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = edb93xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END -#endif diff --git a/arch/arm/mach-ep93xx/ep93xx-regs.h b/arch/arm/mach-ep93xx/ep93xx-regs.h deleted file mode 100644 index 8fa3646de0a4..000000000000 --- a/arch/arm/mach-ep93xx/ep93xx-regs.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARCH_EP93XX_REGS_H -#define __ASM_ARCH_EP93XX_REGS_H - -/* - * EP93xx linux memory map: - * - * virt phys size - * fe800000 5M per-platform mappings - * fed00000 80800000 2M APB - * fef00000 80000000 1M AHB - */ - -#define EP93XX_AHB_PHYS_BASE 0x80000000 -#define EP93XX_AHB_VIRT_BASE 0xfef00000 -#define EP93XX_AHB_SIZE 0x00100000 - -#define EP93XX_AHB_PHYS(x) (EP93XX_AHB_PHYS_BASE + (x)) -#define EP93XX_AHB_IOMEM(x) IOMEM(EP93XX_AHB_VIRT_BASE + (x)) - -#define EP93XX_APB_PHYS_BASE 0x80800000 -#define EP93XX_APB_VIRT_BASE 0xfed00000 -#define EP93XX_APB_SIZE 0x00200000 - -#define EP93XX_APB_PHYS(x) (EP93XX_APB_PHYS_BASE + (x)) -#define EP93XX_APB_IOMEM(x) IOMEM(EP93XX_APB_VIRT_BASE + (x)) - -/* APB UARTs */ -#define EP93XX_UART1_PHYS_BASE EP93XX_APB_PHYS(0x000c0000) -#define EP93XX_UART1_BASE EP93XX_APB_IOMEM(0x000c0000) - -#define EP93XX_UART2_PHYS_BASE EP93XX_APB_PHYS(0x000d0000) -#define EP93XX_UART2_BASE EP93XX_APB_IOMEM(0x000d0000) - -#define EP93XX_UART3_PHYS_BASE EP93XX_APB_PHYS(0x000e0000) -#define EP93XX_UART3_BASE EP93XX_APB_IOMEM(0x000e0000) - -#endif diff --git a/arch/arm/mach-ep93xx/gpio-ep93xx.h b/arch/arm/mach-ep93xx/gpio-ep93xx.h deleted file mode 100644 index 7b46eb7e5507..000000000000 --- a/arch/arm/mach-ep93xx/gpio-ep93xx.h +++ /dev/null @@ -1,111 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Include file for the EP93XX GPIO controller machine specifics */ - -#ifndef __GPIO_EP93XX_H -#define __GPIO_EP93XX_H - -#include "ep93xx-regs.h" - -#define EP93XX_GPIO_PHYS_BASE EP93XX_APB_PHYS(0x00040000) -#define EP93XX_GPIO_BASE EP93XX_APB_IOMEM(0x00040000) -#define EP93XX_GPIO_REG(x) (EP93XX_GPIO_BASE + (x)) -#define EP93XX_GPIO_F_INT_STATUS EP93XX_GPIO_REG(0x5c) -#define EP93XX_GPIO_A_INT_STATUS EP93XX_GPIO_REG(0xa0) -#define EP93XX_GPIO_B_INT_STATUS EP93XX_GPIO_REG(0xbc) -#define EP93XX_GPIO_EEDRIVE EP93XX_GPIO_REG(0xc8) - -/* GPIO port A. */ -#define EP93XX_GPIO_LINE_A(x) ((x) + 0) -#define EP93XX_GPIO_LINE_EGPIO0 EP93XX_GPIO_LINE_A(0) -#define EP93XX_GPIO_LINE_EGPIO1 EP93XX_GPIO_LINE_A(1) -#define EP93XX_GPIO_LINE_EGPIO2 EP93XX_GPIO_LINE_A(2) -#define EP93XX_GPIO_LINE_EGPIO3 EP93XX_GPIO_LINE_A(3) -#define EP93XX_GPIO_LINE_EGPIO4 EP93XX_GPIO_LINE_A(4) -#define EP93XX_GPIO_LINE_EGPIO5 EP93XX_GPIO_LINE_A(5) -#define EP93XX_GPIO_LINE_EGPIO6 EP93XX_GPIO_LINE_A(6) -#define EP93XX_GPIO_LINE_EGPIO7 EP93XX_GPIO_LINE_A(7) - -/* GPIO port B. 
*/ -#define EP93XX_GPIO_LINE_B(x) ((x) + 8) -#define EP93XX_GPIO_LINE_EGPIO8 EP93XX_GPIO_LINE_B(0) -#define EP93XX_GPIO_LINE_EGPIO9 EP93XX_GPIO_LINE_B(1) -#define EP93XX_GPIO_LINE_EGPIO10 EP93XX_GPIO_LINE_B(2) -#define EP93XX_GPIO_LINE_EGPIO11 EP93XX_GPIO_LINE_B(3) -#define EP93XX_GPIO_LINE_EGPIO12 EP93XX_GPIO_LINE_B(4) -#define EP93XX_GPIO_LINE_EGPIO13 EP93XX_GPIO_LINE_B(5) -#define EP93XX_GPIO_LINE_EGPIO14 EP93XX_GPIO_LINE_B(6) -#define EP93XX_GPIO_LINE_EGPIO15 EP93XX_GPIO_LINE_B(7) - -/* GPIO port C. */ -#define EP93XX_GPIO_LINE_C(x) ((x) + 40) -#define EP93XX_GPIO_LINE_ROW0 EP93XX_GPIO_LINE_C(0) -#define EP93XX_GPIO_LINE_ROW1 EP93XX_GPIO_LINE_C(1) -#define EP93XX_GPIO_LINE_ROW2 EP93XX_GPIO_LINE_C(2) -#define EP93XX_GPIO_LINE_ROW3 EP93XX_GPIO_LINE_C(3) -#define EP93XX_GPIO_LINE_ROW4 EP93XX_GPIO_LINE_C(4) -#define EP93XX_GPIO_LINE_ROW5 EP93XX_GPIO_LINE_C(5) -#define EP93XX_GPIO_LINE_ROW6 EP93XX_GPIO_LINE_C(6) -#define EP93XX_GPIO_LINE_ROW7 EP93XX_GPIO_LINE_C(7) - -/* GPIO port D. */ -#define EP93XX_GPIO_LINE_D(x) ((x) + 24) -#define EP93XX_GPIO_LINE_COL0 EP93XX_GPIO_LINE_D(0) -#define EP93XX_GPIO_LINE_COL1 EP93XX_GPIO_LINE_D(1) -#define EP93XX_GPIO_LINE_COL2 EP93XX_GPIO_LINE_D(2) -#define EP93XX_GPIO_LINE_COL3 EP93XX_GPIO_LINE_D(3) -#define EP93XX_GPIO_LINE_COL4 EP93XX_GPIO_LINE_D(4) -#define EP93XX_GPIO_LINE_COL5 EP93XX_GPIO_LINE_D(5) -#define EP93XX_GPIO_LINE_COL6 EP93XX_GPIO_LINE_D(6) -#define EP93XX_GPIO_LINE_COL7 EP93XX_GPIO_LINE_D(7) - -/* GPIO port E. */ -#define EP93XX_GPIO_LINE_E(x) ((x) + 32) -#define EP93XX_GPIO_LINE_GRLED EP93XX_GPIO_LINE_E(0) -#define EP93XX_GPIO_LINE_RDLED EP93XX_GPIO_LINE_E(1) -#define EP93XX_GPIO_LINE_DIORn EP93XX_GPIO_LINE_E(2) -#define EP93XX_GPIO_LINE_IDECS1n EP93XX_GPIO_LINE_E(3) -#define EP93XX_GPIO_LINE_IDECS2n EP93XX_GPIO_LINE_E(4) -#define EP93XX_GPIO_LINE_IDEDA0 EP93XX_GPIO_LINE_E(5) -#define EP93XX_GPIO_LINE_IDEDA1 EP93XX_GPIO_LINE_E(6) -#define EP93XX_GPIO_LINE_IDEDA2 EP93XX_GPIO_LINE_E(7) - -/* GPIO port F. */ -#define EP93XX_GPIO_LINE_F(x) ((x) + 16) -#define EP93XX_GPIO_LINE_WP EP93XX_GPIO_LINE_F(0) -#define EP93XX_GPIO_LINE_MCCD1 EP93XX_GPIO_LINE_F(1) -#define EP93XX_GPIO_LINE_MCCD2 EP93XX_GPIO_LINE_F(2) -#define EP93XX_GPIO_LINE_MCBVD1 EP93XX_GPIO_LINE_F(3) -#define EP93XX_GPIO_LINE_MCBVD2 EP93XX_GPIO_LINE_F(4) -#define EP93XX_GPIO_LINE_VS1 EP93XX_GPIO_LINE_F(5) -#define EP93XX_GPIO_LINE_READY EP93XX_GPIO_LINE_F(6) -#define EP93XX_GPIO_LINE_VS2 EP93XX_GPIO_LINE_F(7) - -/* GPIO port G. */ -#define EP93XX_GPIO_LINE_G(x) ((x) + 48) -#define EP93XX_GPIO_LINE_EECLK EP93XX_GPIO_LINE_G(0) -#define EP93XX_GPIO_LINE_EEDAT EP93XX_GPIO_LINE_G(1) -#define EP93XX_GPIO_LINE_SLA0 EP93XX_GPIO_LINE_G(2) -#define EP93XX_GPIO_LINE_SLA1 EP93XX_GPIO_LINE_G(3) -#define EP93XX_GPIO_LINE_DD12 EP93XX_GPIO_LINE_G(4) -#define EP93XX_GPIO_LINE_DD13 EP93XX_GPIO_LINE_G(5) -#define EP93XX_GPIO_LINE_DD14 EP93XX_GPIO_LINE_G(6) -#define EP93XX_GPIO_LINE_DD15 EP93XX_GPIO_LINE_G(7) - -/* GPIO port H. 
*/ -#define EP93XX_GPIO_LINE_H(x) ((x) + 56) -#define EP93XX_GPIO_LINE_DD0 EP93XX_GPIO_LINE_H(0) -#define EP93XX_GPIO_LINE_DD1 EP93XX_GPIO_LINE_H(1) -#define EP93XX_GPIO_LINE_DD2 EP93XX_GPIO_LINE_H(2) -#define EP93XX_GPIO_LINE_DD3 EP93XX_GPIO_LINE_H(3) -#define EP93XX_GPIO_LINE_DD4 EP93XX_GPIO_LINE_H(4) -#define EP93XX_GPIO_LINE_DD5 EP93XX_GPIO_LINE_H(5) -#define EP93XX_GPIO_LINE_DD6 EP93XX_GPIO_LINE_H(6) -#define EP93XX_GPIO_LINE_DD7 EP93XX_GPIO_LINE_H(7) - -/* maximum value for gpio line identifiers */ -#define EP93XX_GPIO_LINE_MAX EP93XX_GPIO_LINE_H(7) - -/* maximum value for irq capable line identifiers */ -#define EP93XX_GPIO_LINE_MAX_IRQ EP93XX_GPIO_LINE_F(7) - -#endif /* __GPIO_EP93XX_H */ diff --git a/arch/arm/mach-ep93xx/hardware.h b/arch/arm/mach-ep93xx/hardware.h deleted file mode 100644 index e7d850e04782..000000000000 --- a/arch/arm/mach-ep93xx/hardware.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/arm/mach-ep93xx/include/mach/hardware.h - */ - -#ifndef __ASM_ARCH_HARDWARE_H -#define __ASM_ARCH_HARDWARE_H - -#include "platform.h" - -/* - * The EP93xx has two external crystal oscillators. To generate the - * required high-frequency clocks, the processor uses two phase-locked- - * loops (PLLs) to multiply the incoming external clock signal to much - * higher frequencies that are then divided down by programmable dividers - * to produce the needed clocks. The PLLs operate independently of one - * another. - */ -#define EP93XX_EXT_CLK_RATE 14745600 -#define EP93XX_EXT_RTC_RATE 32768 - -#define EP93XX_KEYTCHCLK_DIV4 (EP93XX_EXT_CLK_RATE / 4) -#define EP93XX_KEYTCHCLK_DIV16 (EP93XX_EXT_CLK_RATE / 16) - -#endif diff --git a/arch/arm/mach-ep93xx/irqs.h b/arch/arm/mach-ep93xx/irqs.h deleted file mode 100644 index 353201b90c66..000000000000 --- a/arch/arm/mach-ep93xx/irqs.h +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARCH_IRQS_H -#define __ASM_ARCH_IRQS_H - -#define IRQ_EP93XX_VIC0 1 - -#define IRQ_EP93XX_COMMRX (IRQ_EP93XX_VIC0 + 2) -#define IRQ_EP93XX_COMMTX (IRQ_EP93XX_VIC0 + 3) -#define IRQ_EP93XX_TIMER1 (IRQ_EP93XX_VIC0 + 4) -#define IRQ_EP93XX_TIMER2 (IRQ_EP93XX_VIC0 + 5) -#define IRQ_EP93XX_AACINTR (IRQ_EP93XX_VIC0 + 6) -#define IRQ_EP93XX_DMAM2P0 (IRQ_EP93XX_VIC0 + 7) -#define IRQ_EP93XX_DMAM2P1 (IRQ_EP93XX_VIC0 + 8) -#define IRQ_EP93XX_DMAM2P2 (IRQ_EP93XX_VIC0 + 9) -#define IRQ_EP93XX_DMAM2P3 (IRQ_EP93XX_VIC0 + 10) -#define IRQ_EP93XX_DMAM2P4 (IRQ_EP93XX_VIC0 + 11) -#define IRQ_EP93XX_DMAM2P5 (IRQ_EP93XX_VIC0 + 12) -#define IRQ_EP93XX_DMAM2P6 (IRQ_EP93XX_VIC0 + 13) -#define IRQ_EP93XX_DMAM2P7 (IRQ_EP93XX_VIC0 + 14) -#define IRQ_EP93XX_DMAM2P8 (IRQ_EP93XX_VIC0 + 15) -#define IRQ_EP93XX_DMAM2P9 (IRQ_EP93XX_VIC0 + 16) -#define IRQ_EP93XX_DMAM2M0 (IRQ_EP93XX_VIC0 + 17) -#define IRQ_EP93XX_DMAM2M1 (IRQ_EP93XX_VIC0 + 18) -#define IRQ_EP93XX_GPIO0MUX (IRQ_EP93XX_VIC0 + 19) -#define IRQ_EP93XX_GPIO1MUX (IRQ_EP93XX_VIC0 + 20) -#define IRQ_EP93XX_GPIO2MUX (IRQ_EP93XX_VIC0 + 21) -#define IRQ_EP93XX_GPIO3MUX (IRQ_EP93XX_VIC0 + 22) -#define IRQ_EP93XX_UART1RX (IRQ_EP93XX_VIC0 + 23) -#define IRQ_EP93XX_UART1TX (IRQ_EP93XX_VIC0 + 24) -#define IRQ_EP93XX_UART2RX (IRQ_EP93XX_VIC0 + 25) -#define IRQ_EP93XX_UART2TX (IRQ_EP93XX_VIC0 + 26) -#define IRQ_EP93XX_UART3RX (IRQ_EP93XX_VIC0 + 27) -#define IRQ_EP93XX_UART3TX (IRQ_EP93XX_VIC0 + 28) -#define IRQ_EP93XX_KEY (IRQ_EP93XX_VIC0 + 29) -#define IRQ_EP93XX_TOUCH (IRQ_EP93XX_VIC0 + 30) -#define EP93XX_VIC1_VALID_IRQ_MASK 0x7ffffffc - -#define IRQ_EP93XX_VIC1 
(IRQ_EP93XX_VIC0 + 32) - -#define IRQ_EP93XX_EXT0 (IRQ_EP93XX_VIC1 + 0) -#define IRQ_EP93XX_EXT1 (IRQ_EP93XX_VIC1 + 1) -#define IRQ_EP93XX_EXT2 (IRQ_EP93XX_VIC1 + 2) -#define IRQ_EP93XX_64HZ (IRQ_EP93XX_VIC1 + 3) -#define IRQ_EP93XX_WATCHDOG (IRQ_EP93XX_VIC1 + 4) -#define IRQ_EP93XX_RTC (IRQ_EP93XX_VIC1 + 5) -#define IRQ_EP93XX_IRDA (IRQ_EP93XX_VIC1 + 6) -#define IRQ_EP93XX_ETHERNET (IRQ_EP93XX_VIC1 + 7) -#define IRQ_EP93XX_EXT3 (IRQ_EP93XX_VIC1 + 8) -#define IRQ_EP93XX_PROG (IRQ_EP93XX_VIC1 + 9) -#define IRQ_EP93XX_1HZ (IRQ_EP93XX_VIC1 + 10) -#define IRQ_EP93XX_VSYNC (IRQ_EP93XX_VIC1 + 11) -#define IRQ_EP93XX_VIDEO_FIFO (IRQ_EP93XX_VIC1 + 12) -#define IRQ_EP93XX_SSP1RX (IRQ_EP93XX_VIC1 + 13) -#define IRQ_EP93XX_SSP1TX (IRQ_EP93XX_VIC1 + 14) -#define IRQ_EP93XX_GPIO4MUX (IRQ_EP93XX_VIC1 + 15) -#define IRQ_EP93XX_GPIO5MUX (IRQ_EP93XX_VIC1 + 16) -#define IRQ_EP93XX_GPIO6MUX (IRQ_EP93XX_VIC1 + 17) -#define IRQ_EP93XX_GPIO7MUX (IRQ_EP93XX_VIC1 + 18) -#define IRQ_EP93XX_TIMER3 (IRQ_EP93XX_VIC1 + 19) -#define IRQ_EP93XX_UART1 (IRQ_EP93XX_VIC1 + 20) -#define IRQ_EP93XX_SSP (IRQ_EP93XX_VIC1 + 21) -#define IRQ_EP93XX_UART2 (IRQ_EP93XX_VIC1 + 22) -#define IRQ_EP93XX_UART3 (IRQ_EP93XX_VIC1 + 23) -#define IRQ_EP93XX_USB (IRQ_EP93XX_VIC1 + 24) -#define IRQ_EP93XX_ETHERNET_PME (IRQ_EP93XX_VIC1 + 25) -#define IRQ_EP93XX_DSP (IRQ_EP93XX_VIC1 + 26) -#define IRQ_EP93XX_GPIO_AB (IRQ_EP93XX_VIC1 + 27) -#define IRQ_EP93XX_SAI (IRQ_EP93XX_VIC1 + 28) -#define EP93XX_VIC2_VALID_IRQ_MASK 0x1fffffff - -#define NR_EP93XX_IRQS (IRQ_EP93XX_VIC1 + 32 + 24) - -#define EP93XX_BOARD_IRQ(x) (NR_EP93XX_IRQS + (x)) -#define EP93XX_BOARD_IRQS 32 - -#endif diff --git a/arch/arm/mach-ep93xx/platform.h b/arch/arm/mach-ep93xx/platform.h deleted file mode 100644 index 5fb1b919133f..000000000000 --- a/arch/arm/mach-ep93xx/platform.h +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/arm/mach-ep93xx/include/mach/platform.h - */ - -#ifndef __ASSEMBLY__ - -#include <linux/platform_data/eth-ep93xx.h> -#include <linux/reboot.h> - -struct device; -struct i2c_board_info; -struct spi_board_info; -struct platform_device; -struct ep93xxfb_mach_info; -struct ep93xx_keypad_platform_data; -struct ep93xx_spi_info; - -void ep93xx_map_io(void); -void ep93xx_init_irq(void); - -void ep93xx_register_flash(unsigned int width, - resource_size_t start, resource_size_t size); - -void ep93xx_register_eth(struct ep93xx_eth_data *data, int copy_addr); -void ep93xx_register_i2c(struct i2c_board_info *devices, int num); -void ep93xx_register_spi(struct ep93xx_spi_info *info, - struct spi_board_info *devices, int num); -void ep93xx_register_fb(struct ep93xxfb_mach_info *data); -void ep93xx_register_pwm(int pwm0, int pwm1); -void ep93xx_register_keypad(struct ep93xx_keypad_platform_data *data); -void ep93xx_register_i2s(void); -void ep93xx_register_ac97(void); -void ep93xx_register_ide(void); -void ep93xx_register_adc(void); - -struct device *ep93xx_init_devices(void); -extern void ep93xx_timer_init(void); - -void ep93xx_restart(enum reboot_mode, const char *); - -#endif diff --git a/arch/arm/mach-ep93xx/soc.h b/arch/arm/mach-ep93xx/soc.h deleted file mode 100644 index 3245ebbd5069..000000000000 --- a/arch/arm/mach-ep93xx/soc.h +++ /dev/null @@ -1,212 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * arch/arm/mach-ep93xx/soc.h - * - * Copyright (C) 2012 Open Kernel Labs <www.ok-labs.com> - * Copyright (C) 2012 Ryan Mallon <rmallon@gmail.com> - */ - -#ifndef _EP93XX_SOC_H -#define _EP93XX_SOC_H - -#include 
"ep93xx-regs.h" -#include "irqs.h" - -/* - * EP93xx Physical Memory Map: - * - * The ASDO pin is sampled at system reset to select a synchronous or - * asynchronous boot configuration. When ASDO is "1" (i.e. pulled-up) - * the synchronous boot mode is selected. When ASDO is "0" (i.e - * pulled-down) the asynchronous boot mode is selected. - * - * In synchronous boot mode nSDCE3 is decoded starting at physical address - * 0x00000000 and nCS0 is decoded starting at 0xf0000000. For asynchronous - * boot mode they are swapped with nCS0 decoded at 0x00000000 ann nSDCE3 - * decoded at 0xf0000000. - * - * There is known errata for the EP93xx dealing with External Memory - * Configurations. Please refer to "AN273: EP93xx Silicon Rev E Design - * Guidelines" for more information. This document can be found at: - * - * http://www.cirrus.com/en/pubs/appNote/AN273REV4.pdf - */ - -#define EP93XX_CS0_PHYS_BASE_ASYNC 0x00000000 /* ASDO Pin = 0 */ -#define EP93XX_SDCE3_PHYS_BASE_SYNC 0x00000000 /* ASDO Pin = 1 */ -#define EP93XX_CS1_PHYS_BASE 0x10000000 -#define EP93XX_CS2_PHYS_BASE 0x20000000 -#define EP93XX_CS3_PHYS_BASE 0x30000000 -#define EP93XX_PCMCIA_PHYS_BASE 0x40000000 -#define EP93XX_CS6_PHYS_BASE 0x60000000 -#define EP93XX_CS7_PHYS_BASE 0x70000000 -#define EP93XX_SDCE0_PHYS_BASE 0xc0000000 -#define EP93XX_SDCE1_PHYS_BASE 0xd0000000 -#define EP93XX_SDCE2_PHYS_BASE 0xe0000000 -#define EP93XX_SDCE3_PHYS_BASE_ASYNC 0xf0000000 /* ASDO Pin = 0 */ -#define EP93XX_CS0_PHYS_BASE_SYNC 0xf0000000 /* ASDO Pin = 1 */ - -/* AHB peripherals */ -#define EP93XX_DMA_BASE EP93XX_AHB_IOMEM(0x00000000) - -#define EP93XX_ETHERNET_PHYS_BASE EP93XX_AHB_PHYS(0x00010000) -#define EP93XX_ETHERNET_BASE EP93XX_AHB_IOMEM(0x00010000) - -#define EP93XX_USB_PHYS_BASE EP93XX_AHB_PHYS(0x00020000) -#define EP93XX_USB_BASE EP93XX_AHB_IOMEM(0x00020000) - -#define EP93XX_RASTER_PHYS_BASE EP93XX_AHB_PHYS(0x00030000) -#define EP93XX_RASTER_BASE EP93XX_AHB_IOMEM(0x00030000) - -#define EP93XX_GRAPHICS_ACCEL_BASE EP93XX_AHB_IOMEM(0x00040000) - -#define EP93XX_SDRAM_CONTROLLER_BASE EP93XX_AHB_IOMEM(0x00060000) - -#define EP93XX_PCMCIA_CONTROLLER_BASE EP93XX_AHB_IOMEM(0x00080000) - -#define EP93XX_BOOT_ROM_BASE EP93XX_AHB_IOMEM(0x00090000) - -#define EP93XX_IDE_PHYS_BASE EP93XX_AHB_PHYS(0x000a0000) -#define EP93XX_IDE_BASE EP93XX_AHB_IOMEM(0x000a0000) - -#define EP93XX_VIC1_BASE EP93XX_AHB_IOMEM(0x000b0000) - -#define EP93XX_VIC2_BASE EP93XX_AHB_IOMEM(0x000c0000) - -/* APB peripherals */ -#define EP93XX_TIMER_BASE EP93XX_APB_IOMEM(0x00010000) - -#define EP93XX_I2S_PHYS_BASE EP93XX_APB_PHYS(0x00020000) -#define EP93XX_I2S_BASE EP93XX_APB_IOMEM(0x00020000) - -#define EP93XX_SECURITY_BASE EP93XX_APB_IOMEM(0x00030000) - -#define EP93XX_AAC_PHYS_BASE EP93XX_APB_PHYS(0x00080000) -#define EP93XX_AAC_BASE EP93XX_APB_IOMEM(0x00080000) - -#define EP93XX_SPI_PHYS_BASE EP93XX_APB_PHYS(0x000a0000) -#define EP93XX_SPI_BASE EP93XX_APB_IOMEM(0x000a0000) - -#define EP93XX_IRDA_BASE EP93XX_APB_IOMEM(0x000b0000) - -#define EP93XX_KEY_MATRIX_PHYS_BASE EP93XX_APB_PHYS(0x000f0000) -#define EP93XX_KEY_MATRIX_BASE EP93XX_APB_IOMEM(0x000f0000) - -#define EP93XX_ADC_PHYS_BASE EP93XX_APB_PHYS(0x00100000) -#define EP93XX_ADC_BASE EP93XX_APB_IOMEM(0x00100000) -#define EP93XX_TOUCHSCREEN_BASE EP93XX_APB_IOMEM(0x00100000) - -#define EP93XX_PWM_PHYS_BASE EP93XX_APB_PHYS(0x00110000) -#define EP93XX_PWM_BASE EP93XX_APB_IOMEM(0x00110000) - -#define EP93XX_RTC_PHYS_BASE EP93XX_APB_PHYS(0x00120000) -#define EP93XX_RTC_BASE EP93XX_APB_IOMEM(0x00120000) - -#define 
EP93XX_WATCHDOG_PHYS_BASE EP93XX_APB_PHYS(0x00140000) -#define EP93XX_WATCHDOG_BASE EP93XX_APB_IOMEM(0x00140000) - -/* System controller */ -#define EP93XX_SYSCON_BASE EP93XX_APB_IOMEM(0x00130000) -#define EP93XX_SYSCON_REG(x) (EP93XX_SYSCON_BASE + (x)) -#define EP93XX_SYSCON_POWER_STATE EP93XX_SYSCON_REG(0x00) -#define EP93XX_SYSCON_PWRCNT EP93XX_SYSCON_REG(0x04) -#define EP93XX_SYSCON_PWRCNT_FIR_EN (1<<31) -#define EP93XX_SYSCON_PWRCNT_UARTBAUD (1<<29) -#define EP93XX_SYSCON_PWRCNT_USH_EN 28 -#define EP93XX_SYSCON_PWRCNT_DMA_M2M1 27 -#define EP93XX_SYSCON_PWRCNT_DMA_M2M0 26 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P8 25 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P9 24 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P6 23 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P7 22 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P4 21 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P5 20 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P2 19 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P3 18 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P0 17 -#define EP93XX_SYSCON_PWRCNT_DMA_M2P1 16 -#define EP93XX_SYSCON_HALT EP93XX_SYSCON_REG(0x08) -#define EP93XX_SYSCON_STANDBY EP93XX_SYSCON_REG(0x0c) -#define EP93XX_SYSCON_CLKSET1 EP93XX_SYSCON_REG(0x20) -#define EP93XX_SYSCON_CLKSET1_NBYP1 (1<<23) -#define EP93XX_SYSCON_CLKSET2 EP93XX_SYSCON_REG(0x24) -#define EP93XX_SYSCON_CLKSET2_NBYP2 (1<<19) -#define EP93XX_SYSCON_CLKSET2_PLL2_EN (1<<18) -#define EP93XX_SYSCON_DEVCFG EP93XX_SYSCON_REG(0x80) -#define EP93XX_SYSCON_DEVCFG_SWRST (1<<31) -#define EP93XX_SYSCON_DEVCFG_D1ONG (1<<30) -#define EP93XX_SYSCON_DEVCFG_D0ONG (1<<29) -#define EP93XX_SYSCON_DEVCFG_IONU2 (1<<28) -#define EP93XX_SYSCON_DEVCFG_GONK (1<<27) -#define EP93XX_SYSCON_DEVCFG_TONG (1<<26) -#define EP93XX_SYSCON_DEVCFG_MONG (1<<25) -#define EP93XX_SYSCON_DEVCFG_U3EN 24 -#define EP93XX_SYSCON_DEVCFG_CPENA (1<<23) -#define EP93XX_SYSCON_DEVCFG_A2ONG (1<<22) -#define EP93XX_SYSCON_DEVCFG_A1ONG (1<<21) -#define EP93XX_SYSCON_DEVCFG_U2EN 20 -#define EP93XX_SYSCON_DEVCFG_EXVC (1<<19) -#define EP93XX_SYSCON_DEVCFG_U1EN 18 -#define EP93XX_SYSCON_DEVCFG_TIN (1<<17) -#define EP93XX_SYSCON_DEVCFG_HC3IN (1<<15) -#define EP93XX_SYSCON_DEVCFG_HC3EN (1<<14) -#define EP93XX_SYSCON_DEVCFG_HC1IN (1<<13) -#define EP93XX_SYSCON_DEVCFG_HC1EN (1<<12) -#define EP93XX_SYSCON_DEVCFG_HONIDE (1<<11) -#define EP93XX_SYSCON_DEVCFG_GONIDE (1<<10) -#define EP93XX_SYSCON_DEVCFG_PONG (1<<9) -#define EP93XX_SYSCON_DEVCFG_EONIDE (1<<8) -#define EP93XX_SYSCON_DEVCFG_I2SONSSP (1<<7) -#define EP93XX_SYSCON_DEVCFG_I2SONAC97 (1<<6) -#define EP93XX_SYSCON_DEVCFG_RASONP3 (1<<4) -#define EP93XX_SYSCON_DEVCFG_RAS (1<<3) -#define EP93XX_SYSCON_DEVCFG_ADCPD (1<<2) -#define EP93XX_SYSCON_DEVCFG_KEYS (1<<1) -#define EP93XX_SYSCON_DEVCFG_SHENA (1<<0) -#define EP93XX_SYSCON_VIDCLKDIV EP93XX_SYSCON_REG(0x84) -#define EP93XX_SYSCON_CLKDIV_ENABLE 15 -#define EP93XX_SYSCON_CLKDIV_ESEL (1<<14) -#define EP93XX_SYSCON_CLKDIV_PSEL (1<<13) -#define EP93XX_SYSCON_CLKDIV_PDIV_SHIFT 8 -#define EP93XX_SYSCON_I2SCLKDIV EP93XX_SYSCON_REG(0x8c) -#define EP93XX_SYSCON_I2SCLKDIV_SENA 31 -#define EP93XX_SYSCON_I2SCLKDIV_ORIDE (1<<29) -#define EP93XX_SYSCON_I2SCLKDIV_SPOL (1<<19) -#define EP93XX_I2SCLKDIV_SDIV (1 << 16) -#define EP93XX_I2SCLKDIV_LRDIV32 (0 << 17) -#define EP93XX_I2SCLKDIV_LRDIV64 (1 << 17) -#define EP93XX_I2SCLKDIV_LRDIV128 (2 << 17) -#define EP93XX_I2SCLKDIV_LRDIV_MASK (3 << 17) -#define EP93XX_SYSCON_KEYTCHCLKDIV EP93XX_SYSCON_REG(0x90) -#define EP93XX_SYSCON_KEYTCHCLKDIV_TSEN 31 -#define EP93XX_SYSCON_KEYTCHCLKDIV_ADIV 16 -#define EP93XX_SYSCON_KEYTCHCLKDIV_KEN 15 -#define 
EP93XX_SYSCON_KEYTCHCLKDIV_KDIV (1<<0) -#define EP93XX_SYSCON_SYSCFG EP93XX_SYSCON_REG(0x9c) -#define EP93XX_SYSCON_SYSCFG_REV_MASK (0xf0000000) -#define EP93XX_SYSCON_SYSCFG_REV_SHIFT (28) -#define EP93XX_SYSCON_SYSCFG_SBOOT (1<<8) -#define EP93XX_SYSCON_SYSCFG_LCSN7 (1<<7) -#define EP93XX_SYSCON_SYSCFG_LCSN6 (1<<6) -#define EP93XX_SYSCON_SYSCFG_LASDO (1<<5) -#define EP93XX_SYSCON_SYSCFG_LEEDA (1<<4) -#define EP93XX_SYSCON_SYSCFG_LEECLK (1<<3) -#define EP93XX_SYSCON_SYSCFG_LCSN2 (1<<1) -#define EP93XX_SYSCON_SYSCFG_LCSN1 (1<<0) -#define EP93XX_SYSCON_SWLOCK EP93XX_SYSCON_REG(0xc0) - -/* EP93xx System Controller software locked register write */ -void ep93xx_syscon_swlocked_write(unsigned int val, void __iomem *reg); -void ep93xx_devcfg_set_clear(unsigned int set_bits, unsigned int clear_bits); - -static inline void ep93xx_devcfg_set_bits(unsigned int bits) -{ - ep93xx_devcfg_set_clear(bits, 0x00); -} - -static inline void ep93xx_devcfg_clear_bits(unsigned int bits) -{ - ep93xx_devcfg_set_clear(0x00, bits); -} - -#endif /* _EP93XX_SOC_H */ diff --git a/arch/arm/mach-ep93xx/timer-ep93xx.c b/arch/arm/mach-ep93xx/timer-ep93xx.c deleted file mode 100644 index a9efa7bc2fa1..000000000000 --- a/arch/arm/mach-ep93xx/timer-ep93xx.c +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/clocksource.h> -#include <linux/clockchips.h> -#include <linux/sched_clock.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/io.h> -#include <asm/mach/time.h> -#include "soc.h" -#include "platform.h" - -/************************************************************************* - * Timer handling for EP93xx - ************************************************************************* - * The ep93xx has four internal timers. Timers 1, 2 (both 16 bit) and - * 3 (32 bit) count down at 508 kHz, are self-reloading, and can generate - * an interrupt on underflow. Timer 4 (40 bit) counts down at 983.04 kHz, - * is free-running, and can't generate interrupts. - * - * The 508 kHz timers are ideal for use for the timer interrupt, as the - * most common values of HZ divide 508 kHz nicely. We pick the 32 bit - * timer (timer 3) to get as long sleep intervals as possible when using - * CONFIG_NO_HZ. - * - * The higher clock rate of timer 4 makes it a better choice than the - * other timers for use as clock source and for sched_clock(), providing - * a stable 40 bit time base. 
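- * - * For example, at HZ=100 one tick is 508469 / 100 ~= 5085 counts, and the - * 32-bit timer 3 allows NO_HZ sleeps of up to 2^32 / 508469 ~= 8447 seconds, - * while the 40-bit timer 4 wraps only after 2^40 / 983040 seconds, i.e. - * roughly 12.9 days.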
- ************************************************************************* - */ -#define EP93XX_TIMER_REG(x) (EP93XX_TIMER_BASE + (x)) -#define EP93XX_TIMER1_LOAD EP93XX_TIMER_REG(0x00) -#define EP93XX_TIMER1_VALUE EP93XX_TIMER_REG(0x04) -#define EP93XX_TIMER1_CONTROL EP93XX_TIMER_REG(0x08) -#define EP93XX_TIMER123_CONTROL_ENABLE (1 << 7) -#define EP93XX_TIMER123_CONTROL_MODE (1 << 6) -#define EP93XX_TIMER123_CONTROL_CLKSEL (1 << 3) -#define EP93XX_TIMER1_CLEAR EP93XX_TIMER_REG(0x0c) -#define EP93XX_TIMER2_LOAD EP93XX_TIMER_REG(0x20) -#define EP93XX_TIMER2_VALUE EP93XX_TIMER_REG(0x24) -#define EP93XX_TIMER2_CONTROL EP93XX_TIMER_REG(0x28) -#define EP93XX_TIMER2_CLEAR EP93XX_TIMER_REG(0x2c) -#define EP93XX_TIMER4_VALUE_LOW EP93XX_TIMER_REG(0x60) -#define EP93XX_TIMER4_VALUE_HIGH EP93XX_TIMER_REG(0x64) -#define EP93XX_TIMER4_VALUE_HIGH_ENABLE (1 << 8) -#define EP93XX_TIMER3_LOAD EP93XX_TIMER_REG(0x80) -#define EP93XX_TIMER3_VALUE EP93XX_TIMER_REG(0x84) -#define EP93XX_TIMER3_CONTROL EP93XX_TIMER_REG(0x88) -#define EP93XX_TIMER3_CLEAR EP93XX_TIMER_REG(0x8c) - -#define EP93XX_TIMER123_RATE 508469 -#define EP93XX_TIMER4_RATE 983040 - -static u64 notrace ep93xx_read_sched_clock(void) -{ - u64 ret; - - ret = readl(EP93XX_TIMER4_VALUE_LOW); - ret |= ((u64) (readl(EP93XX_TIMER4_VALUE_HIGH) & 0xff) << 32); - return ret; -} - -static u64 ep93xx_clocksource_read(struct clocksource *c) -{ - u64 ret; - - ret = readl(EP93XX_TIMER4_VALUE_LOW); - ret |= ((u64) (readl(EP93XX_TIMER4_VALUE_HIGH) & 0xff) << 32); - return (u64) ret; -} - -static int ep93xx_clkevt_set_next_event(unsigned long next, - struct clock_event_device *evt) -{ - /* Default mode: periodic, off, 508 kHz */ - u32 tmode = EP93XX_TIMER123_CONTROL_MODE | - EP93XX_TIMER123_CONTROL_CLKSEL; - - /* Clear timer */ - writel(tmode, EP93XX_TIMER3_CONTROL); - - /* Set next event */ - writel(next, EP93XX_TIMER3_LOAD); - writel(tmode | EP93XX_TIMER123_CONTROL_ENABLE, - EP93XX_TIMER3_CONTROL); - return 0; -} - - -static int ep93xx_clkevt_shutdown(struct clock_event_device *evt) -{ - /* Disable timer */ - writel(0, EP93XX_TIMER3_CONTROL); - - return 0; -} - -static struct clock_event_device ep93xx_clockevent = { - .name = "timer1", - .features = CLOCK_EVT_FEAT_ONESHOT, - .set_state_shutdown = ep93xx_clkevt_shutdown, - .set_state_oneshot = ep93xx_clkevt_shutdown, - .tick_resume = ep93xx_clkevt_shutdown, - .set_next_event = ep93xx_clkevt_set_next_event, - .rating = 300, -}; - -static irqreturn_t ep93xx_timer_interrupt(int irq, void *dev_id) -{ - struct clock_event_device *evt = dev_id; - - /* Writing any value clears the timer interrupt */ - writel(1, EP93XX_TIMER3_CLEAR); - - evt->event_handler(evt); - - return IRQ_HANDLED; -} - -void __init ep93xx_timer_init(void) -{ - int irq = IRQ_EP93XX_TIMER3; - unsigned long flags = IRQF_TIMER | IRQF_IRQPOLL; - - /* Enable and register clocksource and sched_clock on timer 4 */ - writel(EP93XX_TIMER4_VALUE_HIGH_ENABLE, - EP93XX_TIMER4_VALUE_HIGH); - clocksource_mmio_init(NULL, "timer4", - EP93XX_TIMER4_RATE, 200, 40, - ep93xx_clocksource_read); - sched_clock_register(ep93xx_read_sched_clock, 40, - EP93XX_TIMER4_RATE); - - /* Set up clockevent on timer 3 */ - if (request_irq(irq, ep93xx_timer_interrupt, flags, "ep93xx timer", - &ep93xx_clockevent)) - pr_err("Failed to request irq %d (ep93xx timer)\n", irq); - clockevents_config_and_register(&ep93xx_clockevent, - EP93XX_TIMER123_RATE, - 1, - 0xffffffffU); -} diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c deleted file mode 100644 index 
d3de7283ecb3..000000000000 --- a/arch/arm/mach-ep93xx/ts72xx.c +++ /dev/null @@ -1,422 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * arch/arm/mach-ep93xx/ts72xx.c - * Technologic Systems TS72xx SBC support. - * - * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/platform_device.h> -#include <linux/io.h> -#include <linux/mtd/platnand.h> -#include <linux/spi/spi.h> -#include <linux/spi/flash.h> -#include <linux/spi/mmc_spi.h> -#include <linux/mmc/host.h> -#include <linux/platform_data/spi-ep93xx.h> -#include <linux/gpio/machine.h> - -#include "gpio-ep93xx.h" -#include "hardware.h" - -#include <asm/mach-types.h> -#include <asm/mach/map.h> -#include <asm/mach/arch.h> - -#include "soc.h" -#include "ts72xx.h" - -/************************************************************************* - * IO map - *************************************************************************/ -static struct map_desc ts72xx_io_desc[] __initdata = { - { - .virtual = (unsigned long)TS72XX_MODEL_VIRT_BASE, - .pfn = __phys_to_pfn(TS72XX_MODEL_PHYS_BASE), - .length = TS72XX_MODEL_SIZE, - .type = MT_DEVICE, - }, { - .virtual = (unsigned long)TS72XX_OPTIONS_VIRT_BASE, - .pfn = __phys_to_pfn(TS72XX_OPTIONS_PHYS_BASE), - .length = TS72XX_OPTIONS_SIZE, - .type = MT_DEVICE, - }, { - .virtual = (unsigned long)TS72XX_OPTIONS2_VIRT_BASE, - .pfn = __phys_to_pfn(TS72XX_OPTIONS2_PHYS_BASE), - .length = TS72XX_OPTIONS2_SIZE, - .type = MT_DEVICE, - }, { - .virtual = (unsigned long)TS72XX_CPLDVER_VIRT_BASE, - .pfn = __phys_to_pfn(TS72XX_CPLDVER_PHYS_BASE), - .length = TS72XX_CPLDVER_SIZE, - .type = MT_DEVICE, - } -}; - -static void __init ts72xx_map_io(void) -{ - ep93xx_map_io(); - iotable_init(ts72xx_io_desc, ARRAY_SIZE(ts72xx_io_desc)); -} - - -/************************************************************************* - * NAND flash - *************************************************************************/ -#define TS72XX_NAND_CONTROL_ADDR_LINE 22 /* 0xN0400000 */ -#define TS72XX_NAND_BUSY_ADDR_LINE 23 /* 0xN0800000 */ - -static void ts72xx_nand_hwcontrol(struct nand_chip *chip, - int cmd, unsigned int ctrl) -{ - if (ctrl & NAND_CTRL_CHANGE) { - void __iomem *addr = chip->legacy.IO_ADDR_R; - unsigned char bits; - - addr += (1 << TS72XX_NAND_CONTROL_ADDR_LINE); - - bits = __raw_readb(addr) & ~0x07; - bits |= (ctrl & NAND_NCE) << 2; /* bit 0 -> bit 2 */ - bits |= (ctrl & NAND_CLE); /* bit 1 -> bit 1 */ - bits |= (ctrl & NAND_ALE) >> 2; /* bit 2 -> bit 0 */ - - __raw_writeb(bits, addr); - } - - if (cmd != NAND_CMD_NONE) - __raw_writeb(cmd, chip->legacy.IO_ADDR_W); -} - -static int ts72xx_nand_device_ready(struct nand_chip *chip) -{ - void __iomem *addr = chip->legacy.IO_ADDR_R; - - addr += (1 << TS72XX_NAND_BUSY_ADDR_LINE); - - return !!(__raw_readb(addr) & 0x20); -} - -#define TS72XX_BOOTROM_PART_SIZE (SZ_16K) -#define TS72XX_REDBOOT_PART_SIZE (SZ_2M + SZ_1M) - -static struct mtd_partition ts72xx_nand_parts[] = { - { - .name = "TS-BOOTROM", - .offset = 0, - .size = TS72XX_BOOTROM_PART_SIZE, - .mask_flags = MTD_WRITEABLE, /* force read-only */ - }, { - .name = "Linux", - .offset = MTDPART_OFS_RETAIN, - .size = TS72XX_REDBOOT_PART_SIZE, - /* leave so much for last partition */ - }, { - .name = "RedBoot", - .offset = MTDPART_OFS_APPEND, - .size = MTDPART_SIZ_FULL, - .mask_flags = MTD_WRITEABLE, /* force read-only */ - }, -}; - -static struct platform_nand_data ts72xx_nand_data = { - 
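- /* Glue for the generic "gen_nand" driver: a single chip with a 15 us tR - * chip delay, driven through the board-specific cmd_ctrl/dev_ready hooks - * defined above. */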
.chip = { - .nr_chips = 1, - .chip_offset = 0, - .chip_delay = 15, - }, - .ctrl = { - .cmd_ctrl = ts72xx_nand_hwcontrol, - .dev_ready = ts72xx_nand_device_ready, - }, -}; - -static struct resource ts72xx_nand_resource[] = { - { - .start = 0, /* filled in later */ - .end = 0, /* filled in later */ - .flags = IORESOURCE_MEM, - }, -}; - -static struct platform_device ts72xx_nand_flash = { - .name = "gen_nand", - .id = -1, - .dev.platform_data = &ts72xx_nand_data, - .resource = ts72xx_nand_resource, - .num_resources = ARRAY_SIZE(ts72xx_nand_resource), -}; - -static void __init ts72xx_register_flash(struct mtd_partition *parts, int n, - resource_size_t start) -{ - /* - * The TS7200 has NOR flash; all other TS72xx boards have NAND flash. - */ - if (board_is_ts7200()) { - ep93xx_register_flash(2, EP93XX_CS6_PHYS_BASE, SZ_16M); - } else { - ts72xx_nand_resource[0].start = start; - ts72xx_nand_resource[0].end = start + SZ_16M - 1; - - ts72xx_nand_data.chip.partitions = parts; - ts72xx_nand_data.chip.nr_partitions = n; - - platform_device_register(&ts72xx_nand_flash); - } -} - -/************************************************************************* - * RTC M48T86 - *************************************************************************/ -#define TS72XX_RTC_INDEX_PHYS_BASE (EP93XX_CS1_PHYS_BASE + 0x00800000) -#define TS72XX_RTC_DATA_PHYS_BASE (EP93XX_CS1_PHYS_BASE + 0x01700000) - -static struct resource ts72xx_rtc_resources[] = { - DEFINE_RES_MEM(TS72XX_RTC_INDEX_PHYS_BASE, 0x01), - DEFINE_RES_MEM(TS72XX_RTC_DATA_PHYS_BASE, 0x01), -}; - -static struct platform_device ts72xx_rtc_device = { - .name = "rtc-m48t86", - .id = -1, - .resource = ts72xx_rtc_resources, - .num_resources = ARRAY_SIZE(ts72xx_rtc_resources), -}; - -/************************************************************************* - * Watchdog (in CPLD) - *************************************************************************/ -#define TS72XX_WDT_CONTROL_PHYS_BASE (EP93XX_CS2_PHYS_BASE + 0x03800000) -#define TS72XX_WDT_FEED_PHYS_BASE (EP93XX_CS2_PHYS_BASE + 0x03c00000) - -static struct resource ts72xx_wdt_resources[] = { - DEFINE_RES_MEM(TS72XX_WDT_CONTROL_PHYS_BASE, 0x01), - DEFINE_RES_MEM(TS72XX_WDT_FEED_PHYS_BASE, 0x01), -}; - -static struct platform_device ts72xx_wdt_device = { - .name = "ts72xx-wdt", - .id = -1, - .resource = ts72xx_wdt_resources, - .num_resources = ARRAY_SIZE(ts72xx_wdt_resources), -}; - -/************************************************************************* - * ETH - *************************************************************************/ -static struct ep93xx_eth_data __initdata ts72xx_eth_data = { - .phy_id = 1, -}; - -/************************************************************************* - * SPI SD/MMC host - *************************************************************************/ -#define BK3_EN_SDCARD_PHYS_BASE 0x12400000 -#define BK3_EN_SDCARD_PWR 0x0 -#define BK3_DIS_SDCARD_PWR 0x0C -static void bk3_mmc_spi_setpower(struct device *dev, unsigned int vdd) -{ - void __iomem *pwr_sd = ioremap(BK3_EN_SDCARD_PHYS_BASE, SZ_4K); - - if (!pwr_sd) { - pr_err("Failed to enable SD card power!"); - return; - } - - pr_debug("%s: SD card pwr %s VDD:0x%x\n", __func__, - !!vdd ?
"ON" : "OFF", vdd); - - if (!!vdd) - __raw_writeb(BK3_EN_SDCARD_PWR, pwr_sd); - else - __raw_writeb(BK3_DIS_SDCARD_PWR, pwr_sd); - - iounmap(pwr_sd); -} - -static struct mmc_spi_platform_data bk3_spi_mmc_data = { - .detect_delay = 500, - .powerup_msecs = 100, - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .caps = MMC_CAP_NONREMOVABLE, - .setpower = bk3_mmc_spi_setpower, -}; - -/************************************************************************* - * SPI Bus - SD card access - *************************************************************************/ -static struct spi_board_info bk3_spi_board_info[] __initdata = { - { - .modalias = "mmc_spi", - .platform_data = &bk3_spi_mmc_data, - .max_speed_hz = 7.4E6, - .bus_num = 0, - .chip_select = 0, - .mode = SPI_MODE_0, - }, -}; - -/* - * This is a stub -> the FGPIO[3] pin is not connected on the schematic - * The all work is performed automatically by !SPI_FRAME (SFRM1) and - * goes through CPLD - */ -static struct gpiod_lookup_table bk3_spi_cs_gpio_table = { - .dev_id = "spi0", - .table = { - GPIO_LOOKUP("F", 3, "cs", GPIO_ACTIVE_LOW), - { }, - }, -}; - -static struct ep93xx_spi_info bk3_spi_master __initdata = { - .use_dma = 1, -}; - -/************************************************************************* - * TS72XX support code - *************************************************************************/ -#if IS_ENABLED(CONFIG_FPGA_MGR_TS73XX) - -/* Relative to EP93XX_CS1_PHYS_BASE */ -#define TS73XX_FPGA_LOADER_BASE 0x03c00000 - -static struct resource ts73xx_fpga_resources[] = { - { - .start = EP93XX_CS1_PHYS_BASE + TS73XX_FPGA_LOADER_BASE, - .end = EP93XX_CS1_PHYS_BASE + TS73XX_FPGA_LOADER_BASE + 1, - .flags = IORESOURCE_MEM, - }, -}; - -static struct platform_device ts73xx_fpga_device = { - .name = "ts73xx-fpga-mgr", - .id = -1, - .resource = ts73xx_fpga_resources, - .num_resources = ARRAY_SIZE(ts73xx_fpga_resources), -}; - -#endif - -/************************************************************************* - * SPI Bus - *************************************************************************/ -static struct spi_board_info ts72xx_spi_devices[] __initdata = { - { - .modalias = "tmp122", - .max_speed_hz = 2 * 1000 * 1000, - .bus_num = 0, - .chip_select = 0, - }, -}; - -static struct gpiod_lookup_table ts72xx_spi_cs_gpio_table = { - .dev_id = "spi0", - .table = { - /* DIO_17 */ - GPIO_LOOKUP("F", 2, "cs", GPIO_ACTIVE_LOW), - { }, - }, -}; - -static struct ep93xx_spi_info ts72xx_spi_info __initdata = { - /* Intentionally left blank */ -}; - -static void __init ts72xx_init_machine(void) -{ - ep93xx_init_devices(); - ts72xx_register_flash(ts72xx_nand_parts, ARRAY_SIZE(ts72xx_nand_parts), - is_ts9420_installed() ? 
- EP93XX_CS7_PHYS_BASE : EP93XX_CS6_PHYS_BASE); - platform_device_register(&ts72xx_rtc_device); - platform_device_register(&ts72xx_wdt_device); - - ep93xx_register_eth(&ts72xx_eth_data, 1); -#if IS_ENABLED(CONFIG_FPGA_MGR_TS73XX) - if (board_is_ts7300()) - platform_device_register(&ts73xx_fpga_device); -#endif - gpiod_add_lookup_table(&ts72xx_spi_cs_gpio_table); - ep93xx_register_spi(&ts72xx_spi_info, ts72xx_spi_devices, - ARRAY_SIZE(ts72xx_spi_devices)); -} - -MACHINE_START(TS72XX, "Technologic Systems TS-72xx SBC") - /* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ts72xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = ts72xx_init_machine, - .restart = ep93xx_restart, -MACHINE_END - -/************************************************************************* - * EP93xx I2S audio peripheral handling - *************************************************************************/ -static struct resource ep93xx_i2s_resource[] = { - DEFINE_RES_MEM(EP93XX_I2S_PHYS_BASE, 0x100), - DEFINE_RES_IRQ_NAMED(IRQ_EP93XX_SAI, "spilink i2s slave"), -}; - -static struct platform_device ep93xx_i2s_device = { - .name = "ep93xx-spilink-i2s", - .id = -1, - .num_resources = ARRAY_SIZE(ep93xx_i2s_resource), - .resource = ep93xx_i2s_resource, -}; - -/************************************************************************* - * BK3 support code - *************************************************************************/ -static struct mtd_partition bk3_nand_parts[] = { - { - .name = "System", - .offset = 0x00000000, - .size = 0x01e00000, - }, { - .name = "Data", - .offset = 0x01e00000, - .size = 0x05f20000 - }, { - .name = "RedBoot", - .offset = 0x07d20000, - .size = 0x002e0000, - .mask_flags = MTD_WRITEABLE, /* force RO */ - }, -}; - -static void __init bk3_init_machine(void) -{ - ep93xx_init_devices(); - - ts72xx_register_flash(bk3_nand_parts, ARRAY_SIZE(bk3_nand_parts), - EP93XX_CS6_PHYS_BASE); - - ep93xx_register_eth(&ts72xx_eth_data, 1); - - gpiod_add_lookup_table(&bk3_spi_cs_gpio_table); - ep93xx_register_spi(&bk3_spi_master, bk3_spi_board_info, - ARRAY_SIZE(bk3_spi_board_info)); - - /* Configure ep93xx's I2S to use AC97 pins */ - ep93xx_devcfg_set_bits(EP93XX_SYSCON_DEVCFG_I2SONAC97); - platform_device_register(&ep93xx_i2s_device); -} - -MACHINE_START(BK3, "Liebherr controller BK3.1") - /* Maintainer: Lukasz Majewski <lukma@denx.de> */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS, - .map_io = ts72xx_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = bk3_init_machine, - .restart = ep93xx_restart, -MACHINE_END diff --git a/arch/arm/mach-ep93xx/ts72xx.h b/arch/arm/mach-ep93xx/ts72xx.h deleted file mode 100644 index 00b4941d29c9..000000000000 --- a/arch/arm/mach-ep93xx/ts72xx.h +++ /dev/null @@ -1,94 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/arm/mach-ep93xx/include/mach/ts72xx.h - */ - -/* - * TS72xx memory map: - * - * virt phys size - * febff000 22000000 4K model number register (bits 0-2) - * febfe000 22400000 4K options register - * febfd000 22800000 4K options register #2 - * febfc000 23400000 4K CPLD version register - */ - -#ifndef __TS72XX_H_ -#define __TS72XX_H_ - -#define TS72XX_MODEL_PHYS_BASE 0x22000000 -#define TS72XX_MODEL_VIRT_BASE IOMEM(0xfebff000) -#define TS72XX_MODEL_SIZE 0x00001000 - -#define TS72XX_MODEL_TS7200 0x00 -#define TS72XX_MODEL_TS7250 0x01 -#define TS72XX_MODEL_TS7260 0x02 -#define TS72XX_MODEL_TS7300 
0x03 -#define TS72XX_MODEL_TS7400 0x04 -#define TS72XX_MODEL_MASK 0x07 - - -#define TS72XX_OPTIONS_PHYS_BASE 0x22400000 -#define TS72XX_OPTIONS_VIRT_BASE IOMEM(0xfebfe000) -#define TS72XX_OPTIONS_SIZE 0x00001000 - -#define TS72XX_OPTIONS_COM2_RS485 0x02 -#define TS72XX_OPTIONS_MAX197 0x01 - - -#define TS72XX_OPTIONS2_PHYS_BASE 0x22800000 -#define TS72XX_OPTIONS2_VIRT_BASE IOMEM(0xfebfd000) -#define TS72XX_OPTIONS2_SIZE 0x00001000 - -#define TS72XX_OPTIONS2_TS9420 0x04 -#define TS72XX_OPTIONS2_TS9420_BOOT 0x02 - -#define TS72XX_CPLDVER_PHYS_BASE 0x23400000 -#define TS72XX_CPLDVER_VIRT_BASE IOMEM(0xfebfc000) -#define TS72XX_CPLDVER_SIZE 0x00001000 - -#ifndef __ASSEMBLY__ - -static inline int ts72xx_model(void) -{ - return __raw_readb(TS72XX_MODEL_VIRT_BASE) & TS72XX_MODEL_MASK; -} - -static inline int board_is_ts7200(void) -{ - return ts72xx_model() == TS72XX_MODEL_TS7200; -} - -static inline int board_is_ts7250(void) -{ - return ts72xx_model() == TS72XX_MODEL_TS7250; -} - -static inline int board_is_ts7260(void) -{ - return ts72xx_model() == TS72XX_MODEL_TS7260; -} - -static inline int board_is_ts7300(void) -{ - return ts72xx_model() == TS72XX_MODEL_TS7300; -} - -static inline int board_is_ts7400(void) -{ - return ts72xx_model() == TS72XX_MODEL_TS7400; -} - -static inline int is_max197_installed(void) -{ - return !!(__raw_readb(TS72XX_OPTIONS_VIRT_BASE) & - TS72XX_OPTIONS_MAX197); -} - -static inline int is_ts9420_installed(void) -{ - return !!(__raw_readb(TS72XX_OPTIONS2_VIRT_BASE) & - TS72XX_OPTIONS2_TS9420); -} -#endif -#endif /* __TS72XX_H_ */ diff --git a/arch/arm/mach-ep93xx/vision_ep9307.c b/arch/arm/mach-ep93xx/vision_ep9307.c deleted file mode 100644 index 85f0dd7255a9..000000000000 --- a/arch/arm/mach-ep93xx/vision_ep9307.c +++ /dev/null @@ -1,319 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * arch/arm/mach-ep93xx/vision_ep9307.c - * Vision Engraving Systems EP9307 SoM support. 
- * - * Copyright (C) 2008-2011 Vision Engraving Systems - * H Hartley Sweeten <hsweeten@visionengravers.com> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/platform_device.h> -#include <linux/irq.h> -#include <linux/gpio.h> -#include <linux/gpio/machine.h> -#include <linux/fb.h> -#include <linux/io.h> -#include <linux/mtd/partitions.h> -#include <linux/i2c.h> -#include <linux/platform_data/pca953x.h> -#include <linux/spi/spi.h> -#include <linux/spi/flash.h> -#include <linux/spi/mmc_spi.h> -#include <linux/mmc/host.h> - -#include <sound/cs4271.h> - -#include "hardware.h" -#include <linux/platform_data/video-ep93xx.h> -#include <linux/platform_data/spi-ep93xx.h> -#include "gpio-ep93xx.h" - -#include <asm/mach-types.h> -#include <asm/mach/map.h> -#include <asm/mach/arch.h> - -#include "soc.h" - -/************************************************************************* - * Static I/O mappings for the FPGA - *************************************************************************/ -#define VISION_PHYS_BASE EP93XX_CS7_PHYS_BASE -#define VISION_VIRT_BASE 0xfebff000 - -static struct map_desc vision_io_desc[] __initdata = { - { - .virtual = VISION_VIRT_BASE, - .pfn = __phys_to_pfn(VISION_PHYS_BASE), - .length = SZ_4K, - .type = MT_DEVICE, - }, -}; - -static void __init vision_map_io(void) -{ - ep93xx_map_io(); - - iotable_init(vision_io_desc, ARRAY_SIZE(vision_io_desc)); -} - -/************************************************************************* - * Ethernet - *************************************************************************/ -static struct ep93xx_eth_data vision_eth_data __initdata = { - .phy_id = 1, -}; - -/************************************************************************* - * Framebuffer - *************************************************************************/ -#define VISION_LCD_ENABLE EP93XX_GPIO_LINE_EGPIO1 - -static int vision_lcd_setup(struct platform_device *pdev) -{ - int err; - - err = gpio_request_one(VISION_LCD_ENABLE, GPIOF_OUT_INIT_HIGH, dev_name(&pdev->dev)); - if (err) - return err; - - ep93xx_devcfg_clear_bits(EP93XX_SYSCON_DEVCFG_RAS | - EP93XX_SYSCON_DEVCFG_RASONP3 | - EP93XX_SYSCON_DEVCFG_EXVC); - - return 0; -} - -static void vision_lcd_teardown(struct platform_device *pdev) -{ - gpio_free(VISION_LCD_ENABLE); -} - -static void vision_lcd_blank(int blank_mode, struct fb_info *info) -{ - if (blank_mode) - gpio_set_value(VISION_LCD_ENABLE, 0); - else - gpio_set_value(VISION_LCD_ENABLE, 1); -} - -static struct ep93xxfb_mach_info ep93xxfb_info __initdata = { - .flags = EP93XXFB_USE_SDCSN0 | EP93XXFB_PCLK_FALLING, - .setup = vision_lcd_setup, - .teardown = vision_lcd_teardown, - .blank = vision_lcd_blank, -}; - - -/************************************************************************* - * GPIO Expanders - *************************************************************************/ -#define PCA9539_74_GPIO_BASE (EP93XX_GPIO_LINE_MAX + 1) -#define PCA9539_75_GPIO_BASE (PCA9539_74_GPIO_BASE + 16) -#define PCA9539_76_GPIO_BASE (PCA9539_75_GPIO_BASE + 16) -#define PCA9539_77_GPIO_BASE (PCA9539_76_GPIO_BASE + 16) - -static struct pca953x_platform_data pca953x_74_gpio_data = { - .gpio_base = PCA9539_74_GPIO_BASE, - .irq_base = EP93XX_BOARD_IRQ(0), -}; - -static struct pca953x_platform_data pca953x_75_gpio_data = { - .gpio_base = PCA9539_75_GPIO_BASE, - .irq_base = -1, -}; - -static struct pca953x_platform_data pca953x_76_gpio_data = { - .gpio_base = PCA9539_76_GPIO_BASE, - .irq_base = -1, 
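- /* as with the 0x75 expander above, no interrupt line is wired up here; - * only the 0x74 expander gets an irq_base, fed by the EP93XX_GPIO_LINE_F(7) - * gpio requested in vision_init_machine() */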
-}; - -static struct pca953x_platform_data pca953x_77_gpio_data = { - .gpio_base = PCA9539_77_GPIO_BASE, - .irq_base = -1, -}; - -/************************************************************************* - * I2C Bus - *************************************************************************/ - -static struct i2c_board_info vision_i2c_info[] __initdata = { - { - I2C_BOARD_INFO("isl1208", 0x6f), - .irq = IRQ_EP93XX_EXT1, - }, { - I2C_BOARD_INFO("pca9539", 0x74), - .platform_data = &pca953x_74_gpio_data, - }, { - I2C_BOARD_INFO("pca9539", 0x75), - .platform_data = &pca953x_75_gpio_data, - }, { - I2C_BOARD_INFO("pca9539", 0x76), - .platform_data = &pca953x_76_gpio_data, - }, { - I2C_BOARD_INFO("pca9539", 0x77), - .platform_data = &pca953x_77_gpio_data, - }, -}; - -/************************************************************************* - * SPI CS4271 Audio Codec - *************************************************************************/ -static struct cs4271_platform_data vision_cs4271_data = { - /* Intentionally left blank */ -}; - -/************************************************************************* - * SPI Flash - *************************************************************************/ -static struct mtd_partition vision_spi_flash_partitions[] = { - { - .name = "SPI bootstrap", - .offset = 0, - .size = SZ_4K, - }, { - .name = "Bootstrap config", - .offset = MTDPART_OFS_APPEND, - .size = SZ_4K, - }, { - .name = "System config", - .offset = MTDPART_OFS_APPEND, - .size = MTDPART_SIZ_FULL, - }, -}; - -static struct flash_platform_data vision_spi_flash_data = { - .name = "SPI Flash", - .parts = vision_spi_flash_partitions, - .nr_parts = ARRAY_SIZE(vision_spi_flash_partitions), -}; - -/************************************************************************* - * SPI SD/MMC host - *************************************************************************/ -static struct mmc_spi_platform_data vision_spi_mmc_data = { - .detect_delay = 100, - .powerup_msecs = 100, - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .caps2 = MMC_CAP2_RO_ACTIVE_HIGH, -}; - -static struct gpiod_lookup_table vision_spi_mmc_gpio_table = { - .dev_id = "mmc_spi.2", /* "mmc_spi @ CS2 */ - .table = { - /* Card detect */ - GPIO_LOOKUP_IDX("B", 7, NULL, 0, GPIO_ACTIVE_LOW), - /* Write protect */ - GPIO_LOOKUP_IDX("F", 0, NULL, 1, GPIO_ACTIVE_HIGH), - { }, - }, -}; - -/************************************************************************* - * SPI Bus - *************************************************************************/ -static struct spi_board_info vision_spi_board_info[] __initdata = { - { - .modalias = "cs4271", - .platform_data = &vision_cs4271_data, - .max_speed_hz = 6000000, - .bus_num = 0, - .chip_select = 0, - .mode = SPI_MODE_3, - }, { - .modalias = "sst25l", - .platform_data = &vision_spi_flash_data, - .max_speed_hz = 20000000, - .bus_num = 0, - .chip_select = 1, - .mode = SPI_MODE_3, - }, { - .modalias = "mmc_spi", - .platform_data = &vision_spi_mmc_data, - .max_speed_hz = 20000000, - .bus_num = 0, - .chip_select = 2, - .mode = SPI_MODE_3, - }, -}; - -static struct gpiod_lookup_table vision_spi_cs4271_gpio_table = { - .dev_id = "spi0.0", /* cs4271 @ CS0 */ - .table = { - /* RESET */ - GPIO_LOOKUP_IDX("H", 2, NULL, 0, GPIO_ACTIVE_LOW), - { }, - }, -}; - -static struct gpiod_lookup_table vision_spi_cs_gpio_table = { - .dev_id = "spi0", - .table = { - GPIO_LOOKUP_IDX("A", 6, "cs", 0, GPIO_ACTIVE_LOW), - GPIO_LOOKUP_IDX("A", 7, "cs", 1, GPIO_ACTIVE_LOW), - GPIO_LOOKUP_IDX("G", 2, "cs", 2, GPIO_ACTIVE_LOW), 
- { }, - }, -}; - -static struct ep93xx_spi_info vision_spi_master __initdata = { - .use_dma = 1, -}; - -/************************************************************************* - * I2S Audio - *************************************************************************/ -static struct platform_device vision_audio_device = { - .name = "edb93xx-audio", - .id = -1, -}; - -static void __init vision_register_i2s(void) -{ - ep93xx_register_i2s(); - platform_device_register(&vision_audio_device); -} - -/************************************************************************* - * Machine Initialization - *************************************************************************/ -static void __init vision_init_machine(void) -{ - ep93xx_init_devices(); - ep93xx_register_flash(2, EP93XX_CS6_PHYS_BASE, SZ_64M); - ep93xx_register_eth(&vision_eth_data, 1); - ep93xx_register_fb(&ep93xxfb_info); - ep93xx_register_pwm(1, 0); - - /* - * Request the gpio expander's interrupt gpio line now to prevent - * the kernel from doing a WARN in gpiolib:gpio_ensure_requested(). - */ - if (gpio_request_one(EP93XX_GPIO_LINE_F(7), GPIOF_IN, "pca9539:74")) - pr_warn("cannot request interrupt gpio for pca9539:74\n"); - - vision_i2c_info[1].irq = gpio_to_irq(EP93XX_GPIO_LINE_F(7)); - - ep93xx_register_i2c(vision_i2c_info, - ARRAY_SIZE(vision_i2c_info)); - gpiod_add_lookup_table(&vision_spi_cs4271_gpio_table); - gpiod_add_lookup_table(&vision_spi_mmc_gpio_table); - gpiod_add_lookup_table(&vision_spi_cs_gpio_table); - ep93xx_register_spi(&vision_spi_master, vision_spi_board_info, - ARRAY_SIZE(vision_spi_board_info)); - vision_register_i2s(); -} - -MACHINE_START(VISION_EP9307, "Vision Engraving Systems EP9307") - /* Maintainer: H Hartley Sweeten <hsweeten@visionengravers.com> */ - .atag_offset = 0x100, - .nr_irqs = NR_EP93XX_IRQS + EP93XX_BOARD_IRQS, - .map_io = vision_map_io, - .init_irq = ep93xx_init_irq, - .init_time = ep93xx_timer_init, - .init_machine = vision_init_machine, - .restart = ep93xx_restart, -MACHINE_END diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index f8dd0b3cc8e0..3c6ddb1afdc4 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -22,7 +22,7 @@ #include <asm/cp15.h> #include <asm/system_info.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/opcodes.h> #include "fault.h" diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 49f054dcd4de..70d7f4f20225 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -200,7 +200,8 @@ config ARM64 select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_ARGS \ - if $(cc-option,-fpatchable-function-entry=2) + if (GCC_SUPPORTS_DYNAMIC_FTRACE_WITH_ARGS || \ + CLANG_SUPPORTS_DYNAMIC_FTRACE_WITH_ARGS) select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS \ if DYNAMIC_FTRACE_WITH_ARGS && DYNAMIC_FTRACE_WITH_CALL_OPS select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \ @@ -235,7 +236,7 @@ config ARM64 select HAVE_FUNCTION_ARG_ACCESS_API select MMU_GATHER_RCU_TABLE_FREE select HAVE_RSEQ - select HAVE_RUST if CPU_LITTLE_ENDIAN + select HAVE_RUST if RUSTC_SUPPORTS_ARM64 select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_KPROBES @@ -270,16 +271,26 @@ config ARM64 help ARM 64-bit (AArch64) Linux support. +config RUSTC_SUPPORTS_ARM64 + def_bool y + depends on CPU_LITTLE_ENDIAN + # Shadow call stack is only supported on certain rustc versions. + # + # When using the UNWIND_PATCH_PAC_INTO_SCS option, rustc version 1.80+ is + # required due to use of the -Zfixed-x18 flag. 
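+ # (RUSTC_VERSION encodes major * 100000 + minor * 100 + patch, so the + # 108000 and 108200 below correspond to rustc 1.80.0 and 1.82.0 + # respectively.)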
+ # + # Otherwise, rustc version 1.82+ is required due to use of the + # -Zsanitizer=shadow-call-stack flag. + depends on !SHADOW_CALL_STACK || RUSTC_VERSION >= 108200 || RUSTC_VERSION >= 108000 && UNWIND_PATCH_PAC_INTO_SCS + config CLANG_SUPPORTS_DYNAMIC_FTRACE_WITH_ARGS def_bool CC_IS_CLANG # https://github.com/ClangBuiltLinux/linux/issues/1507 depends on AS_IS_GNU || (AS_IS_LLVM && (LD_IS_LLD || LD_VERSION >= 23600)) - select HAVE_DYNAMIC_FTRACE_WITH_ARGS config GCC_SUPPORTS_DYNAMIC_FTRACE_WITH_ARGS def_bool CC_IS_GCC depends on $(cc-option,-fpatchable-function-entry=2) - select HAVE_DYNAMIC_FTRACE_WITH_ARGS config 64BIT def_bool y @@ -1085,6 +1096,7 @@ config ARM64_ERRATUM_3194386 * ARM Cortex-A78C erratum 3324346 * ARM Cortex-A78C erratum 3324347 * ARM Cortex-A710 erratum 3324338 + * ARM Cortex-A715 erratum 3456084 + * ARM Cortex-A720 erratum 3456091 * ARM Cortex-A725 erratum 3456106 * ARM Cortex-X1 erratum 3324344 @@ -1095,6 +1107,7 @@ * ARM Cortex-X925 erratum 3324334 * ARM Neoverse-N1 erratum 3324349 * ARM Neoverse N2 erratum 3324339 + * ARM Neoverse-N3 erratum 3456111 * ARM Neoverse-V1 erratum 3324341 * ARM Neoverse V2 erratum 3324336 * ARM Neoverse-V3 erratum 3312417 @@ -2201,6 +2214,7 @@ config ARM64_SME bool "ARM Scalable Matrix Extension support" default y depends on ARM64_SVE + depends on BROKEN help The Scalable Matrix Extension (SME) is an extension to the AArch64 execution state which utilises a substantial subset of the SVE diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index f6bc3da1ef11..9efd3f37c2fd 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=--no-undefined -X +LDFLAGS_vmlinux :=--no-undefined -X --pic-veneer ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour @@ -57,9 +57,11 @@ KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) ifneq ($(CONFIG_UNWIND_TABLES),y) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables KBUILD_AFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables +KBUILD_RUSTFLAGS += -Cforce-unwind-tables=n else KBUILD_CFLAGS += -fasynchronous-unwind-tables KBUILD_AFLAGS += -fasynchronous-unwind-tables +KBUILD_RUSTFLAGS += -Cforce-unwind-tables=y -Zuse-sync-unwind=n endif ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y) @@ -114,6 +116,7 @@ endif ifeq ($(CONFIG_SHADOW_CALL_STACK), y) KBUILD_CFLAGS += -ffixed-x18 +KBUILD_RUSTFLAGS += -Zfixed-x18 endif ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lvds0.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lvds0.dtsi index d00036204a8c..dad0dc8fb431 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-lvds0.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-lvds0.dtsi @@ -14,7 +14,7 @@ lvds0_subsys: bus@56240000 { compatible = "fsl,imx8qxp-lpcg"; reg = <0x56243000 0x4>; #clock-cells = <1>; - clock-output-names = "mipi1_lis_lpcg_ipg_clk"; + clock-output-names = "lvds0_lis_lpcg_ipg_clk"; power-domains = <&pd IMX_SC_R_MIPI_1>; }; @@ -22,9 +22,9 @@ lvds0_subsys: bus@56240000 { compatible = "fsl,imx8qxp-lpcg"; reg = <0x5624300c 0x4>; #clock-cells = <1>; - clock-output-names = "mipi1_pwm_lpcg_clk", - "mipi1_pwm_lpcg_ipg_clk", - "mipi1_pwm_lpcg_32k_clk"; + clock-output-names = "lvds0_pwm_lpcg_clk", + "lvds0_pwm_lpcg_ipg_clk", + "lvds0_pwm_lpcg_32k_clk"; power-domains = <&pd IMX_SC_R_MIPI_1_PWM_0>; }; @@ -32,8 +32,8 @@ lvds0_subsys: bus@56240000 { compatible = "fsl,imx8qxp-lpcg"; reg = <0x56243010
0x4>; #clock-cells = <1>; - clock-output-names = "mipi1_i2c0_lpcg_clk", - "mipi1_i2c0_lpcg_ipg_clk"; + clock-output-names = "lvds0_i2c0_lpcg_clk", + "lvds0_i2c0_lpcg_ipg_clk"; power-domains = <&pd IMX_SC_R_MIPI_1_I2C_0>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi index c6540768bdb9..87211c18d65a 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi @@ -15,7 +15,7 @@ vpu: vpu@2c000000 { mu_m0: mailbox@2d000000 { compatible = "fsl,imx6sx-mu"; reg = <0x2d000000 0x20000>; - interrupts = <GIC_SPI 469 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 472 IRQ_TYPE_LEVEL_HIGH>; #mbox-cells = <2>; power-domains = <&pd IMX_SC_R_VPU_MU_0>; status = "disabled"; @@ -24,7 +24,7 @@ vpu: vpu@2c000000 { mu1_m0: mailbox@2d020000 { compatible = "fsl,imx6sx-mu"; reg = <0x2d020000 0x20000>; - interrupts = <GIC_SPI 470 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 473 IRQ_TYPE_LEVEL_HIGH>; #mbox-cells = <2>; power-domains = <&pd IMX_SC_R_VPU_MU_1>; status = "disabled"; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts index 50debe821c42..9c102acb8052 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts @@ -218,6 +218,18 @@ }; }; +&media_blk_ctrl { + /* + * The LVDS panel on this device uses 72.4 MHz pixel clock, + * set IMX8MP_VIDEO_PLL1 to 72.4 * 7 = 506.8 MHz so the LDB + * serializer and LCDIFv3 scanout engine can reach accurate + * pixel clock of exactly 72.4 MHz. + */ + assigned-clock-rates = <500000000>, <200000000>, + <0>, <0>, <500000000>, + <506800000>; +}; + &snvs_pwrkey { status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-skov-revb-mi1010ait-1cp1.dts b/arch/arm64/boot/dts/freescale/imx8mp-skov-revb-mi1010ait-1cp1.dts index 3c2efdc59bfa..30962922b361 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-skov-revb-mi1010ait-1cp1.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-skov-revb-mi1010ait-1cp1.dts @@ -71,6 +71,7 @@ assigned-clock-rates = <500000000>, <200000000>, <0>, /* IMX8MP_CLK_MEDIA_DISP2_PIX = pixelclk of lvds panel */ <68900000>, + <500000000>, /* IMX8MP_VIDEO_PLL1 = IMX8MP_CLK_MEDIA_LDB * 2 */ <964600000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index f3531cfb0d79..40e847bc0b7f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1261,7 +1261,7 @@ compatible = "fsl,imx8mp-usdhc", "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b40000 0x10000>; interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MP_CLK_DUMMY>, + clocks = <&clk IMX8MP_CLK_IPG_ROOT>, <&clk IMX8MP_CLK_NAND_USDHC_BUS>, <&clk IMX8MP_CLK_USDHC1_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -1275,7 +1275,7 @@ compatible = "fsl,imx8mp-usdhc", "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b50000 0x10000>; interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MP_CLK_DUMMY>, + clocks = <&clk IMX8MP_CLK_IPG_ROOT>, <&clk IMX8MP_CLK_NAND_USDHC_BUS>, <&clk IMX8MP_CLK_USDHC2_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -1289,7 +1289,7 @@ compatible = "fsl,imx8mp-usdhc", "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b60000 0x10000>; interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MP_CLK_DUMMY>, + clocks = <&clk IMX8MP_CLK_IPG_ROOT>, <&clk 
IMX8MP_CLK_NAND_USDHC_BUS>, <&clk IMX8MP_CLK_USDHC3_ROOT>; clock-names = "ipg", "ahb", "per"; diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi index 7894a3ab26d6..f81937b5fb72 100644 --- a/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi @@ -5,6 +5,14 @@ * Author: Alexander Stein */ +&mu_m0 { + interrupts = <GIC_SPI 469 IRQ_TYPE_LEVEL_HIGH>; +}; + +&mu1_m0 { + interrupts = <GIC_SPI 470 IRQ_TYPE_LEVEL_HIGH>; +}; + &vpu_core0 { reg = <0x2d040000 0x10000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi index e32d5afcf4a9..43f543768444 100644 --- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi @@ -384,7 +384,7 @@ }; flexspi2: spi@29810000 { - compatible = "nxp,imx8mm-fspi"; + compatible = "nxp,imx8ulp-fspi"; reg = <0x29810000 0x10000>, <0x60000000 0x10000000>; reg-names = "fspi_base", "fspi_mmap"; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/marvell/cn9130-sr-som.dtsi b/arch/arm64/boot/dts/marvell/cn9130-sr-som.dtsi index 4676e3488f54..cb8d54895a77 100644 --- a/arch/arm64/boot/dts/marvell/cn9130-sr-som.dtsi +++ b/arch/arm64/boot/dts/marvell/cn9130-sr-som.dtsi @@ -136,7 +136,7 @@ }; cp0_mdio_pins: cp0-mdio-pins { - marvell,pins = "mpp40", "mpp41"; + marvell,pins = "mpp0", "mpp1"; marvell,function = "ge"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7981b.dtsi b/arch/arm64/boot/dts/mediatek/mt7981b.dtsi index b096009ef99c..5cbea9cd411f 100644 --- a/arch/arm64/boot/dts/mediatek/mt7981b.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7981b.dtsi @@ -94,6 +94,39 @@ #pwm-cells = <2>; }; + serial@11002000 { + compatible = "mediatek,mt7981-uart", "mediatek,mt6577-uart"; + reg = <0 0x11002000 0 0x100>; + interrupts = <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "uart", "wakeup"; + clocks = <&infracfg CLK_INFRA_UART0_SEL>, + <&infracfg CLK_INFRA_UART0_CK>; + clock-names = "baud", "bus"; + status = "disabled"; + }; + + serial@11003000 { + compatible = "mediatek,mt7981-uart", "mediatek,mt6577-uart"; + reg = <0 0x11003000 0 0x100>; + interrupts = <GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "uart", "wakeup"; + clocks = <&infracfg CLK_INFRA_UART1_SEL>, + <&infracfg CLK_INFRA_UART1_CK>; + clock-names = "baud", "bus"; + status = "disabled"; + }; + + serial@11004000 { + compatible = "mediatek,mt7981-uart", "mediatek,mt6577-uart"; + reg = <0 0x11004000 0 0x100>; + interrupts = <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "uart", "wakeup"; + clocks = <&infracfg CLK_INFRA_UART2_SEL>, + <&infracfg CLK_INFRA_UART2_CK>; + clock-names = "baud", "bus"; + status = "disabled"; + }; + i2c@11007000 { compatible = "mediatek,mt7981-i2c"; reg = <0 0x11007000 0 0x1000>, diff --git a/arch/arm64/boot/dts/qcom/msm8939.dtsi b/arch/arm64/boot/dts/qcom/msm8939.dtsi index 28634789a8a9..7af210789879 100644 --- a/arch/arm64/boot/dts/qcom/msm8939.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8939.dtsi @@ -248,7 +248,7 @@ smd-edge { interrupts = <GIC_SPI 168 IRQ_TYPE_EDGE_RISING>; - mboxes = <&apcs1_mbox 0>; + qcom,ipc = <&apcs1_mbox 8 0>; qcom,smd-edge = <15>; rpm_requests: rpm-requests { diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 9bafb3b350ff..38cb524cc568 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -1973,7 +1973,7 @@ clocks = <&gcc GCC_PCIE_1_PIPE_CLK>, <&gcc 
GCC_PCIE_1_PIPE_CLK_SRC>, - <&pcie1_phy>, + <&pcie1_phy QMP_PCIE_PIPE_CLK>, <&rpmhcc RPMH_CXO_CLK>, <&gcc GCC_PCIE_1_AUX_CLK>, <&gcc GCC_PCIE_1_CFG_AHB_CLK>, diff --git a/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts b/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts index 941dfddd6713..fdde988ae01e 100644 --- a/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts +++ b/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts @@ -139,6 +139,8 @@ pinctrl-0 = <&nvme_reg_en>; pinctrl-names = "default"; + + regulator-boot-on; }; vph_pwr: regulator-vph-pwr { diff --git a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts index 20616bd4aa6c..fb4a48a1e2a8 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts @@ -134,6 +134,8 @@ pinctrl-0 = <&nvme_reg_en>; pinctrl-names = "default"; + + regulator-boot-on; }; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts index 10b28d870f08..c6e0356ed9a2 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts @@ -177,9 +177,9 @@ compatible = "qcom,x1e80100-sndcard"; model = "X1E80100-CRD"; audio-routing = "WooferLeft IN", "WSA WSA_SPK1 OUT", - "TwitterLeft IN", "WSA WSA_SPK2 OUT", + "TweeterLeft IN", "WSA WSA_SPK2 OUT", "WooferRight IN", "WSA2 WSA_SPK2 OUT", - "TwitterRight IN", "WSA2 WSA_SPK2 OUT", + "TweeterRight IN", "WSA2 WSA_SPK2 OUT", "IN1_HPHL", "HPHL_OUT", "IN2_HPHR", "HPHR_OUT", "AMIC2", "MIC BIAS2", @@ -300,6 +300,8 @@ pinctrl-names = "default"; pinctrl-0 = <&nvme_reg_en>; + + regulator-boot-on; }; vreg_wwan: regulator-wwan { @@ -933,7 +935,7 @@ reg = <0 1>; reset-gpios = <&lpass_tlmm 12 GPIO_ACTIVE_LOW>; #sound-dai-cells = <0>; - sound-name-prefix = "TwitterLeft"; + sound-name-prefix = "TweeterLeft"; vdd-1p8-supply = <&vreg_l15b_1p8>; vdd-io-supply = <&vreg_l12b_1p2>; qcom,port-mapping = <4 5 6 7 11 13>; @@ -986,7 +988,7 @@ reg = <0 1>; reset-gpios = <&lpass_tlmm 13 GPIO_ACTIVE_LOW>; #sound-dai-cells = <0>; - sound-name-prefix = "TwitterRight"; + sound-name-prefix = "TweeterRight"; vdd-1p8-supply = <&vreg_l15b_1p8>; vdd-io-supply = <&vreg_l12b_1p2>; qcom,port-mapping = <4 5 6 7 11 13>; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts index 3c13331a9ef4..0cdaff9c8cf0 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts @@ -205,6 +205,8 @@ pinctrl-0 = <&nvme_reg_en>; pinctrl-names = "default"; + + regulator-boot-on; }; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi b/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi index 42e02ad6a9c3..cdb401767c42 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi @@ -164,6 +164,8 @@ pinctrl-0 = <&nvme_reg_en>; pinctrl-names = "default"; + + regulator-boot-on; }; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts index 1c3a6a7b3ed6..5ef030c60abe 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts @@ -253,6 +253,8 @@ pinctrl-names = "default"; pinctrl-0 = <&nvme_reg_en>; + + regulator-boot-on; }; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi 
b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index a36076e3c56b..0510abc0edf0 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -2924,14 +2924,14 @@ "mhi"; #address-cells = <3>; #size-cells = <2>; - ranges = <0x01000000 0 0x00000000 0 0x70200000 0 0x100000>, - <0x02000000 0 0x70300000 0 0x70300000 0 0x3d00000>; - bus-range = <0 0xff>; + ranges = <0x01000000 0x0 0x00000000 0x0 0x70200000 0x0 0x100000>, + <0x02000000 0x0 0x70300000 0x0 0x70300000 0x0 0x1d00000>; + bus-range = <0x00 0xff>; dma-coherent; linux,pci-domain = <6>; - num-lanes = <2>; + num-lanes = <4>; interrupts = <GIC_SPI 773 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 774 IRQ_TYPE_LEVEL_HIGH>, @@ -2997,19 +2997,22 @@ }; pcie6a_phy: phy@1bfc000 { - compatible = "qcom,x1e80100-qmp-gen4x2-pcie-phy"; - reg = <0 0x01bfc000 0 0x2000>; + compatible = "qcom,x1e80100-qmp-gen4x4-pcie-phy"; + reg = <0 0x01bfc000 0 0x2000>, + <0 0x01bfe000 0 0x2000>; clocks = <&gcc GCC_PCIE_6A_PHY_AUX_CLK>, <&gcc GCC_PCIE_6A_CFG_AHB_CLK>, - <&rpmhcc RPMH_CXO_CLK>, + <&tcsr TCSR_PCIE_4L_CLKREF_EN>, <&gcc GCC_PCIE_6A_PHY_RCHNG_CLK>, - <&gcc GCC_PCIE_6A_PIPE_CLK>; + <&gcc GCC_PCIE_6A_PIPE_CLK>, + <&gcc GCC_PCIE_6A_PIPEDIV2_CLK>; clock-names = "aux", "cfg_ahb", "ref", "rchng", - "pipe"; + "pipe", + "pipediv2"; resets = <&gcc GCC_PCIE_6A_PHY_BCR>, <&gcc GCC_PCIE_6A_NOCSR_COM_PHY_BCR>; @@ -3021,6 +3024,8 @@ power-domains = <&gcc GCC_PCIE_6_PHY_GDSC>; + qcom,4ln-config-sel = <&tcsr 0x1a000 0>; + #clock-cells = <0>; clock-output-names = "pcie6a_pipe_clk"; @@ -3097,7 +3102,7 @@ assigned-clocks = <&gcc GCC_PCIE_5_AUX_CLK>; assigned-clock-rates = <19200000>; - interconnects = <&pcie_south_anoc MASTER_PCIE_5 QCOM_ICC_TAG_ALWAYS + interconnects = <&pcie_north_anoc MASTER_PCIE_5 QCOM_ICC_TAG_ALWAYS &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS &cnoc_main SLAVE_PCIE_5 QCOM_ICC_TAG_ALWAYS>; @@ -3124,14 +3129,16 @@ clocks = <&gcc GCC_PCIE_5_AUX_CLK>, <&gcc GCC_PCIE_5_CFG_AHB_CLK>, - <&rpmhcc RPMH_CXO_CLK>, + <&tcsr TCSR_PCIE_2L_5_CLKREF_EN>, <&gcc GCC_PCIE_5_PHY_RCHNG_CLK>, - <&gcc GCC_PCIE_5_PIPE_CLK>; + <&gcc GCC_PCIE_5_PIPE_CLK>, + <&gcc GCC_PCIE_5_PIPEDIV2_CLK>; clock-names = "aux", "cfg_ahb", "ref", "rchng", - "pipe"; + "pipe", + "pipediv2"; resets = <&gcc GCC_PCIE_5_PHY_BCR>; reset-names = "phy"; @@ -3166,8 +3173,8 @@ "mhi"; #address-cells = <3>; #size-cells = <2>; - ranges = <0x01000000 0 0x00000000 0 0x7c200000 0 0x100000>, - <0x02000000 0 0x7c300000 0 0x7c300000 0 0x3d00000>; + ranges = <0x01000000 0x0 0x00000000 0x0 0x7c200000 0x0 0x100000>, + <0x02000000 0x0 0x7c300000 0x0 0x7c300000 0x0 0x1d00000>; bus-range = <0x00 0xff>; dma-coherent; @@ -3217,7 +3224,7 @@ assigned-clocks = <&gcc GCC_PCIE_4_AUX_CLK>; assigned-clock-rates = <19200000>; - interconnects = <&pcie_south_anoc MASTER_PCIE_4 QCOM_ICC_TAG_ALWAYS + interconnects = <&pcie_north_anoc MASTER_PCIE_4 QCOM_ICC_TAG_ALWAYS &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS &cnoc_main SLAVE_PCIE_4 QCOM_ICC_TAG_ALWAYS>; @@ -3254,14 +3261,16 @@ clocks = <&gcc GCC_PCIE_4_AUX_CLK>, <&gcc GCC_PCIE_4_CFG_AHB_CLK>, - <&rpmhcc RPMH_CXO_CLK>, + <&tcsr TCSR_PCIE_2L_4_CLKREF_EN>, <&gcc GCC_PCIE_4_PHY_RCHNG_CLK>, - <&gcc GCC_PCIE_4_PIPE_CLK>; + <&gcc GCC_PCIE_4_PIPE_CLK>, + <&gcc GCC_PCIE_4_PIPEDIV2_CLK>; clock-names = "aux", "cfg_ahb", "ref", "rchng", - "pipe"; + "pipe", + "pipediv2"; resets = <&gcc GCC_PCIE_4_PHY_BCR>; reset-names = "phy"; @@ -6084,7 +6093,8 @@ <0 0x25a00000 0 0x200000>, <0 0x25c00000 0 
0x200000>, <0 0x25e00000 0 0x200000>, - <0 0x26000000 0 0x200000>; + <0 0x26000000 0 0x200000>, + <0 0x26200000 0 0x200000>; reg-names = "llcc0_base", "llcc1_base", "llcc2_base", @@ -6093,7 +6103,8 @@ "llcc5_base", "llcc6_base", "llcc7_base", - "llcc_broadcast_base"; + "llcc_broadcast_base", + "llcc_broadcast_and_base"; interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>; }; diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi index bb1aea82e666..b7163ed74232 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi @@ -66,7 +66,6 @@ bus-width = <8>; cap-mmc-highspeed; mmc-hs200-1_8v; - supports-emmc; mmc-pwrseq = <&emmc_pwrseq>; non-removable; vmmc-supply = <&vcc_3v3>; diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts index 9232357f4fec..d9e191ad1d77 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts @@ -36,14 +36,14 @@ power_led: led-0 { label = "firefly:red:power"; - linux,default-trigger = "ir-power-click"; + linux,default-trigger = "default-on"; default-state = "on"; gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>; }; user_led: led-1 { label = "firefly:blue:user"; - linux,default-trigger = "ir-user-click"; + linux,default-trigger = "rc-feedback"; default-state = "off"; gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_HIGH>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts index cb81ba3f23ff..4b9ced67742d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts @@ -24,9 +24,7 @@ disable-wp; mmc-hs200-1_8v; non-removable; - num-slots = <1>; pinctrl-names = "default"; pinctrl-0 = <&emmc_clk &emmc_cmd &emmc_bus8>; - supports-emmc; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 16b4faa22e4f..c01a4cad48f3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -754,8 +754,7 @@ compatible = "rockchip,rk3328-dw-hdmi"; reg = <0x0 0xff3c0000 0x0 0x20000>; reg-io-width = <4>; - interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru PCLK_HDMI>, <&cru SCLK_HDMI_SFC>, <&cru SCLK_RTC32K>; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi b/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi index 8ac8acf4082d..ab3fda69a1fb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi @@ -61,7 +61,6 @@ fan: fan@18 { compatible = "ti,amc6821"; reg = <0x18>; - #cooling-cells = <2>; }; rtc_twi: rtc@6f { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts b/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts index 1489eb32e266..4feb78797982 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts @@ -541,7 +541,7 @@ status = "okay"; rt5651: audio-codec@1a { - compatible = "rockchip,rt5651"; + compatible = "realtek,rt5651"; reg = <0x1a>; clocks = <&cru SCLK_I2S_8CH_OUT>; clock-names = "mclk"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts index 1a44582a49fb..09a016ea8c76 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts +++ 
b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts @@ -166,7 +166,6 @@ regulator-max-microvolt = <1800000>; vin-supply = <&vcc3v3_sys>; gpio = <&gpio3 RK_PA5 GPIO_ACTIVE_HIGH>; - pinctrl-names = "default"; }; /* MIPI DSI panel 2.8v supply */ @@ -178,7 +177,6 @@ regulator-max-microvolt = <2800000>; vin-supply = <&vcc3v3_sys>; gpio = <&gpio3 RK_PA1 GPIO_ACTIVE_HIGH>; - pinctrl-names = "default"; }; vibrator { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts index 7ba1c28f70a9..2f06bfdd70bf 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts @@ -114,7 +114,6 @@ es8388: es8388@11 { compatible = "everest,es8388"; reg = <0x11>; - clock-names = "mclk"; clocks = <&cru SCLK_I2S_8CH_OUT>; #sound-dai-cells = <0>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi index 8146f870d2bd..ab890e7b6c59 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi @@ -576,7 +576,7 @@ bluetooth { compatible = "brcm,bcm43438-bt"; clocks = <&rk808 1>; - clock-names = "ext_clock"; + clock-names = "txco"; device-wakeup-gpios = <&gpio2 RK_PD3 GPIO_ACTIVE_HIGH>; host-wakeup-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_HIGH>; shutdown-gpios = <&gpio0 RK_PB1 GPIO_ACTIVE_HIGH>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts index dbec2b7173a0..31ea3d0182c0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts @@ -163,7 +163,7 @@ status = "okay"; rt5651: rt5651@1a { - compatible = "rockchip,rt5651"; + compatible = "realtek,rt5651"; reg = <0x1a>; clocks = <&cru SCLK_I2S_8CH_OUT>; clock-names = "mclk"; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353p.dts b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353p.dts index a73cf30801ec..9816a4ed4599 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353p.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353p.dts @@ -92,7 +92,7 @@ }; &i2c2 { - pintctrl-names = "default"; + pinctrl-names = "default"; pinctrl-0 = <&i2c2m1_xfer>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353v.dts b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353v.dts index e9954a33e8cd..a79a5614bcc8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353v.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353v.dts @@ -79,7 +79,7 @@ }; &i2c2 { - pintctrl-names = "default"; + pinctrl-names = "default"; pinctrl-0 = <&i2c2m1_xfer>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts index 0c18406e4c59..7d4680933823 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts @@ -449,9 +449,9 @@ bluetooth { compatible = "brcm,bcm43438-bt"; clocks = <&pmucru CLK_RTC_32K>; - clock-names = "ext_clock"; - device-wake-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; - host-wake-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; + clock-names = "txco"; + device-wakeup-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; + host-wakeup-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; shutdown-gpios = <&gpio2 RK_PB7 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&bt_host_wake_l &bt_wake_l &bt_enable_h>; diff --git 
a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts index c1194d1e438d..9a2f59a351de 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts @@ -507,7 +507,6 @@ non-removable; pinctrl-names = "default"; pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd>; - supports-emmc; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi index ae2536c65a83..0131f2cdd312 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi @@ -684,11 +684,11 @@ compatible = "brcm,bcm43438-bt"; clocks = <&rk817 1>; clock-names = "lpo"; - device-wake-gpios = <&gpio0 RK_PC2 GPIO_ACTIVE_HIGH>; - host-wake-gpios = <&gpio0 RK_PC3 GPIO_ACTIVE_HIGH>; - reset-gpios = <&gpio0 RK_PC4 GPIO_ACTIVE_LOW>; + device-wakeup-gpios = <&gpio0 RK_PC2 GPIO_ACTIVE_HIGH>; + host-wakeup-gpios = <&gpio0 RK_PC3 GPIO_ACTIVE_HIGH>; pinctrl-0 = <&bt_enable_h>, <&bt_host_wake_l>, <&bt_wake_h>; pinctrl-names = "default"; + shutdown-gpios = <&gpio0 RK_PC4 GPIO_ACTIVE_HIGH>; vbat-supply = <&vcc_wl>; vddio-supply = <&vcca_1v8_pmu>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi index 45de2630bb50..1e36f73840da 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi @@ -402,9 +402,9 @@ clock-names = "lpo"; device-wakeup-gpios = <&gpio2 RK_PB2 GPIO_ACTIVE_HIGH>; host-wakeup-gpios = <&gpio2 RK_PB1 GPIO_ACTIVE_HIGH>; - reset-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; pinctrl-0 = <&bt_host_wake_h &bt_reg_on_h &bt_wake_host_h>; + shutdown-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; vbat-supply = <&vcc_3v3>; vddio-supply = <&vcc_1v8>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts index a3112d5df200..b505a4537ee8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts @@ -589,7 +589,6 @@ non-removable; pinctrl-names = "default"; pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd>; - supports-emmc; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3568-roc-pc.dts index e333449ead04..2fa89a0eeafc 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-roc-pc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-roc-pc.dts @@ -272,7 +272,6 @@ regulator-name = "vdd_logic"; regulator-always-on; regulator-boot-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -285,7 +284,6 @@ vdd_gpu: DCDC_REG2 { regulator-name = "vdd_gpu"; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -309,7 +307,6 @@ vdd_npu: DCDC_REG4 { regulator-name = "vdd_npu"; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi index d97d84b88837..fc67585b64b7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi @@ -337,15 +337,19 @@ cache-unified; next-level-cache = 
<&l3_cache>; }; + }; - l3_cache: l3-cache { - compatible = "cache"; - cache-size = <3145728>; - cache-line-size = <64>; - cache-sets = <4096>; - cache-level = <3>; - cache-unified; - }; + /* + * The L3 cache belongs to the DynamIQ Shared Unit (DSU), + * so it's represented here, outside the "cpus" node + */ + l3_cache: l3-cache { + compatible = "cache"; + cache-size = <3145728>; + cache-line-size = <64>; + cache-sets = <4096>; + cache-level = <3>; + cache-unified; }; display_subsystem: display-subsystem { diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts index c3a6812cc93a..dd4c79bcad87 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts @@ -328,7 +328,6 @@ compatible = "everest,es8388"; reg = <0x11>; clocks = <&cru I2S0_8CH_MCLKOUT>; - clock-names = "mclk"; AVDD-supply = <&vcc_1v8_s0>; DVDD-supply = <&vcc_1v8_s0>; HPVDD-supply = <&vcc_3v3_s0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts index e4a20cda65ed..b38dab009ccc 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts @@ -316,7 +316,6 @@ assigned-clocks = <&cru I2S0_8CH_MCLKOUT>; assigned-clock-rates = <12288000>; clocks = <&cru I2S0_8CH_MCLKOUT>; - clock-names = "mclk"; AVDD-supply = <&avcc_1v8_codec_s0>; DVDD-supply = <&avcc_1v8_codec_s0>; HPVDD-supply = <&vcc_3v3_s0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts index 966bbc582d89..6bd06e46a101 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts @@ -304,12 +304,12 @@ }; cooling-maps { - map1 { + map0 { trip = <&package_fan0>; cooling-device = <&fan THERMAL_NO_LIMIT 1>; }; - map2 { + map1 { trip = <&package_fan1>; cooling-device = <&fan 2 THERMAL_NO_LIMIT>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dts b/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dts index d0021524e7f9..328dcb894ccb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dts @@ -428,7 +428,6 @@ regulator-boot-on; regulator-min-microvolt = <550000>; regulator-max-microvolt = <950000>; - regulator-init-microvolt = <750000>; regulator-ramp-delay = <12500>; regulator-state-mem { diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi index dbaa94ca69f4..432133251e31 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi @@ -296,6 +296,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>, <&rk806_dvs2_null>, <&rk806_dvs3_null>; + system-power-controller; vcc1-supply = <&vcc5v0_sys>; vcc2-supply = <&vcc5v0_sys>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts index d8c50fdcca3b..8ba111d9283f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts @@ -377,7 +377,6 @@ assigned-clock-rates = <12288000>; assigned-clocks = <&cru I2S0_8CH_MCLKOUT>; AVDD-supply = <&vcc_3v3_s3>; - clock-names = "mclk"; clocks = <&cru I2S0_8CH_MCLKOUT>; DVDD-supply = <&vcc_1v8_s3>; HPVDD-supply = <&vcc_3v3_s3>; diff --git 
a/arch/arm64/boot/dts/ti/k3-am625-beagleplay.dts b/arch/arm64/boot/dts/ti/k3-am625-beagleplay.dts index 70de288d728e..a1cd47d7f5e3 100644 --- a/arch/arm64/boot/dts/ti/k3-am625-beagleplay.dts +++ b/arch/arm64/boot/dts/ti/k3-am625-beagleplay.dts @@ -888,7 +888,8 @@ mcu { compatible = "ti,cc1352p7"; - reset-gpios = <&main_gpio0 72 GPIO_ACTIVE_LOW>; + bootloader-backdoor-gpios = <&main_gpio0 13 GPIO_ACTIVE_HIGH>; + reset-gpios = <&main_gpio0 14 GPIO_ACTIVE_HIGH>; vdds-supply = <&vdd_3v3>; }; }; diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index ce9b28e3c7d6..a523b519700f 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -9,7 +9,7 @@ */ #include <asm/neon.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/aes.h> #include <crypto/scatterwalk.h> #include <crypto/internal/aead.h> diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c index e921823ca103..00b8749013c5 100644 --- a/arch/arm64/crypto/aes-ce-glue.c +++ b/arch/arm64/crypto/aes-ce-glue.c @@ -7,7 +7,7 @@ #include <asm/neon.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/aes.h> #include <crypto/algapi.h> #include <crypto/internal/simd.h> diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 97331b454ea8..da7b7ec1a664 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -7,7 +7,7 @@ #include <asm/neon.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/aes.h> #include <crypto/gcm.h> #include <crypto/algapi.h> diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c index 9c4bfd62e789..18883ea438f3 100644 --- a/arch/arm64/crypto/poly1305-glue.c +++ b/arch/arm64/crypto/poly1305-glue.c @@ -8,7 +8,7 @@ #include <asm/hwcap.h> #include <asm/neon.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/algapi.h> #include <crypto/internal/hash.h> #include <crypto/internal/poly1305.h> diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index 1dd93e1fcb39..cbd14f208f83 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -7,7 +7,7 @@ #include <asm/neon.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <crypto/sha1.h> diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 0a44d2e7ee1f..6b4866a88ded 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -7,7 +7,7 @@ #include <asm/neon.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <crypto/sha2.h> diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c index 250e1377c481..5662c3ac49e9 100644 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ b/arch/arm64/crypto/sha3-ce-glue.c @@ -12,7 +12,7 @@ #include <asm/hwcap.h> #include <asm/neon.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <crypto/sha3.h> diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index f3431fc62315..071f64293227 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c 
+++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -11,7 +11,7 @@ #include <asm/neon.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <crypto/sha2.h> diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c index 54bf6ebcfffb..1a71788c4cda 100644 --- a/arch/arm64/crypto/sm3-ce-glue.c +++ b/arch/arm64/crypto/sm3-ce-glue.c @@ -7,7 +7,7 @@ #include <asm/neon.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <crypto/sm3.h> diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c index 7182ee683f14..8dd71ce79b69 100644 --- a/arch/arm64/crypto/sm3-neon-glue.c +++ b/arch/arm64/crypto/sm3-neon-glue.c @@ -7,7 +7,7 @@ #include <asm/neon.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <crypto/sm3.h> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 5a7dfeb8e8eb..488f8e751349 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -94,6 +94,7 @@ #define ARM_CPU_PART_NEOVERSE_V3 0xD84 #define ARM_CPU_PART_CORTEX_X925 0xD85 #define ARM_CPU_PART_CORTEX_A725 0xD87 +#define ARM_CPU_PART_NEOVERSE_N3 0xD8E #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -176,6 +177,7 @@ #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) #define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) #define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) +#define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index b36a3b6cc011..67afac659231 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -178,6 +178,7 @@ struct kvm_nvhe_init_params { unsigned long hcr_el2; unsigned long vttbr; unsigned long vtcr; + unsigned long tmp; }; /* diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 329619c6fa96..bf64fed9820e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -51,6 +51,7 @@ #define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5) #define KVM_REQ_SUSPEND KVM_ARCH_REQ(6) #define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7) +#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8) #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) @@ -212,6 +213,12 @@ struct kvm_s2_mmu { bool nested_stage2_enabled; /* + * true when this MMU needs to be unmapped before being used for a new + * purpose. + */ + bool pending_unmap; + + /* * 0: Nobody is currently using this, check vttbr for validity * >0: Somebody is actively using this. */ @@ -1441,11 +1448,6 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); sign_extend64(__val, id##_##fld##_WIDTH - 1); \ }) -#define expand_field_sign(id, fld, val) \ - (id##_##fld##_SIGNED ? 
\ - __expand_field_sign_signed(id, fld, val) : \ - __expand_field_sign_unsigned(id, fld, val)) - #define get_idreg_field_unsigned(kvm, id, fld) \ ({ \ u64 __val = kvm_read_vm_id_reg((kvm), SYS_##id); \ @@ -1461,20 +1463,26 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); #define get_idreg_field_enum(kvm, id, fld) \ get_idreg_field_unsigned(kvm, id, fld) -#define get_idreg_field(kvm, id, fld) \ +#define kvm_cmp_feat_signed(kvm, id, fld, op, limit) \ + (get_idreg_field_signed((kvm), id, fld) op __expand_field_sign_signed(id, fld, limit)) + +#define kvm_cmp_feat_unsigned(kvm, id, fld, op, limit) \ + (get_idreg_field_unsigned((kvm), id, fld) op __expand_field_sign_unsigned(id, fld, limit)) + +#define kvm_cmp_feat(kvm, id, fld, op, limit) \ (id##_##fld##_SIGNED ? \ - get_idreg_field_signed(kvm, id, fld) : \ - get_idreg_field_unsigned(kvm, id, fld)) + kvm_cmp_feat_signed(kvm, id, fld, op, limit) : \ + kvm_cmp_feat_unsigned(kvm, id, fld, op, limit)) #define kvm_has_feat(kvm, id, fld, limit) \ - (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, limit)) + kvm_cmp_feat(kvm, id, fld, >=, limit) #define kvm_has_feat_enum(kvm, id, fld, val) \ - (get_idreg_field_unsigned((kvm), id, fld) == __expand_field_sign_unsigned(id, fld, val)) + kvm_cmp_feat_unsigned(kvm, id, fld, ==, val) #define kvm_has_feat_range(kvm, id, fld, min, max) \ - (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, min) && \ - get_idreg_field((kvm), id, fld) <= expand_field_sign(id, fld, max)) + (kvm_cmp_feat(kvm, id, fld, >=, min) && \ + kvm_cmp_feat(kvm, id, fld, <=, max)) /* Check for a given level of PAuth support */ #define kvm_has_pauth(k, l) \ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index cd4087fbda9a..66d93e320ec8 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -166,7 +166,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr); void __init free_hyp_pgds(void); -void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size); +void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, + u64 size, bool may_block); void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end); void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end); diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index e8bc6d67aba2..233e65522716 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -78,6 +78,8 @@ extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid, extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu); extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu); +extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu); + struct kvm_s2_trans { phys_addr_t output; unsigned long block_size; @@ -124,7 +126,7 @@ extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans); extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2); extern void kvm_nested_s2_wp(struct kvm *kvm); -extern void kvm_nested_s2_unmap(struct kvm *kvm); +extern void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block); extern void kvm_nested_s2_flush(struct kvm *kvm); unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val); diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 9e39217b4afb..798d965760d4 100644 --- 
a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -6,6 +6,8 @@ #ifndef BUILD_VDSO #include <linux/compiler.h> +#include <linux/fs.h> +#include <linux/shmem_fs.h> #include <linux/types.h> static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, @@ -31,19 +33,21 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) -static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) +static inline unsigned long arch_calc_vm_flag_bits(struct file *file, + unsigned long flags) { /* * Only allow MTE on anonymous mappings as these are guaranteed to be * backed by tags-capable memory. The vm_flags may be overridden by a * filesystem supporting MTE (RAM-based). */ - if (system_supports_mte() && (flags & MAP_ANONYMOUS)) + if (system_supports_mte() && + ((flags & MAP_ANONYMOUS) || shmem_file(file))) return VM_MTE_ALLOWED; return 0; } -#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags) +#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags) static inline bool arch_validate_prot(unsigned long prot, unsigned long addr __always_unused) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 5fc3af9f8f29..341174bf9106 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -26,10 +26,6 @@ void update_freq_counters_refs(void); #define arch_scale_freq_invariant topology_scale_freq_invariant #define arch_scale_freq_ref topology_get_freq_ref -#ifdef CONFIG_ACPI_CPPC_LIB -#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc -#endif - /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 2b09495499c6..014b02897f8e 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -10,11 +10,9 @@ #include <asm/insn.h> #include <asm/probes.h> -#define MAX_UINSN_BYTES AARCH64_INSN_SIZE - #define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES) #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE -#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES +#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE typedef __le32 uprobe_opcode_t; @@ -23,8 +21,8 @@ struct arch_uprobe_task { struct arch_uprobe { union { - u8 insn[MAX_UINSN_BYTES]; - u8 ixol[MAX_UINSN_BYTES]; + __le32 insn; + __le32 ixol; }; struct arch_probe_insn api; bool simulate; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 27de1dddb0ab..b21dd24b8efc 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -146,6 +146,7 @@ int main(void) DEFINE(NVHE_INIT_HCR_EL2, offsetof(struct kvm_nvhe_init_params, hcr_el2)); DEFINE(NVHE_INIT_VTTBR, offsetof(struct kvm_nvhe_init_params, vttbr)); DEFINE(NVHE_INIT_VTCR, offsetof(struct kvm_nvhe_init_params, vtcr)); + DEFINE(NVHE_INIT_TMP, offsetof(struct kvm_nvhe_init_params, tmp)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index dfefbdf4073a..a78f247029ae 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -439,6 +439,7 @@ static const struct midr_range erratum_spec_ssbs_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + 
MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), @@ -447,8 +448,10 @@ static const struct midr_range erratum_spec_ssbs_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N3), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 77006df20a75..6d21971ae559 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1367,6 +1367,7 @@ static void sve_init_regs(void) } else { fpsimd_to_sve(current); current->thread.fp_type = FP_STATE_SVE; + fpsimd_flush_task_state(current); } } diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index 968d5fffe233..3496d6169e59 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -99,10 +99,6 @@ arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api) aarch64_insn_is_blr(insn) || aarch64_insn_is_ret(insn)) { api->handler = simulate_br_blr_ret; - } else if (aarch64_insn_is_ldr_lit(insn)) { - api->handler = simulate_ldr_literal; - } else if (aarch64_insn_is_ldrsw_lit(insn)) { - api->handler = simulate_ldrsw_literal; } else { /* * Instruction cannot be stepped out-of-line and we don't @@ -140,6 +136,17 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) probe_opcode_t insn = le32_to_cpu(*addr); probe_opcode_t *scan_end = NULL; unsigned long size = 0, offset = 0; + struct arch_probe_insn *api = &asi->api; + + if (aarch64_insn_is_ldr_lit(insn)) { + api->handler = simulate_ldr_literal; + decoded = INSN_GOOD_NO_SLOT; + } else if (aarch64_insn_is_ldrsw_lit(insn)) { + api->handler = simulate_ldrsw_literal; + decoded = INSN_GOOD_NO_SLOT; + } else { + decoded = arm_probe_decode_insn(insn, &asi->api); + } /* * If there's a symbol defined in front of and near enough to @@ -157,7 +164,6 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) else scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; } - decoded = arm_probe_decode_insn(insn, &asi->api); if (decoded != INSN_REJECTED && scan_end) if (is_probed_address_atomic(addr - 1, scan_end)) diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c index 22d0b3252476..b65334ab79d2 100644 --- a/arch/arm64/kernel/probes/simulate-insn.c +++ b/arch/arm64/kernel/probes/simulate-insn.c @@ -171,17 +171,15 @@ simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs) void __kprobes simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs) { - u64 *load_addr; + unsigned long load_addr; int xn = opcode & 0x1f; - int disp; - disp = ldr_displacement(opcode); - load_addr = (u64 *) (addr + disp); + load_addr = addr + ldr_displacement(opcode); if (opcode & (1 << 30)) /* x0-x30 */ - set_x_reg(regs, xn, *load_addr); + set_x_reg(regs, xn, READ_ONCE(*(u64 *)load_addr)); else /* w0-w30 */ - set_w_reg(regs, xn, *load_addr); + set_w_reg(regs, xn, READ_ONCE(*(u32 *)load_addr)); instruction_pointer_set(regs, instruction_pointer(regs) + 4); } @@ -189,14 +187,12 @@ simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs) void __kprobes simulate_ldrsw_literal(u32 opcode, long addr, 
struct pt_regs *regs) { - s32 *load_addr; + unsigned long load_addr; int xn = opcode & 0x1f; - int disp; - disp = ldr_displacement(opcode); - load_addr = (s32 *) (addr + disp); + load_addr = addr + ldr_displacement(opcode); - set_x_reg(regs, xn, *load_addr); + set_x_reg(regs, xn, READ_ONCE(*(s32 *)load_addr)); instruction_pointer_set(regs, instruction_pointer(regs) + 4); } diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index d49aef2657cd..a2f137a595fc 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); switch (arm_probe_decode_insn(insn, &auprobe->api)) { case INSN_REJECTED: @@ -108,7 +108,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) if (!auprobe->simulate) return false; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); addr = instruction_pointer(regs); if (auprobe->api.handler) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 0540653fbf38..3e7c8c8195c3 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -412,6 +412,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.cpu_context.x19 = (unsigned long)args->fn; p->thread.cpu_context.x20 = (unsigned long)args->fn_arg; + + if (system_supports_poe()) + p->thread.por_el0 = POR_EL0_INIT; } p->thread.cpu_context.pc = (unsigned long)ret_from_fork; p->thread.cpu_context.sp = (unsigned long)childregs; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 561986947530..c7d311d8b92a 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -19,6 +19,7 @@ #include <linux/ratelimit.h> #include <linux/rseq.h> #include <linux/syscalls.h> +#include <linux/pkeys.h> #include <asm/daifflags.h> #include <asm/debug-monitors.h> @@ -66,10 +67,63 @@ struct rt_sigframe_user_layout { unsigned long end_offset; }; +/* + * Holds any EL0-controlled state that influences unprivileged memory accesses. + * This includes both accesses done in userspace and uaccess done in the kernel. + * + * This state needs to be carefully managed to ensure that it doesn't cause + * uaccess to fail when setting up the signal frame, and the signal handler + * itself also expects a well-defined state when entered. + */ +struct user_access_state { + u64 por_el0; +}; + #define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16) #define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) #define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) +/* + * Save the user access state into ua_state and reset it to disable any + * restrictions. + */ +static void save_reset_user_access_state(struct user_access_state *ua_state) +{ + if (system_supports_poe()) { + u64 por_enable_all = 0; + + for (int pkey = 0; pkey < arch_max_pkey(); pkey++) + por_enable_all |= POE_RXW << (pkey * POR_BITS_PER_PKEY); + + ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0); + write_sysreg_s(por_enable_all, SYS_POR_EL0); + /* Ensure that any subsequent uaccess observes the updated value */ + isb(); + } +} + +/* + * Set the user access state for invoking the signal handler. + * + * No uaccess should be done after that function is called. 
+ */ +static void set_handler_user_access_state(void) +{ + if (system_supports_poe()) + write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0); +} + +/* + * Restore the user access state to the values saved in ua_state. + * + * No uaccess should be done after that function is called. + */ +static void restore_user_access_state(const struct user_access_state *ua_state) +{ + if (system_supports_poe()) + write_sysreg_s(ua_state->por_el0, SYS_POR_EL0); +} + static void init_user_layout(struct rt_sigframe_user_layout *user) { const size_t reserved_size = @@ -261,18 +315,20 @@ static int restore_fpmr_context(struct user_ctxs *user) return err; } -static int preserve_poe_context(struct poe_context __user *ctx) +static int preserve_poe_context(struct poe_context __user *ctx, + const struct user_access_state *ua_state) { int err = 0; __put_user_error(POE_MAGIC, &ctx->head.magic, err); __put_user_error(sizeof(*ctx), &ctx->head.size, err); - __put_user_error(read_sysreg_s(SYS_POR_EL0), &ctx->por_el0, err); + __put_user_error(ua_state->por_el0, &ctx->por_el0, err); return err; } -static int restore_poe_context(struct user_ctxs *user) +static int restore_poe_context(struct user_ctxs *user, + struct user_access_state *ua_state) { u64 por_el0; int err = 0; @@ -282,7 +338,7 @@ static int restore_poe_context(struct user_ctxs *user) __get_user_error(por_el0, &(user->poe->por_el0), err); if (!err) - write_sysreg_s(por_el0, SYS_POR_EL0); + ua_state->por_el0 = por_el0; return err; } @@ -850,7 +906,8 @@ invalid: } static int restore_sigframe(struct pt_regs *regs, - struct rt_sigframe __user *sf) + struct rt_sigframe __user *sf, + struct user_access_state *ua_state) { sigset_t set; int i, err; @@ -899,7 +956,7 @@ static int restore_sigframe(struct pt_regs *regs, err = restore_zt_context(&user); if (err == 0 && system_supports_poe() && user.poe) - err = restore_poe_context(&user); + err = restore_poe_context(&user, ua_state); return err; } @@ -908,6 +965,7 @@ SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; + struct user_access_state ua_state; /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -924,12 +982,14 @@ SYSCALL_DEFINE0(rt_sigreturn) if (!access_ok(frame, sizeof (*frame))) goto badframe; - if (restore_sigframe(regs, frame)) + if (restore_sigframe(regs, frame, &ua_state)) goto badframe; if (restore_altstack(&frame->uc.uc_stack)) goto badframe; + restore_user_access_state(&ua_state); + return regs->regs[0]; badframe: @@ -1035,7 +1095,8 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, } static int setup_sigframe(struct rt_sigframe_user_layout *user, - struct pt_regs *regs, sigset_t *set) + struct pt_regs *regs, sigset_t *set, + const struct user_access_state *ua_state) { int i, err = 0; struct rt_sigframe __user *sf = user->sigframe; @@ -1097,10 +1158,9 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, struct poe_context __user *poe_ctx = apply_user_offset(user, user->poe_offset); - err |= preserve_poe_context(poe_ctx); + err |= preserve_poe_context(poe_ctx, ua_state); } - /* ZA state if present */ if (system_supports_sme() && err == 0 && user->za_offset) { struct za_context __user *za_ctx = @@ -1237,9 +1297,6 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, sme_smstop(); } - if (system_supports_poe()) - write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0); - if (ka->sa.sa_flags & SA_RESTORER) sigtramp = ka->sa.sa_restorer; else @@ -1253,6 
+1310,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, { struct rt_sigframe_user_layout user; struct rt_sigframe __user *frame; + struct user_access_state ua_state; int err = 0; fpsimd_signal_preserve_current_state(); @@ -1260,13 +1318,14 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, if (get_sigframe(&user, ksig, regs)) return 1; + save_reset_user_access_state(&ua_state); frame = user.sigframe; __put_user_error(0, &frame->uc.uc_flags, err); __put_user_error(NULL, &frame->uc.uc_link, err); err |= __save_altstack(&frame->uc.uc_stack, regs->sp); - err |= setup_sigframe(&user, regs, set); + err |= setup_sigframe(&user, regs, set, &ua_state); if (err == 0) { setup_return(regs, &ksig->ka, &user, usig); if (ksig->ka.sa.sa_flags & SA_SIGINFO) { @@ -1276,6 +1335,11 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, } } + if (err == 0) + set_handler_user_access_state(); + else + restore_user_access_state(&ua_state); + return err; } diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S index 487381164ff6..2def9d0dd3dd 100644 --- a/arch/arm64/kernel/smccc-call.S +++ b/arch/arm64/kernel/smccc-call.S @@ -7,48 +7,19 @@ #include <asm/asm-offsets.h> #include <asm/assembler.h> -#include <asm/thread_info.h> - -/* - * If we have SMCCC v1.3 and (as is likely) no SVE state in - * the registers then set the SMCCC hint bit to say there's no - * need to preserve it. Do this by directly adjusting the SMCCC - * function value which is already stored in x0 ready to be called. - */ -SYM_FUNC_START(__arm_smccc_sve_check) - - ldr_l x16, smccc_has_sve_hint - cbz x16, 2f - - get_current_task x16 - ldr x16, [x16, #TSK_TI_FLAGS] - tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state? - tbnz x16, #TIF_SVE, 2f // Does that state include SVE? - -1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT - -2: ret -SYM_FUNC_END(__arm_smccc_sve_check) -EXPORT_SYMBOL(__arm_smccc_sve_check) .macro SMCCC instr - stp x29, x30, [sp, #-16]! 
- mov x29, sp -alternative_if ARM64_SVE - bl __arm_smccc_sve_check -alternative_else_nop_endif \instr #0 - ldr x4, [sp, #16] + ldr x4, [sp] stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS] - ldr x4, [sp, #24] + ldr x4, [sp, #8] cbz x4, 1f /* no quirk structure */ ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS] cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6 b.ne 1f str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS] -1: ldp x29, x30, [sp], #16 - ret +1: ret .endm /* diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 55a8e310ea12..58d89d997d05 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -264,8 +264,7 @@ SECTIONS EXIT_DATA } - RUNTIME_CONST(shift, d_hash_shift) - RUNTIME_CONST(ptr, dentry_hashtable) + RUNTIME_CONST_VARIABLES PERCPU_SECTION(L1_CACHE_BYTES) HYPERVISOR_PERCPU_SECTION diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fe0764173cd0..48cafb65d6ac 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -997,6 +997,9 @@ static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu) static int check_vcpu_requests(struct kvm_vcpu *vcpu) { if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) + return -EIO; + if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) kvm_vcpu_sleep(vcpu); @@ -1031,6 +1034,8 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_dirty_ring_check_request(vcpu)) return 0; + + check_nested_vcpu_requests(vcpu); } return 1; @@ -2164,7 +2169,7 @@ static void cpu_hyp_uninit(void *discard) } } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { /* * Most calls to this function are made with migration @@ -2184,7 +2189,7 @@ int kvm_arch_hardware_enable(void) return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { kvm_timer_cpu_down(); kvm_vgic_cpu_down(); @@ -2380,7 +2385,7 @@ static int __init do_pkvm_init(u32 hyp_va_bits) /* * The stub hypercalls are now disabled, so set our local flag to - * prevent a later re-init attempt in kvm_arch_hardware_enable(). + * prevent a later re-init attempt in kvm_arch_enable_virtualization_cpu(). */ __this_cpu_write(kvm_hyp_initialized, 1); preempt_enable(); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 46d52e8a3df3..5310fe1da616 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -338,7 +338,7 @@ static inline void __hyp_sve_save_host(void) struct cpu_sve_state *sve_state = *host_data_ptr(sve_state); sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR); - write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); + write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2); __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl), &sve_state->fpsr, true); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 401af1835be6..fc1866226067 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -24,28 +24,25 @@ .align 11 SYM_CODE_START(__kvm_hyp_init) - ventry __invalid // Synchronous EL2t - ventry __invalid // IRQ EL2t - ventry __invalid // FIQ EL2t - ventry __invalid // Error EL2t + ventry . // Synchronous EL2t + ventry . // IRQ EL2t + ventry . // FIQ EL2t + ventry . // Error EL2t - ventry __invalid // Synchronous EL2h - ventry __invalid // IRQ EL2h - ventry __invalid // FIQ EL2h - ventry __invalid // Error EL2h + ventry . // Synchronous EL2h + ventry . 
// IRQ EL2h + ventry . // FIQ EL2h + ventry . // Error EL2h ventry __do_hyp_init // Synchronous 64-bit EL1 - ventry __invalid // IRQ 64-bit EL1 - ventry __invalid // FIQ 64-bit EL1 - ventry __invalid // Error 64-bit EL1 + ventry . // IRQ 64-bit EL1 + ventry . // FIQ 64-bit EL1 + ventry . // Error 64-bit EL1 - ventry __invalid // Synchronous 32-bit EL1 - ventry __invalid // IRQ 32-bit EL1 - ventry __invalid // FIQ 32-bit EL1 - ventry __invalid // Error 32-bit EL1 - -__invalid: - b . + ventry . // Synchronous 32-bit EL1 + ventry . // IRQ 32-bit EL1 + ventry . // FIQ 32-bit EL1 + ventry . // Error 32-bit EL1 /* * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers. @@ -76,6 +73,13 @@ __do_hyp_init: eret SYM_CODE_END(__kvm_hyp_init) +SYM_CODE_START_LOCAL(__kvm_init_el2_state) + /* Initialize EL2 CPU state to sane values. */ + init_el2_state // Clobbers x0..x2 + finalise_el2_state + ret +SYM_CODE_END(__kvm_init_el2_state) + /* * Initialize the hypervisor in EL2. * @@ -102,9 +106,12 @@ SYM_CODE_START_LOCAL(___kvm_hyp_init) // TPIDR_EL2 is used to preserve x0 across the macro maze... isb msr tpidr_el2, x0 - init_el2_state - finalise_el2_state + str lr, [x0, #NVHE_INIT_TMP] + + bl __kvm_init_el2_state + mrs x0, tpidr_el2 + ldr lr, [x0, #NVHE_INIT_TMP] 1: ldr x1, [x0, #NVHE_INIT_TPIDR_EL2] @@ -199,9 +206,8 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) 2: msr SPsel, #1 // We want to use SP_EL{1,2} - /* Initialize EL2 CPU state to sane values. */ - init_el2_state // Clobbers x0..x2 - finalise_el2_state + bl __kvm_init_el2_state + __init_el2_nvhe_prepare_eret /* Enable MMU, set vectors and stack. */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 87692b566d90..fefc89209f9e 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -33,7 +33,7 @@ static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu) */ sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2); __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true); - write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); + write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2); } static void __hyp_sve_restore_host(void) @@ -45,10 +45,11 @@ static void __hyp_sve_restore_host(void) * the host. The layout of the data when saving the sve state depends * on the VL, so use a consistent (i.e., the maximum) host VL. * - * Setting ZCR_EL2 to ZCR_ELx_LEN_MASK sets the effective length - * supported by the system (or limited at EL3). + * Note that this constrains the PE to the maximum shared VL + * that was discovered, if we wish to use larger VLs this will + * need to be revisited. 
*/ - write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); + write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2); __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl), &sve_state->fpsr, true); @@ -488,7 +489,8 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) case ESR_ELx_EC_SVE: cpacr_clear_set(0, CPACR_ELx_ZEN); isb(); - sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); + sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, + SYS_ZCR_EL2); break; case ESR_ELx_EC_IABT_LOW: case ESR_ELx_EC_DABT_LOW: diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 187a5f4d56c0..077d4098548d 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -574,12 +574,14 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, unlock: hyp_spin_unlock(&vm_table_lock); - if (ret) + if (ret) { unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu)); + return ret; + } hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu); - return ret; + return 0; } static void diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 5763d979d8ca..ee6573befb81 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -317,7 +317,7 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) * to the guest, and hide SSBS so that the * guest stays protected. */ - if (cpus_have_final_cap(ARM64_SSBS)) + if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, SSBS, IMP)) break; fallthrough; case SPECTRE_UNAFFECTED: @@ -428,7 +428,7 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) * Convert the workaround level into an easy-to-compare number, where higher * values mean better protection. */ -static int get_kernel_wa_level(u64 regid) +static int get_kernel_wa_level(struct kvm_vcpu *vcpu, u64 regid) { switch (regid) { case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: @@ -449,7 +449,7 @@ static int get_kernel_wa_level(u64 regid) * don't have any FW mitigation if SSBS is there at * all times. */ - if (cpus_have_final_cap(ARM64_SSBS)) + if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, SSBS, IMP)) return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; fallthrough; case SPECTRE_UNAFFECTED: @@ -486,7 +486,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: - val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; + val = get_kernel_wa_level(vcpu, reg->id) & KVM_REG_FEATURE_LEVEL_MASK; break; case KVM_REG_ARM_STD_BMAP: val = READ_ONCE(smccc_feat->std_bmap); @@ -588,7 +588,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (val & ~KVM_REG_FEATURE_LEVEL_MASK) return -EINVAL; - if (get_kernel_wa_level(reg->id) < val) + if (get_kernel_wa_level(vcpu, reg->id) < val) return -EINVAL; return 0; @@ -624,7 +624,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the * other way around. 
*/ - if (get_kernel_wa_level(reg->id) < wa_level) + if (get_kernel_wa_level(vcpu, reg->id) < wa_level) return -EINVAL; return 0; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index a509b63bd4dd..0f7658aefa1a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -328,9 +328,10 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 may_block)); } -void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) +void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, + u64 size, bool may_block) { - __unmap_stage2_range(mmu, start, size, true); + __unmap_stage2_range(mmu, start, size, may_block); } void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) @@ -1015,7 +1016,7 @@ static void stage2_unmap_memslot(struct kvm *kvm, if (!(vma->vm_flags & VM_PFNMAP)) { gpa_t gpa = addr + (vm_start - memslot->userspace_addr); - kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start); + kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start, true); } hva = vm_end; } while (hva < reg_end); @@ -1042,7 +1043,7 @@ void stage2_unmap_vm(struct kvm *kvm) kvm_for_each_memslot(memslot, bkt, slots) stage2_unmap_memslot(kvm, memslot); - kvm_nested_s2_unmap(kvm); + kvm_nested_s2_unmap(kvm, true); write_unlock(&kvm->mmu_lock); mmap_read_unlock(current->mm); @@ -1912,7 +1913,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) (range->end - range->start) << PAGE_SHIFT, range->may_block); - kvm_nested_s2_unmap(kvm); + kvm_nested_s2_unmap(kvm, range->may_block); return false; } @@ -2179,8 +2180,8 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, phys_addr_t size = slot->npages << PAGE_SHIFT; write_lock(&kvm->mmu_lock); - kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size); - kvm_nested_s2_unmap(kvm); + kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size, true); + kvm_nested_s2_unmap(kvm, true); write_unlock(&kvm->mmu_lock); } diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index f9e30dd34c7a..c4b17d90fc49 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -632,9 +632,9 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu) /* Set the scene for the next search */ kvm->arch.nested_mmus_next = (i + 1) % kvm->arch.nested_mmus_size; - /* Clear the old state */ + /* Make sure we don't forget to do the laundry */ if (kvm_s2_mmu_valid(s2_mmu)) - kvm_stage2_unmap_range(s2_mmu, 0, kvm_phys_size(s2_mmu)); + s2_mmu->pending_unmap = true; /* * The virtual VMID (modulo CnP) will be used as a key when matching @@ -650,6 +650,16 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu) out: atomic_inc(&s2_mmu->refcnt); + + /* + * Set the vCPU request to perform an unmap, even if the pending unmap + * originates from another vCPU. This guarantees that the MMU has been + * completely unmapped before any vCPU actually uses it, and allows + * multiple vCPUs to lend a hand with completing the unmap. + */ + if (s2_mmu->pending_unmap) + kvm_make_request(KVM_REQ_NESTED_S2_UNMAP, vcpu); + return s2_mmu; } @@ -663,6 +673,13 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu) void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu) { + /* + * The vCPU kept its reference on the MMU after the last put, keep + * rolling with it. 
+ */ + if (vcpu->arch.hw_mmu) + return; + if (is_hyp_ctxt(vcpu)) { vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu; } else { @@ -674,10 +691,18 @@ void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu) void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu) { - if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu)) { + /* + * Keep a reference on the associated stage-2 MMU if the vCPU is + * scheduling out and not in WFI emulation, suggesting it is likely to + * reuse the MMU sometime soon. + */ + if (vcpu->scheduled_out && !vcpu_get_flag(vcpu, IN_WFI)) + return; + + if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu)) atomic_dec(&vcpu->arch.hw_mmu->refcnt); - vcpu->arch.hw_mmu = NULL; - } + + vcpu->arch.hw_mmu = NULL; } /* @@ -730,7 +755,7 @@ void kvm_nested_s2_wp(struct kvm *kvm) } } -void kvm_nested_s2_unmap(struct kvm *kvm) +void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block) { int i; @@ -740,7 +765,7 @@ void kvm_nested_s2_unmap(struct kvm *kvm) struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; if (kvm_s2_mmu_valid(mmu)) - kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu)); + kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), may_block); } } @@ -1184,3 +1209,17 @@ int kvm_init_nv_sysregs(struct kvm *kvm) return 0; } + +void check_nested_vcpu_requests(struct kvm_vcpu *vcpu) +{ + if (kvm_check_request(KVM_REQ_NESTED_S2_UNMAP, vcpu)) { + struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu; + + write_lock(&vcpu->kvm->mmu_lock); + if (mmu->pending_unmap) { + kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), true); + mmu->pending_unmap = false; + } + write_unlock(&vcpu->kvm->mmu_lock); + } +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index dad88e31f953..ff8c4e1b847e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1527,6 +1527,14 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_DF2); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR); break; case SYS_ID_AA64PFR2_EL1: /* We only expose FPMR */ @@ -1550,7 +1558,8 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK; break; case SYS_ID_AA64MMFR3_EL1: - val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE; + val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE | + ID_AA64MMFR3_EL1_S1PIE; break; case SYS_ID_MMFR4_EL1: val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX); @@ -1985,7 +1994,7 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) * one cache line. 
*/ if (kvm_has_mte(vcpu->kvm)) - clidr |= 2 << CLIDR_TTYPE_SHIFT(loc); + clidr |= 2ULL << CLIDR_TTYPE_SHIFT(loc); __vcpu_sys_reg(vcpu, r->reg) = clidr; @@ -2376,7 +2385,19 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64PFR0_EL1_RAS | ID_AA64PFR0_EL1_AdvSIMD | ID_AA64PFR0_EL1_FP), }, - ID_SANITISED(ID_AA64PFR1_EL1), + ID_WRITABLE(ID_AA64PFR1_EL1, ~(ID_AA64PFR1_EL1_PFAR | + ID_AA64PFR1_EL1_DF2 | + ID_AA64PFR1_EL1_MTEX | + ID_AA64PFR1_EL1_THE | + ID_AA64PFR1_EL1_GCS | + ID_AA64PFR1_EL1_MTE_frac | + ID_AA64PFR1_EL1_NMI | + ID_AA64PFR1_EL1_RNDR_trap | + ID_AA64PFR1_EL1_SME | + ID_AA64PFR1_EL1_RES0 | + ID_AA64PFR1_EL1_MPAM_frac | + ID_AA64PFR1_EL1_RAS_frac | + ID_AA64PFR1_EL1_MTE)), ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR), ID_UNALLOCATED(4,3), ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0), @@ -2390,7 +2411,21 @@ static const struct sys_reg_desc sys_reg_descs[] = { .get_user = get_id_reg, .set_user = set_id_aa64dfr0_el1, .reset = read_sanitised_id_aa64dfr0_el1, - .val = ID_AA64DFR0_EL1_PMUVer_MASK | + /* + * Prior to FEAT_Debugv8.9, the architecture defines context-aware + * breakpoints (CTX_CMPs) as the highest numbered breakpoints (BRPs). + * KVM does not trap + emulate the breakpoint registers, and as such + * cannot support a layout that misaligns with the underlying hardware. + * While it may be possible to describe a subset that aligns with + * hardware, just prevent changes to BRPs and CTX_CMPs altogether for + * simplicity. + * + * See DDI0487K.a, section D2.8.3 Breakpoint types and linking + * of breakpoints for more details. + */ + .val = ID_AA64DFR0_EL1_DoubleLock_MASK | + ID_AA64DFR0_EL1_WRPs_MASK | + ID_AA64DFR0_EL1_PMUVer_MASK | ID_AA64DFR0_EL1_DebugVer_MASK, }, ID_SANITISED(ID_AA64DFR1_EL1), ID_UNALLOCATED(5,2), @@ -2433,6 +2468,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64MMFR2_EL1_NV | ID_AA64MMFR2_EL1_CCIDX)), ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX | + ID_AA64MMFR3_EL1_S1PIE | ID_AA64MMFR3_EL1_S1POE)), ID_SANITISED(ID_AA64MMFR4_EL1), ID_UNALLOCATED(7,5), @@ -2903,7 +2939,7 @@ static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p, * Drop all shadow S2s, resulting in S1/S2 TLBIs for each of the * corresponding VMIDs. */ - kvm_nested_s2_unmap(vcpu->kvm); + kvm_nested_s2_unmap(vcpu->kvm, true); write_unlock(&vcpu->kvm->mmu_lock); @@ -2955,7 +2991,30 @@ union tlbi_info { static void s2_mmu_unmap_range(struct kvm_s2_mmu *mmu, const union tlbi_info *info) { - kvm_stage2_unmap_range(mmu, info->range.start, info->range.size); + /* + * The unmap operation is allowed to drop the MMU lock and block, which + * means that @mmu could be used for a different context than the one + * currently being invalidated. + * + * This behavior is still safe, as: + * + * 1) The vCPU(s) that recycled the MMU are responsible for invalidating + * the entire MMU before reusing it, which still honors the intent + * of a TLBI. + * + * 2) Until the guest TLBI instruction is 'retired' (i.e. increment PC + * and ERET to the guest), other vCPUs are allowed to use stale + * translations. + * + * 3) Accidentally unmapping an unrelated MMU context is nonfatal, and + * at worst may cause more aborts for shadow stage-2 fills. + * + * Dropping the MMU lock also implies that shadow stage-2 fills could + * happen behind the back of the TLBI. This is still safe, though, as + * the L1 needs to put its stage-2 in a consistent state before doing + * the TLBI. 
+ */ + kvm_stage2_unmap_range(mmu, info->range.start, info->range.size, true); } static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p, @@ -3050,7 +3109,11 @@ static void s2_mmu_unmap_ipa(struct kvm_s2_mmu *mmu, max_size = compute_tlb_inval_range(mmu, info->ipa.addr); base_addr &= ~(max_size - 1); - kvm_stage2_unmap_range(mmu, base_addr, max_size); + /* + * See comment in s2_mmu_unmap_range() for why this is allowed to + * reschedule. + */ + kvm_stage2_unmap_range(mmu, base_addr, max_size, true); } static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p, diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index e7c53e8af3d1..48c952563e85 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -417,8 +417,28 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) kfree(vgic_cpu->private_irqs); vgic_cpu->private_irqs = NULL; - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + /* + * If this vCPU is being destroyed because of a failed creation + * then unregister the redistributor to avoid leaving behind a + * dangling pointer to the vCPU struct. + * + * vCPUs that have been successfully created (i.e. added to + * kvm->vcpu_array) get unregistered in kvm_vgic_destroy(), as + * this function gets called while holding kvm->arch.config_lock + * in the VM teardown path and would otherwise introduce a lock + * inversion w.r.t. kvm->srcu. + * + * vCPUs that failed creation are torn down outside of the + * kvm->arch.config_lock and do not get unregistered in + * kvm_vgic_destroy(), meaning it is both safe and necessary to + * do so here. + */ + if (kvm_get_vcpu_by_id(vcpu->kvm, vcpu->vcpu_id) != vcpu) + vgic_unregister_redist_iodev(vcpu); + vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + } } void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -524,22 +544,31 @@ int kvm_vgic_map_resources(struct kvm *kvm) if (ret) goto out; - dist->ready = true; dist_base = dist->vgic_dist_base; mutex_unlock(&kvm->arch.config_lock); ret = vgic_register_dist_iodev(kvm, dist_base, type); - if (ret) + if (ret) { kvm_err("Unable to register VGIC dist MMIO regions\n"); + goto out_slots; + } + /* + * kvm_io_bus_register_dev() guarantees all readers see the new MMIO + * registration before returning through synchronize_srcu(), which also + * implies a full memory barrier. As such, marking the distributor as + * 'ready' here is guaranteed to be ordered after all vCPUs having seen + * a completely configured distributor. + */ + dist->ready = true; goto out_slots; out: mutex_unlock(&kvm->arch.config_lock); out_slots: - mutex_unlock(&kvm->slots_lock); - if (ret) - kvm_vgic_destroy(kvm); + kvm_vm_dead(kvm); + + mutex_unlock(&kvm->slots_lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 1d26bb5b02f4..5f4f57aaa23e 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -236,7 +236,12 @@ static int vgic_set_common_attr(struct kvm_device *dev, mutex_lock(&dev->kvm->arch.config_lock); - if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis) + /* + * Either userspace has already configured NR_IRQS or + * the vgic has already been initialized and vgic_init() + * supplied a default amount of SPIs. 
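The dist->ready ordering argument in kvm_vgic_map_resources() above is the standard publish pattern: finish all configuration, then make the flag visible only after a barrier orders the two (synchronize_srcu() inside kvm_io_bus_register_dev() is the kernel's much heavier way to get that ordering). A C11 userspace analogue of the same reasoning, not the kernel mechanism itself:

#include <stdatomic.h>

static int config;          /* stands in for the registered MMIO devices */
static _Atomic int ready;   /* stands in for dist->ready */

void producer(void)
{
	config = 42;                        /* fully configure first */
	atomic_store_explicit(&ready, 1,    /* then publish */
			      memory_order_release);
}

int consumer(void)
{
	if (atomic_load_explicit(&ready, memory_order_acquire))
		return config;              /* guaranteed to observe 42 */
	return -1;                          /* not ready yet */
}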
+ */ + if (dev->kvm->arch.vgic.nr_spis) ret = -EBUSY; else dev->kvm->arch.vgic.nr_spis = diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 8bbd0b20136a..5db82bfc9dc1 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -2220,7 +2220,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx); if (flags & BPF_TRAMP_F_CALL_ORIG) { - emit_a64_mov_i64(A64_R(0), (const u64)im, ctx); + /* for the first pass, assume the worst case */ + if (!ctx->image) + ctx->idx += 4; + else + emit_a64_mov_i64(A64_R(0), (const u64)im, ctx); emit_call((const u64)__bpf_tramp_enter, ctx); } @@ -2264,7 +2268,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, if (flags & BPF_TRAMP_F_CALL_ORIG) { im->ip_epilogue = ctx->ro_image + ctx->idx; - emit_a64_mov_i64(A64_R(0), (const u64)im, ctx); + /* for the first pass, assume the worst case */ + if (!ctx->image) + ctx->idx += 4; + else + emit_a64_mov_i64(A64_R(0), (const u64)im, ctx); emit_call((const u64)__bpf_tramp_exit, ctx); } diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 0eb0436ad4ce..bb35c34f86d2 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -25,6 +25,8 @@ config LOONGARCH select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_SET_MEMORY + select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_INLINE_READ_LOCK if !PREEMPTION select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION @@ -82,6 +84,7 @@ config LOONGARCH select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_DEVICES + select GENERIC_CPU_VULNERABILITIES select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY select GENERIC_IOREMAP if !ARCH_IOREMAP @@ -147,7 +150,7 @@ config LOONGARCH select HAVE_LIVEPATCH select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI - select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS && AS_HAS_THIN_ADD_SUB && !CC_IS_CLANG + select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS && AS_HAS_THIN_ADD_SUB select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -267,7 +270,7 @@ config AS_HAS_FCSR_CLASS def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0) config AS_HAS_THIN_ADD_SUB - def_bool $(cc-option,-Wa$(comma)-mthin-add-sub) + def_bool $(cc-option,-Wa$(comma)-mthin-add-sub) || AS_IS_LLVM config AS_HAS_LSX_EXTENSION def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0) diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypto/crc32-loongarch.c index 3eebea3a7b47..b7d9782827f5 100644 --- a/arch/loongarch/crypto/crc32-loongarch.c +++ b/arch/loongarch/crypto/crc32-loongarch.c @@ -13,7 +13,7 @@ #include <crypto/internal/hash.h> #include <asm/cpu-features.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #define _CRC32(crc, value, size, type) \ do { \ diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h index 99af8b3160a8..c86f0ab922ec 100644 --- a/arch/loongarch/include/asm/atomic.h +++ b/arch/loongarch/include/asm/atomic.h @@ -15,6 +15,7 @@ #define __LL "ll.w " #define __SC "sc.w " #define __AMADD "amadd.w " +#define __AMOR "amor.w " #define __AMAND_DB "amand_db.w " #define __AMOR_DB "amor_db.w " #define __AMXOR_DB "amxor_db.w " @@ -22,6 +23,7 @@ #define __LL "ll.d " #define __SC "sc.d " #define __AMADD "amadd.d " +#define __AMOR "amor.d " #define __AMAND_DB "amand_db.d " #define __AMOR_DB "amor_db.d " #define 
__AMXOR_DB "amxor_db.d " diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index 6d5846dd075c..7657e016233f 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -26,6 +26,10 @@ struct loongson_board_info { #define NR_WORDS DIV_ROUND_UP(NR_CPUS, BITS_PER_LONG) +/* + * The "core" of cores_per_node and cores_per_package stands for a + * logical core, which means in a SMT system it stands for a thread. + */ struct loongson_system_configuration { int nr_cpus; int nr_nodes; diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index 16a716f88a5c..fc83bb32f9f0 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -51,6 +51,7 @@ #define cpu_has_lbt_mips cpu_opt(LOONGARCH_CPU_LBT_MIPS) #define cpu_has_lbt (cpu_has_lbt_x86|cpu_has_lbt_arm|cpu_has_lbt_mips) #define cpu_has_csr cpu_opt(LOONGARCH_CPU_CSR) +#define cpu_has_iocsr cpu_opt(LOONGARCH_CPU_IOCSR) #define cpu_has_tlb cpu_opt(LOONGARCH_CPU_TLB) #define cpu_has_watch cpu_opt(LOONGARCH_CPU_WATCH) #define cpu_has_vint cpu_opt(LOONGARCH_CPU_VINT) @@ -65,6 +66,7 @@ #define cpu_has_guestid cpu_opt(LOONGARCH_CPU_GUESTID) #define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) #define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW) +#define cpu_has_lspw cpu_opt(LOONGARCH_CPU_LSPW) #define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT) #endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 843f9c4ec980..98cf4d7b4b0a 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -87,19 +87,21 @@ enum cpu_type_enum { #define CPU_FEATURE_LBT_MIPS 12 /* CPU has MIPS Binary Translation */ #define CPU_FEATURE_TLB 13 /* CPU has TLB */ #define CPU_FEATURE_CSR 14 /* CPU has CSR */ -#define CPU_FEATURE_WATCH 15 /* CPU has watchpoint registers */ -#define CPU_FEATURE_VINT 16 /* CPU has vectored interrupts */ -#define CPU_FEATURE_CSRIPI 17 /* CPU has CSR-IPI */ -#define CPU_FEATURE_EXTIOI 18 /* CPU has EXT-IOI */ -#define CPU_FEATURE_PREFETCH 19 /* CPU has prefetch instructions */ -#define CPU_FEATURE_PMP 20 /* CPU has perfermance counter */ -#define CPU_FEATURE_SCALEFREQ 21 /* CPU supports cpufreq scaling */ -#define CPU_FEATURE_FLATMODE 22 /* CPU has flat mode */ -#define CPU_FEATURE_EIODECODE 23 /* CPU has EXTIOI interrupt pin decode mode */ -#define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */ -#define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */ -#define CPU_FEATURE_PTW 26 /* CPU has hardware page table walker */ -#define CPU_FEATURE_AVECINT 27 /* CPU has avec interrupt */ +#define CPU_FEATURE_IOCSR 15 /* CPU has IOCSR */ +#define CPU_FEATURE_WATCH 16 /* CPU has watchpoint registers */ +#define CPU_FEATURE_VINT 17 /* CPU has vectored interrupts */ +#define CPU_FEATURE_CSRIPI 18 /* CPU has CSR-IPI */ +#define CPU_FEATURE_EXTIOI 19 /* CPU has EXT-IOI */ +#define CPU_FEATURE_PREFETCH 20 /* CPU has prefetch instructions */ +#define CPU_FEATURE_PMP 21 /* CPU has perfermance counter */ +#define CPU_FEATURE_SCALEFREQ 22 /* CPU supports cpufreq scaling */ +#define CPU_FEATURE_FLATMODE 23 /* CPU has flat mode */ +#define CPU_FEATURE_EIODECODE 24 /* CPU has EXTIOI interrupt pin decode mode */ +#define CPU_FEATURE_GUESTID 25 /* CPU has GuestID feature */ +#define CPU_FEATURE_HYPERVISOR 26 /* CPU has hypervisor (running in VM) */ +#define CPU_FEATURE_PTW 27 /* CPU has hardware 
page table walker */ +#define CPU_FEATURE_LSPW 28 /* CPU has LSPW (lddir/ldpte instructions) */ +#define CPU_FEATURE_AVECINT 29 /* CPU has AVEC interrupt */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -115,6 +117,7 @@ enum cpu_type_enum { #define LOONGARCH_CPU_LBT_ARM BIT_ULL(CPU_FEATURE_LBT_ARM) #define LOONGARCH_CPU_LBT_MIPS BIT_ULL(CPU_FEATURE_LBT_MIPS) #define LOONGARCH_CPU_TLB BIT_ULL(CPU_FEATURE_TLB) +#define LOONGARCH_CPU_IOCSR BIT_ULL(CPU_FEATURE_IOCSR) #define LOONGARCH_CPU_CSR BIT_ULL(CPU_FEATURE_CSR) #define LOONGARCH_CPU_WATCH BIT_ULL(CPU_FEATURE_WATCH) #define LOONGARCH_CPU_VINT BIT_ULL(CPU_FEATURE_VINT) @@ -128,6 +131,7 @@ enum cpu_type_enum { #define LOONGARCH_CPU_GUESTID BIT_ULL(CPU_FEATURE_GUESTID) #define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) #define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW) +#define LOONGARCH_CPU_LSPW BIT_ULL(CPU_FEATURE_LSPW) #define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT) #endif /* _ASM_CPU_H */ diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h index cd6084f4e153..7f52bd31b9d4 100644 --- a/arch/loongarch/include/asm/kasan.h +++ b/arch/loongarch/include/asm/kasan.h @@ -16,7 +16,7 @@ #define XRANGE_SHIFT (48) /* Valid address length */ -#define XRANGE_SHADOW_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) +#define XRANGE_SHADOW_SHIFT min(cpu_vabits, VA_BITS) /* Used for taking out the valid address */ #define XRANGE_SHADOW_MASK GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0) /* One segment whole address space size */ @@ -25,6 +25,7 @@ /* 64-bit segment value. */ #define XKPRANGE_UC_SEG (0x8000) #define XKPRANGE_CC_SEG (0x9000) +#define XKPRANGE_WC_SEG (0xa000) #define XKVRANGE_VC_SEG (0xffff) /* Cached */ @@ -41,20 +42,28 @@ #define XKPRANGE_UC_SHADOW_SIZE (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) #define XKPRANGE_UC_SHADOW_END (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE) +/* WriteCombine */ +#define XKPRANGE_WC_START WRITECOMBINE_BASE +#define XKPRANGE_WC_SIZE XRANGE_SIZE +#define XKPRANGE_WC_KASAN_OFFSET XKPRANGE_UC_SHADOW_END +#define XKPRANGE_WC_SHADOW_SIZE (XKPRANGE_WC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) +#define XKPRANGE_WC_SHADOW_END (XKPRANGE_WC_KASAN_OFFSET + XKPRANGE_WC_SHADOW_SIZE) + /* VMALLOC (Cached or UnCached) */ #define XKVRANGE_VC_START MODULES_VADDR #define XKVRANGE_VC_SIZE round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE) -#define XKVRANGE_VC_KASAN_OFFSET XKPRANGE_UC_SHADOW_END +#define XKVRANGE_VC_KASAN_OFFSET XKPRANGE_WC_SHADOW_END #define XKVRANGE_VC_SHADOW_SIZE (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) #define XKVRANGE_VC_SHADOW_END (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE) /* KAsan shadow memory start right after vmalloc. 
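For context on the XKPRANGE_WC_* additions above: LoongArch KASAN maps each 2^48-byte address segment (cached, uncached, and now write-combine, plus the vmalloc range) to its own contiguous shadow block, so translation is a segment switch plus the usual >> 3 scaling. A simplified model of kasan_mem_to_shadow() with made-up offsets (the real ones chain each shadow block after the previous one):

#include <stdint.h>

#define SCALE_SHIFT 3                        /* KASAN_SHADOW_SCALE_SHIFT */
#define SEG_MASK    ((1ULL << 48) - 1)       /* XRANGE_SHADOW_MASK */

/* illustrative shadow offsets only */
enum { OFF_CC = 0x100, OFF_UC = 0x200, OFF_WC = 0x300, OFF_VC = 0x400 };

static uint64_t mem_to_shadow(uint64_t addr)
{
	uint64_t off;

	switch (addr >> 48) {                /* the segment selector */
	case 0x9000: off = OFF_CC; break;    /* cached */
	case 0x8000: off = OFF_UC; break;    /* uncached */
	case 0xa000: off = OFF_WC; break;    /* write-combine (new) */
	default:     off = OFF_VC; break;    /* vmalloc/vmap space */
	}

	return ((addr & SEG_MASK) >> SCALE_SHIFT) + off;
}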
*/ #define KASAN_SHADOW_START round_up(KFENCE_AREA_END, PGDIR_SIZE) #define KASAN_SHADOW_SIZE (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET) -#define KASAN_SHADOW_END round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE) +#define KASAN_SHADOW_END (round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE) - 1) #define XKPRANGE_CC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET) #define XKPRANGE_UC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET) +#define XKPRANGE_WC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_WC_KASAN_OFFSET) #define XKVRANGE_VC_SHADOW_OFFSET (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET) extern bool kasan_early_stage; diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 04bf1a7f903a..64ad277e096e 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -62,6 +62,7 @@ #define LOONGARCH_CPUCFG1 0x1 #define CPUCFG1_ISGR32 BIT(0) #define CPUCFG1_ISGR64 BIT(1) +#define CPUCFG1_ISA GENMASK(1, 0) #define CPUCFG1_PAGING BIT(2) #define CPUCFG1_IOCSR BIT(3) #define CPUCFG1_PABITS GENMASK(11, 4) @@ -249,7 +250,7 @@ #define CSR_ESTAT_IS_WIDTH 15 #define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT) -#define LOONGARCH_CSR_ERA 0x6 /* ERA */ +#define LOONGARCH_CSR_ERA 0x6 /* Exception return address */ #define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */ diff --git a/arch/loongarch/include/asm/mmu_context.h b/arch/loongarch/include/asm/mmu_context.h index 9f97c3453b9c..304363bd3935 100644 --- a/arch/loongarch/include/asm/mmu_context.h +++ b/arch/loongarch/include/asm/mmu_context.h @@ -49,12 +49,12 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) /* Normal, classic get_new_mmu_context */ static inline void -get_new_mmu_context(struct mm_struct *mm, unsigned long cpu) +get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, bool *need_flush) { u64 asid = asid_cache(cpu); if (!((++asid) & cpu_asid_mask(&cpu_data[cpu]))) - local_flush_tlb_user(); /* start new asid cycle */ + *need_flush = true; /* start new asid cycle */ cpu_context(cpu, mm) = asid_cache(cpu) = asid; } @@ -74,21 +74,34 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) return 0; } +static inline void atomic_update_pgd_asid(unsigned long asid, unsigned long pgdl) +{ + __asm__ __volatile__( + "csrwr %[pgdl_val], %[pgdl_reg] \n\t" + "csrwr %[asid_val], %[asid_reg] \n\t" + : [asid_val] "+r" (asid), [pgdl_val] "+r" (pgdl) + : [asid_reg] "i" (LOONGARCH_CSR_ASID), [pgdl_reg] "i" (LOONGARCH_CSR_PGDL) + : "memory" + ); +} + static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { + bool need_flush = false; unsigned int cpu = smp_processor_id(); /* Check if our ASID is of an older version and thus invalid */ if (!asid_valid(next, cpu)) - get_new_mmu_context(next, cpu); - - write_csr_asid(cpu_asid(cpu, next)); + get_new_mmu_context(next, cpu, &need_flush); if (next != &init_mm) - csr_write64((unsigned long)next->pgd, LOONGARCH_CSR_PGDL); + atomic_update_pgd_asid(cpu_asid(cpu, next), (unsigned long)next->pgd); else - csr_write64((unsigned long)invalid_pg_dir, LOONGARCH_CSR_PGDL); + atomic_update_pgd_asid(cpu_asid(cpu, next), (unsigned long)invalid_pg_dir); + + if (need_flush) + local_flush_tlb_user(); /* Flush tlb after update ASID */ /* * Mark current->active_mm as not "active" anymore. 
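Two things change in the context-switch path above: get_new_mmu_context() now only records that the ASID space rolled over, and switch_mm_irqs_off() installs the new PGD and ASID with paired csrwr instructions before flushing, mirroring the "Flush tlb after update ASID" comment. The rollover bookkeeping in isolation, assuming a 10-bit hardware ASID field:

#include <stdbool.h>

#define ASID_MASK 0x3ffUL   /* assumption: 10-bit hardware ASID */

/* mirrors get_new_mmu_context(): on wrap-around every previously
 * handed-out ASID may be reused, so the caller must flush, but only
 * once the new ASID is actually live in the CSR */
static unsigned long bump_asid(unsigned long *asid_cache, bool *need_flush)
{
	unsigned long asid = *asid_cache + 1;

	if (!(asid & ASID_MASK))    /* low bits wrapped: new generation */
		*need_flush = true;

	return *asid_cache = asid;
}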
@@ -135,9 +148,15 @@ drop_mmu_context(struct mm_struct *mm, unsigned int cpu) asid = read_csr_asid() & cpu_asid_mask(&current_cpu_data); if (asid == cpu_asid(cpu, mm)) { + bool need_flush = false; + if (!current->mm || (current->mm == mm)) { - get_new_mmu_context(mm, cpu); + get_new_mmu_context(mm, cpu, &need_flush); + write_csr_asid(cpu_asid(cpu, mm)); + if (need_flush) + local_flush_tlb_user(); /* Flush tlb after update ASID */ + goto out; } } diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index e85df33f11c7..8f21567a3188 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -113,10 +113,7 @@ struct page *tlb_virt_to_page(unsigned long kaddr); extern int __virt_addr_valid(volatile void *kaddr); #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr)) -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index 8f290e5546cf..87be9b14e9da 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -68,75 +68,6 @@ PERCPU_OP(and, and, &) PERCPU_OP(or, or, |) #undef PERCPU_OP -static __always_inline unsigned long __percpu_read(void __percpu *ptr, int size) -{ - unsigned long ret; - - switch (size) { - case 1: - __asm__ __volatile__ ("ldx.b %[ret], $r21, %[ptr] \n" - : [ret] "=&r"(ret) - : [ptr] "r"(ptr) - : "memory"); - break; - case 2: - __asm__ __volatile__ ("ldx.h %[ret], $r21, %[ptr] \n" - : [ret] "=&r"(ret) - : [ptr] "r"(ptr) - : "memory"); - break; - case 4: - __asm__ __volatile__ ("ldx.w %[ret], $r21, %[ptr] \n" - : [ret] "=&r"(ret) - : [ptr] "r"(ptr) - : "memory"); - break; - case 8: - __asm__ __volatile__ ("ldx.d %[ret], $r21, %[ptr] \n" - : [ret] "=&r"(ret) - : [ptr] "r"(ptr) - : "memory"); - break; - default: - ret = 0; - BUILD_BUG(); - } - - return ret; -} - -static __always_inline void __percpu_write(void __percpu *ptr, unsigned long val, int size) -{ - switch (size) { - case 1: - __asm__ __volatile__("stx.b %[val], $r21, %[ptr] \n" - : - : [val] "r" (val), [ptr] "r" (ptr) - : "memory"); - break; - case 2: - __asm__ __volatile__("stx.h %[val], $r21, %[ptr] \n" - : - : [val] "r" (val), [ptr] "r" (ptr) - : "memory"); - break; - case 4: - __asm__ __volatile__("stx.w %[val], $r21, %[ptr] \n" - : - : [val] "r" (val), [ptr] "r" (ptr) - : "memory"); - break; - case 8: - __asm__ __volatile__("stx.d %[val], $r21, %[ptr] \n" - : - : [val] "r" (val), [ptr] "r" (ptr) - : "memory"); - break; - default: - BUILD_BUG(); - } -} - static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size) { switch (size) { @@ -157,6 +88,33 @@ static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, return 0; } +#define __pcpu_op_1(op) op ".b " +#define __pcpu_op_2(op) op ".h " +#define __pcpu_op_4(op) op ".w " +#define __pcpu_op_8(op) op ".d " + +#define _percpu_read(size, _pcp) \ +({ \ + typeof(_pcp) __pcp_ret; \ + \ + __asm__ __volatile__( \ + __pcpu_op_##size("ldx") "%[ret], $r21, %[ptr] \n" \ + : [ret] "=&r"(__pcp_ret) \ + : [ptr] "r"(&(_pcp)) \ + : "memory"); \ + \ + __pcp_ret; \ +}) + +#define _percpu_write(size, _pcp, _val) \ +do { \ + __asm__ __volatile__( \ + __pcpu_op_##size("stx") "%[val], $r21, %[ptr] \n" \ + : \ +
: [val] "r"(_val), [ptr] "r"(&(_pcp)) \ + : "memory"); \ +} while (0) + /* this_cpu_cmpxchg */ #define _protect_cmpxchg_local(pcp, o, n) \ ({ \ @@ -167,18 +125,6 @@ static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, __ret; \ }) -#define _percpu_read(pcp) \ -({ \ - typeof(pcp) __retval; \ - __retval = (typeof(pcp))__percpu_read(&(pcp), sizeof(pcp)); \ - __retval; \ -}) - -#define _percpu_write(pcp, val) \ -do { \ - __percpu_write(&(pcp), (unsigned long)(val), sizeof(pcp)); \ -} while (0) \ - #define _pcp_protect(operation, pcp, val) \ ({ \ typeof(pcp) __retval; \ @@ -215,15 +161,15 @@ do { \ #define this_cpu_or_4(pcp, val) _percpu_or(pcp, val) #define this_cpu_or_8(pcp, val) _percpu_or(pcp, val) -#define this_cpu_read_1(pcp) _percpu_read(pcp) -#define this_cpu_read_2(pcp) _percpu_read(pcp) -#define this_cpu_read_4(pcp) _percpu_read(pcp) -#define this_cpu_read_8(pcp) _percpu_read(pcp) +#define this_cpu_read_1(pcp) _percpu_read(1, pcp) +#define this_cpu_read_2(pcp) _percpu_read(2, pcp) +#define this_cpu_read_4(pcp) _percpu_read(4, pcp) +#define this_cpu_read_8(pcp) _percpu_read(8, pcp) -#define this_cpu_write_1(pcp, val) _percpu_write(pcp, val) -#define this_cpu_write_2(pcp, val) _percpu_write(pcp, val) -#define this_cpu_write_4(pcp, val) _percpu_write(pcp, val) -#define this_cpu_write_8(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_1(pcp, val) _percpu_write(1, pcp, val) +#define this_cpu_write_2(pcp, val) _percpu_write(2, pcp, val) +#define this_cpu_write_4(pcp, val) _percpu_write(4, pcp, val) +#define this_cpu_write_8(pcp, val) _percpu_write(8, pcp, val) #define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val) #define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val) diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 4e2d6b7ca2ee..a7b9c9e73593 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -10,6 +10,7 @@ #define __HAVE_ARCH_PMD_ALLOC_ONE #define __HAVE_ARCH_PUD_ALLOC_ONE +#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL #include <asm-generic/pgalloc.h> static inline void pmd_populate_kernel(struct mm_struct *mm, @@ -44,6 +45,16 @@ extern void pagetable_init(void); extern pgd_t *pgd_alloc(struct mm_struct *mm); +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) +{ + pte_t *pte = __pte_alloc_one_kernel(mm); + + if (pte) + kernel_pte_init(pte); + + return pte; +} + #define __pte_free_tlb(tlb, pte, address) \ do { \ pagetable_pte_dtor(page_ptdesc(pte)); \ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 85431f20a14d..20714b73f14c 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -269,6 +269,7 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pm extern void pgd_init(void *addr); extern void pud_init(void *addr); extern void pmd_init(void *addr); +extern void kernel_pte_init(void *addr); /* * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that @@ -325,45 +326,17 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) { WRITE_ONCE(*ptep, pteval); - if (pte_val(pteval) & _PAGE_GLOBAL) { - pte_t *buddy = ptep_buddy(ptep); - /* - * Make sure the buddy is global too (if it's !none, - * it better already be global) - */ #ifdef CONFIG_SMP - /* - * For SMP, multiple CPUs can race, so we need to do - * this atomically. 
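The LL/SC fix-up deleted here kept _PAGE_GLOBAL consistent across a buddy PTE pair at store time. The series removes the need for it: kernel_pte_init() (added in mm/pgtable.c further down) pre-fills every empty kernel PTE with _PAGE_GLOBAL, and the new pte_clear() preserves only that bit, so both halves of a pair stay consistent and set_pte() is left with just an ordering barrier on SMP. A plain-loop equivalent of the unrolled initializer, as a kernel-style sketch for reference:

/* behaviourally the same as kernel_pte_init() below, which merely
 * unrolls this loop eight entries at a time */
static void kernel_pte_init_equiv(void *addr)
{
	unsigned long *p = addr;

	for (int i = 0; i < PTRS_PER_PTE; i++)
		p[i] = _PAGE_GLOBAL;    /* empty kernel PTEs keep only G */
}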
- */ - unsigned long page_global = _PAGE_GLOBAL; - unsigned long tmp; - - __asm__ __volatile__ ( - "1:" __LL "%[tmp], %[buddy] \n" - " bnez %[tmp], 2f \n" - " or %[tmp], %[tmp], %[global] \n" - __SC "%[tmp], %[buddy] \n" - " beqz %[tmp], 1b \n" - " nop \n" - "2: \n" - __WEAK_LLSC_MB - : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) - : [global] "r" (page_global)); -#else /* !CONFIG_SMP */ - if (pte_none(ptep_get(buddy))) - WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL)); -#endif /* CONFIG_SMP */ - } + if (pte_val(pteval) & _PAGE_GLOBAL) + DBAR(0b11000); /* o_wrw = 0b11000 */ +#endif } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - /* Preserve global status for the pair */ - if (pte_val(ptep_get(ptep_buddy(ptep))) & _PAGE_GLOBAL) - set_pte(ptep, __pte(_PAGE_GLOBAL)); - else - set_pte(ptep, __pte(0)); + pte_t pte = ptep_get(ptep); + pte_val(pte) &= _PAGE_GLOBAL; + set_pte(ptep, pte); } #define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1) diff --git a/arch/loongarch/include/asm/set_memory.h b/arch/loongarch/include/asm/set_memory.h new file mode 100644 index 000000000000..d70505b6676c --- /dev/null +++ b/arch/loongarch/include/asm/set_memory.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_LOONGARCH_SET_MEMORY_H +#define _ASM_LOONGARCH_SET_MEMORY_H + +/* + * Functions to change memory attributes. + */ +int set_memory_x(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); + +bool kernel_page_present(struct page *page); +int set_direct_map_default_noflush(struct page *page); +int set_direct_map_invalid_noflush(struct page *page); + +#endif /* _ASM_LOONGARCH_SET_MEMORY_H */ diff --git a/arch/loongarch/include/uapi/asm/hwcap.h b/arch/loongarch/include/uapi/asm/hwcap.h index 6955a7cb2c65..2b34e56cfa9e 100644 --- a/arch/loongarch/include/uapi/asm/hwcap.h +++ b/arch/loongarch/include/uapi/asm/hwcap.h @@ -17,5 +17,6 @@ #define HWCAP_LOONGARCH_LBT_ARM (1 << 11) #define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) #define HWCAP_LOONGARCH_PTW (1 << 13) +#define HWCAP_LOONGARCH_LSPW (1 << 14) #endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h index 6c22f616b8f1..5cd121275bac 100644 --- a/arch/loongarch/include/uapi/asm/sigcontext.h +++ b/arch/loongarch/include/uapi/asm/sigcontext.h @@ -9,7 +9,6 @@ #define _UAPI_ASM_SIGCONTEXT_H #include <linux/types.h> -#include <linux/posix_types.h> /* FP context was used */ #define SC_USED_FP (1 << 0) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 929a497c987e..382a09a7152c 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -9,6 +9,7 @@ #include <linux/init.h> #include <linux/acpi.h> +#include <linux/efi-bgrt.h> #include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/memblock.h> @@ -57,48 +58,48 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) return ioremap_cache(phys, size); } -static int cpu_enumerated = 0; - #ifdef CONFIG_SMP -static int set_processor_mask(u32 id, u32 flags) +static int set_processor_mask(u32 id, u32 pass) { - int nr_cpus; - int cpu, cpuid = id; - - if (!cpu_enumerated) - nr_cpus = NR_CPUS; - else - nr_cpus = nr_cpu_ids; + int cpu = -1, cpuid = id; - if (num_processors >= 
nr_cpus) { + if (num_processors >= NR_CPUS) { pr_warn(PREFIX "nr_cpus limit of %i reached." - " processor 0x%x ignored.\n", nr_cpus, cpuid); + " processor 0x%x ignored.\n", NR_CPUS, cpuid); return -ENODEV; } + if (cpuid == loongson_sysconf.boot_cpu_id) cpu = 0; - else - cpu = find_first_zero_bit(cpumask_bits(cpu_present_mask), NR_CPUS); - - if (!cpu_enumerated) - set_cpu_possible(cpu, true); - if (flags & ACPI_MADT_ENABLED) { + switch (pass) { + case 1: /* Pass 1 handle enabled processors */ + if (cpu < 0) + cpu = find_first_zero_bit(cpumask_bits(cpu_present_mask), NR_CPUS); num_processors++; set_cpu_present(cpu, true); - __cpu_number_map[cpuid] = cpu; - __cpu_logical_map[cpu] = cpuid; - } else + break; + case 2: /* Pass 2 handle disabled processors */ + if (cpu < 0) + cpu = find_first_zero_bit(cpumask_bits(cpu_possible_mask), NR_CPUS); disabled_cpus++; + break; + default: + return cpu; + } + + set_cpu_possible(cpu, true); + __cpu_number_map[cpuid] = cpu; + __cpu_logical_map[cpu] = cpuid; return cpu; } #endif static int __init -acpi_parse_processor(union acpi_subtable_headers *header, const unsigned long end) +acpi_parse_p1_processor(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_core_pic *processor = NULL; @@ -109,13 +110,30 @@ acpi_parse_processor(union acpi_subtable_headers *header, const unsigned long en acpi_table_print_madt_entry(&header->common); #ifdef CONFIG_SMP acpi_core_pic[processor->core_id] = *processor; - set_processor_mask(processor->core_id, processor->flags); + if (processor->flags & ACPI_MADT_ENABLED) + set_processor_mask(processor->core_id, 1); #endif return 0; } static int __init +acpi_parse_p2_processor(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_core_pic *processor = NULL; + + processor = (struct acpi_madt_core_pic *)header; + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + +#ifdef CONFIG_SMP + if (!(processor->flags & ACPI_MADT_ENABLED)) + set_processor_mask(processor->core_id, 2); +#endif + + return 0; +} +static int __init acpi_parse_eio_master(union acpi_subtable_headers *header, const unsigned long end) { static int core = 0; @@ -142,12 +160,14 @@ static void __init acpi_process_madt(void) } #endif acpi_table_parse_madt(ACPI_MADT_TYPE_CORE_PIC, - acpi_parse_processor, MAX_CORE_PIC); + acpi_parse_p1_processor, MAX_CORE_PIC); + + acpi_table_parse_madt(ACPI_MADT_TYPE_CORE_PIC, + acpi_parse_p2_processor, MAX_CORE_PIC); acpi_table_parse_madt(ACPI_MADT_TYPE_EIO_PIC, acpi_parse_eio_master, MAX_IO_PICS); - cpu_enumerated = 1; loongson_sysconf.nr_cpus = num_processors; } @@ -212,6 +232,9 @@ void __init acpi_boot_table_init(void) /* Do not enable ACPI SPCR console by default */ acpi_parse_spcr(earlycon_acpi_spcr_enable, false); + if (IS_ENABLED(CONFIG_ACPI_BGRT)) + acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); + return; fdt_earlycon: @@ -306,6 +329,10 @@ static int __ref acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) int nid; nid = acpi_get_node(handle); + + if (nid != NUMA_NO_NODE) + nid = early_cpu_to_node(cpu); + if (nid != NUMA_NO_NODE) { set_cpuid_to_node(physid, nid); node_set(nid, numa_nodes_parsed); @@ -320,12 +347,14 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu { int cpu; - cpu = set_processor_mask(physid, ACPI_MADT_ENABLED); - if (cpu < 0) { + cpu = cpu_number_map(physid); + if (cpu < 0 || cpu >= nr_cpu_ids) { pr_info(PREFIX "Unable to map lapic to logical cpu number\n"); - return cpu; + return -ERANGE; } + num_processors++; + 
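set_processor_mask() is now driven in two passes over the MADT so that enabled processors claim the low logical CPU numbers first and disabled (hot-pluggable) processors only occupy "possible" slots afterwards, keeping logical IDs stable across hotplug. A condensed model of the flow, with enumerate_cpu() as a hypothetical stand-in for set_processor_mask():

struct core_pic { unsigned int id; unsigned int flags; };
#define MADT_ENABLED 0x1

void enumerate_cpu(unsigned int id, int pass);  /* hypothetical */

static void parse_madt(const struct core_pic *e, int n)
{
	/* pass 1: enabled processors become present (and possible) */
	for (int i = 0; i < n; i++)
		if (e[i].flags & MADT_ENABLED)
			enumerate_cpu(e[i].id, 1);

	/* pass 2: disabled processors are possible only, for hotplug */
	for (int i = 0; i < n; i++)
		if (!(e[i].flags & MADT_ENABLED))
			enumerate_cpu(e[i].id, 2);
}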
set_cpu_present(cpu, true); acpi_map_cpu2node(handle, cpu, physid); *pcpu = cpu; diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 14f0449f5452..cbce099037b2 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -91,12 +91,30 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) unsigned int config; unsigned long asid_mask; - c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR | - LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH; + c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR | LOONGARCH_CPU_VINT; elf_hwcap = HWCAP_LOONGARCH_CPUCFG; config = read_cpucfg(LOONGARCH_CPUCFG1); + + switch (config & CPUCFG1_ISA) { + case 0: + set_isa(c, LOONGARCH_CPU_ISA_LA32R); + break; + case 1: + set_isa(c, LOONGARCH_CPU_ISA_LA32S); + break; + case 2: + set_isa(c, LOONGARCH_CPU_ISA_LA64); + break; + default: + pr_warn("Warning: unknown ISA level\n"); + } + + if (config & CPUCFG1_PAGING) + c->options |= LOONGARCH_CPU_TLB; + if (config & CPUCFG1_IOCSR) + c->options |= LOONGARCH_CPU_IOCSR; if (config & CPUCFG1_UAL) { c->options |= LOONGARCH_CPU_UAL; elf_hwcap |= HWCAP_LOONGARCH_UAL; @@ -139,6 +157,10 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_PTW; elf_hwcap |= HWCAP_LOONGARCH_PTW; } + if (config & CPUCFG2_LSPW) { + c->options |= LOONGARCH_CPU_LSPW; + elf_hwcap |= HWCAP_LOONGARCH_LSPW; + } if (config & CPUCFG2_LVZP) { c->options |= LOONGARCH_CPU_LVZ; elf_hwcap |= HWCAP_LOONGARCH_LVZ; @@ -162,22 +184,6 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) if (config & CPUCFG6_PMP) c->options |= LOONGARCH_CPU_PMP; - config = iocsr_read32(LOONGARCH_IOCSR_FEATURES); - if (config & IOCSRF_CSRIPI) - c->options |= LOONGARCH_CPU_CSRIPI; - if (config & IOCSRF_EXTIOI) - c->options |= LOONGARCH_CPU_EXTIOI; - if (config & IOCSRF_FREQSCALE) - c->options |= LOONGARCH_CPU_SCALEFREQ; - if (config & IOCSRF_FLATMODE) - c->options |= LOONGARCH_CPU_FLATMODE; - if (config & IOCSRF_EIODECODE) - c->options |= LOONGARCH_CPU_EIODECODE; - if (config & IOCSRF_AVEC) - c->options |= LOONGARCH_CPU_AVECINT; - if (config & IOCSRF_VM) - c->options |= LOONGARCH_CPU_HYPERVISOR; - config = csr_read32(LOONGARCH_CSR_ASID); config = (config & CSR_ASID_BIT) >> CSR_ASID_BIT_SHIFT; asid_mask = GENMASK(config - 1, 0); @@ -210,6 +216,9 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) default: pr_warn("Warning: unknown TLB type\n"); } + + if (get_num_brps() + get_num_wrps()) + c->options |= LOONGARCH_CPU_WATCH; } #define MAX_NAME_LEN 32 @@ -220,52 +229,67 @@ static char cpu_full_name[MAX_NAME_LEN] = " - "; static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int cpu) { + uint32_t config; uint64_t *vendor = (void *)(&cpu_full_name[VENDOR_OFFSET]); uint64_t *cpuname = (void *)(&cpu_full_name[CPUNAME_OFFSET]); + const char *core_name = "Unknown"; - if (!__cpu_full_name[cpu]) - __cpu_full_name[cpu] = cpu_full_name; - - *vendor = iocsr_read64(LOONGARCH_IOCSR_VENDOR); - *cpuname = iocsr_read64(LOONGARCH_IOCSR_CPUNAME); - - switch (c->processor_id & PRID_SERIES_MASK) { - case PRID_SERIES_LA132: + switch (BIT(fls(c->isa_level) - 1)) { + case LOONGARCH_CPU_ISA_LA32R: + case LOONGARCH_CPU_ISA_LA32S: c->cputype = CPU_LOONGSON32; - set_isa(c, LOONGARCH_CPU_ISA_LA32S); __cpu_family[cpu] = "Loongson-32bit"; - pr_info("32-bit Loongson Processor probed (LA132 Core)\n"); break; - case PRID_SERIES_LA264: + case LOONGARCH_CPU_ISA_LA64: c->cputype = CPU_LOONGSON64; - set_isa(c, 
LOONGARCH_CPU_ISA_LA64); __cpu_family[cpu] = "Loongson-64bit"; - pr_info("64-bit Loongson Processor probed (LA264 Core)\n"); + break; + } + + switch (c->processor_id & PRID_SERIES_MASK) { + case PRID_SERIES_LA132: + core_name = "LA132"; + break; + case PRID_SERIES_LA264: + core_name = "LA264"; break; case PRID_SERIES_LA364: - c->cputype = CPU_LOONGSON64; - set_isa(c, LOONGARCH_CPU_ISA_LA64); - __cpu_family[cpu] = "Loongson-64bit"; - pr_info("64-bit Loongson Processor probed (LA364 Core)\n"); + core_name = "LA364"; break; case PRID_SERIES_LA464: - c->cputype = CPU_LOONGSON64; - set_isa(c, LOONGARCH_CPU_ISA_LA64); - __cpu_family[cpu] = "Loongson-64bit"; - pr_info("64-bit Loongson Processor probed (LA464 Core)\n"); + core_name = "LA464"; break; case PRID_SERIES_LA664: - c->cputype = CPU_LOONGSON64; - set_isa(c, LOONGARCH_CPU_ISA_LA64); - __cpu_family[cpu] = "Loongson-64bit"; - pr_info("64-bit Loongson Processor probed (LA664 Core)\n"); + core_name = "LA664"; break; - default: /* Default to 64 bit */ - c->cputype = CPU_LOONGSON64; - set_isa(c, LOONGARCH_CPU_ISA_LA64); - __cpu_family[cpu] = "Loongson-64bit"; - pr_info("64-bit Loongson Processor probed (Unknown Core)\n"); } + + pr_info("%s Processor probed (%s Core)\n", __cpu_family[cpu], core_name); + + if (!cpu_has_iocsr) + return; + + if (!__cpu_full_name[cpu]) + __cpu_full_name[cpu] = cpu_full_name; + + *vendor = iocsr_read64(LOONGARCH_IOCSR_VENDOR); + *cpuname = iocsr_read64(LOONGARCH_IOCSR_CPUNAME); + + config = iocsr_read32(LOONGARCH_IOCSR_FEATURES); + if (config & IOCSRF_CSRIPI) + c->options |= LOONGARCH_CPU_CSRIPI; + if (config & IOCSRF_EXTIOI) + c->options |= LOONGARCH_CPU_EXTIOI; + if (config & IOCSRF_FREQSCALE) + c->options |= LOONGARCH_CPU_SCALEFREQ; + if (config & IOCSRF_FLATMODE) + c->options |= LOONGARCH_CPU_FLATMODE; + if (config & IOCSRF_EIODECODE) + c->options |= LOONGARCH_CPU_EIODECODE; + if (config & IOCSRF_AVEC) + c->options |= LOONGARCH_CPU_AVECINT; + if (config & IOCSRF_VM) + c->options |= LOONGARCH_CPU_HYPERVISOR; } #ifdef CONFIG_64BIT diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index a5fc61f8b348..e5a39bbad078 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -51,11 +51,18 @@ static u64 paravt_steal_clock(int cpu) } #ifdef CONFIG_SMP +static struct smp_ops native_ops; + static void pv_send_ipi_single(int cpu, unsigned int action) { int min, old; irq_cpustat_t *info = &per_cpu(irq_stat, cpu); + if (unlikely(action == ACTION_BOOT_CPU)) { + native_ops.send_ipi_single(cpu, action); + return; + } + old = atomic_fetch_or(BIT(action), &info->message); if (old) return; @@ -75,6 +82,11 @@ static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action) if (cpumask_empty(mask)) return; + if (unlikely(action == ACTION_BOOT_CPU)) { + native_ops.send_ipi_mask(mask, action); + return; + } + action = BIT(action); for_each_cpu(i, mask) { info = &per_cpu(irq_stat, i); @@ -147,6 +159,8 @@ static void pv_init_ipi(void) { int r, swi; + /* Init native ipi irq for ACTION_BOOT_CPU */ + native_ops.init_ipi(); swi = get_percpu_irq(INT_SWI0); if (swi < 0) panic("SWI0 IRQ mapping failed\n"); @@ -193,6 +207,7 @@ int __init pv_ipi_init(void) return 0; #ifdef CONFIG_SMP + native_ops = mp_ops; mp_ops.init_ipi = pv_init_ipi; mp_ops.send_ipi_single = pv_send_ipi_single; mp_ops.send_ipi_mask = pv_send_ipi_mask; diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index 0d33cbc47e51..6ce46d92f1f1 100644 --- a/arch/loongarch/kernel/proc.c +++ 
b/arch/loongarch/kernel/proc.c @@ -31,6 +31,7 @@ int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v) static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long n = (unsigned long) v - 1; + unsigned int isa = cpu_data[n].isa_level; unsigned int version = cpu_data[n].processor_id & 0xff; unsigned int fp_version = cpu_data[n].fpu_vers; struct proc_cpuinfo_notifier_args proc_cpuinfo_notifier_args; @@ -64,9 +65,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) cpu_pabits + 1, cpu_vabits + 1); seq_printf(m, "ISA\t\t\t:"); - if (cpu_has_loongarch32) - seq_printf(m, " loongarch32"); - if (cpu_has_loongarch64) + if (isa & LOONGARCH_CPU_ISA_LA32R) + seq_printf(m, " loongarch32r"); + if (isa & LOONGARCH_CPU_ISA_LA32S) + seq_printf(m, " loongarch32s"); + if (isa & LOONGARCH_CPU_ISA_LA64) seq_printf(m, " loongarch64"); seq_printf(m, "\n"); @@ -81,6 +84,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has_complex) seq_printf(m, " complex"); if (cpu_has_crypto) seq_printf(m, " crypto"); if (cpu_has_ptw) seq_printf(m, " ptw"); + if (cpu_has_lspw) seq_printf(m, " lspw"); if (cpu_has_lvz) seq_printf(m, " lvz"); if (cpu_has_lbt_x86) seq_printf(m, " lbt_x86"); if (cpu_has_lbt_arm) seq_printf(m, " lbt_arm"); diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index f2ff8b5d591e..6e58f65455c7 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -293,13 +293,15 @@ unsigned long stack_top(void) { unsigned long top = TASK_SIZE & PAGE_MASK; - /* Space for the VDSO & data page */ - top -= PAGE_ALIGN(current->thread.vdso->size); - top -= VVAR_SIZE; - - /* Space to randomize the VDSO base */ - if (current->flags & PF_RANDOMIZE) - top -= VDSO_RANDOMIZE_SIZE; + if (current->thread.vdso) { + /* Space for the VDSO & data page */ + top -= PAGE_ALIGN(current->thread.vdso->size); + top -= VVAR_SIZE; + + /* Space to randomize the VDSO base */ + if (current->flags & PF_RANDOMIZE) + top -= VDSO_RANDOMIZE_SIZE; + } return top; } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 00e307203ddb..cbd3c09a93c1 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -55,6 +55,7 @@ #define SMBIOS_FREQHIGH_OFFSET 0x17 #define SMBIOS_FREQLOW_MASK 0xFF #define SMBIOS_CORE_PACKAGE_OFFSET 0x23 +#define SMBIOS_THREAD_PACKAGE_OFFSET 0x25 #define LOONGSON_EFI_ENABLE (1 << 3) unsigned long fw_arg0, fw_arg1, fw_arg2; @@ -125,7 +126,7 @@ static void __init parse_cpu_table(const struct dmi_header *dm) cpu_clock_freq = freq_temp * 1000000; loongson_sysconf.cpuname = (void *)dmi_string_parse(dm, dmi_data[16]); - loongson_sysconf.cores_per_package = *(dmi_data + SMBIOS_CORE_PACKAGE_OFFSET); + loongson_sysconf.cores_per_package = *(dmi_data + SMBIOS_THREAD_PACKAGE_OFFSET); pr_info("CpuClock = %llu\n", cpu_clock_freq); } diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 9afc2d8b3414..5d59e9ce2772 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -302,7 +302,7 @@ static void __init fdt_smp_setup(void) __cpu_number_map[cpuid] = cpu; __cpu_logical_map[cpu] = cpuid; - early_numa_add_cpu(cpu, 0); + early_numa_add_cpu(cpuid, 0); set_cpuid_to_node(cpuid, 0); } @@ -331,11 +331,11 @@ void __init loongson_prepare_cpus(unsigned int max_cpus) int i = 0; parse_acpi_topology(); + cpu_data[0].global_id = cpu_logical_map(0); for (i = 0; i < loongson_sysconf.nr_cpus; i++) { set_cpu_present(i, true); csr_mail_send(0, __cpu_logical_map[i], 0); - 
cpu_data[i].global_id = __cpu_logical_map[i]; } per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; @@ -380,6 +380,7 @@ void loongson_init_secondary(void) cpu_logical_map(cpu) / loongson_sysconf.cores_per_package; cpu_data[cpu].core = pptt_enabled ? cpu_data[cpu].core : cpu_logical_map(cpu) % loongson_sysconf.cores_per_package; + cpu_data[cpu].global_id = cpu_logical_map(cpu); } void loongson_smp_finish(void) diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c index ba5d0930a74f..168bd97540f8 100644 --- a/arch/loongarch/kernel/syscall.c +++ b/arch/loongarch/kernel/syscall.c @@ -79,7 +79,3 @@ void noinstr __no_stack_protector do_syscall(struct pt_regs *regs) syscall_exit_to_user_mode(regs); } - -#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET -STACK_FRAME_NON_STANDARD(do_syscall); -#endif diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index f9f4eb00c92e..c57b4134f3e8 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -555,6 +555,9 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) #else unsigned int *pc; + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_enable(); + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr); /* @@ -579,6 +582,8 @@ sigbus: die_if_kernel("Kernel ale access", regs); force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); out: + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_disable(); #endif irqentry_exit(regs, state); } diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c index f6fcc52aefae..2c0d852ca536 100644 --- a/arch/loongarch/kernel/vdso.c +++ b/arch/loongarch/kernel/vdso.c @@ -34,7 +34,6 @@ static union { struct loongarch_vdso_data vdata; } loongarch_vdso_data __page_aligned_data; -static struct page *vdso_pages[] = { NULL }; struct vdso_data *vdso_data = generic_vdso_data.data; struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata; struct vdso_rng_data *vdso_rng_data = &loongarch_vdso_data.vdata.rng_data; @@ -85,10 +84,8 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct loongarch_vdso_info vdso_info = { .vdso = vdso_start, - .size = PAGE_SIZE, .code_mapping = { .name = "[vdso]", - .pages = vdso_pages, .mremap = vdso_mremap, }, .data_mapping = { @@ -103,11 +100,14 @@ static int __init init_vdso(void) unsigned long i, cpu, pfn; BUG_ON(!PAGE_ALIGNED(vdso_info.vdso)); - BUG_ON(!PAGE_ALIGNED(vdso_info.size)); for_each_possible_cpu(cpu) vdso_pdata[cpu].node = cpu_to_node(cpu); + vdso_info.size = PAGE_ALIGN(vdso_end - vdso_start); + vdso_info.code_mapping.pages = + kcalloc(vdso_info.size / PAGE_SIZE, sizeof(struct page *), GFP_KERNEL); + pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso)); for (i = 0; i < vdso_info.size / PAGE_SIZE; i++) vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i); diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 844736b99d38..27e9b94c0a0b 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -261,7 +261,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -ENOIOCTLCMD; } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { unsigned long env, gcfg = 0; @@ -300,7 +300,7 @@ int kvm_arch_hardware_enable(void) return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { write_csr_gcfg(0); write_csr_gstat(0); diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 74a4b5c272d6..32dc213374be 100644 --- a/arch/loongarch/kvm/timer.c +++ 
b/arch/loongarch/kvm/timer.c @@ -161,10 +161,11 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu) if (kvm_vcpu_is_blocking(vcpu)) { /* - * HRTIMER_MODE_PINNED is suggested since vcpu may run in - * the same physical cpu in next time + * HRTIMER_MODE_PINNED_HARD is suggested since vcpu may run in + * the same physical cpu in next time, and the timer should run + * in hardirq context even in the PREEMPT_RT case. */ - hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED); + hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED_HARD); } } diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 0697b1064251..174734a23d0a 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -1457,7 +1457,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.vpid = 0; vcpu->arch.flush_gpa = INVALID_GPA; - hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); + hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); vcpu->arch.swtimer.function = kvm_swtimer_wakeup; vcpu->arch.handle_exit = kvm_handle_exit; diff --git a/arch/loongarch/mm/Makefile b/arch/loongarch/mm/Makefile index e4d1e581dbae..278be2c8fc36 100644 --- a/arch/loongarch/mm/Makefile +++ b/arch/loongarch/mm/Makefile @@ -4,7 +4,8 @@ # obj-y += init.o cache.o tlb.o tlbex.o extable.o \ - fault.o ioremap.o maccess.o mmap.o pgtable.o page.o + fault.o ioremap.o maccess.o mmap.o pgtable.o \ + page.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index 97b40defde06..deefd9617d00 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -31,11 +31,52 @@ int show_unhandled_signals = 1; +static int __kprobes spurious_fault(unsigned long write, unsigned long address) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (!(address & __UA_LIMIT)) + return 0; + + pgd = pgd_offset_k(address); + if (!pgd_present(pgdp_get(pgd))) + return 0; + + p4d = p4d_offset(pgd, address); + if (!p4d_present(p4dp_get(p4d))) + return 0; + + pud = pud_offset(p4d, address); + if (!pud_present(pudp_get(pud))) + return 0; + + pmd = pmd_offset(pud, address); + if (!pmd_present(pmdp_get(pmd))) + return 0; + + if (pmd_leaf(*pmd)) { + return write ? pmd_write(pmdp_get(pmd)) : 1; + } else { + pte = pte_offset_kernel(pmd, address); + if (!pte_present(ptep_get(pte))) + return 0; + + return write ? pte_write(ptep_get(pte)) : 1; + } +} + static void __kprobes no_context(struct pt_regs *regs, unsigned long write, unsigned long address) { const int field = sizeof(unsigned long) * 2; + if (spurious_fault(write, address)) + return; + /* Are we prepared to handle this kernel fault? 
*/ if (fixup_exception(regs)) return; diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 8a87a482c8f4..188b52bbb254 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -201,7 +201,9 @@ pte_t * __init populate_kernel_pte(unsigned long addr) pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pte) panic("%s: Failed to allocate memory\n", __func__); + pmd_populate_kernel(&init_mm, pmd, pte); + kernel_pte_init(pte); } return pte_offset_kernel(pmd, addr); diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c index 427d6b1aec09..d2681272d8f0 100644 --- a/arch/loongarch/mm/kasan_init.c +++ b/arch/loongarch/mm/kasan_init.c @@ -13,6 +13,13 @@ static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); +#ifdef __PAGETABLE_P4D_FOLDED +#define __pgd_none(early, pgd) (0) +#else +#define __pgd_none(early, pgd) (early ? (pgd_val(pgd) == 0) : \ +(__pa(pgd_val(pgd)) == (unsigned long)__pa(kasan_early_shadow_p4d))) +#endif + #ifdef __PAGETABLE_PUD_FOLDED #define __p4d_none(early, p4d) (0) #else @@ -55,6 +62,9 @@ void *kasan_mem_to_shadow(const void *addr) case XKPRANGE_UC_SEG: offset = XKPRANGE_UC_SHADOW_OFFSET; break; + case XKPRANGE_WC_SEG: + offset = XKPRANGE_WC_SHADOW_OFFSET; + break; case XKVRANGE_VC_SEG: offset = XKVRANGE_VC_SHADOW_OFFSET; break; @@ -79,6 +89,8 @@ const void *kasan_shadow_to_mem(const void *shadow_addr) if (addr >= XKVRANGE_VC_SHADOW_OFFSET) return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START); + else if (addr >= XKPRANGE_WC_SHADOW_OFFSET) + return (void *)(((addr - XKPRANGE_WC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_WC_START); else if (addr >= XKPRANGE_UC_SHADOW_OFFSET) return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START); else if (addr >= XKPRANGE_CC_SHADOW_OFFSET) @@ -142,6 +154,19 @@ static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, return pud_offset(p4dp, addr); } +static p4d_t *__init kasan_p4d_offset(pgd_t *pgdp, unsigned long addr, int node, bool early) +{ + if (__pgd_none(early, pgdp_get(pgdp))) { + phys_addr_t p4d_phys = early ? 
+ __pa_symbol(kasan_early_shadow_p4d) : kasan_alloc_zeroed_page(node); + if (!early) + memcpy(__va(p4d_phys), kasan_early_shadow_p4d, sizeof(kasan_early_shadow_p4d)); + pgd_populate(&init_mm, pgdp, (p4d_t *)__va(p4d_phys)); + } + + return p4d_offset(pgdp, addr); +} + static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, unsigned long end, int node, bool early) { @@ -178,19 +203,19 @@ static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr, do { next = pud_addr_end(addr, end); kasan_pmd_populate(pudp, addr, next, node, early); - } while (pudp++, addr = next, addr != end); + } while (pudp++, addr = next, addr != end && __pud_none(early, READ_ONCE(*pudp))); } static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr, unsigned long end, int node, bool early) { unsigned long next; - p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t *p4dp = kasan_p4d_offset(pgdp, addr, node, early); do { next = p4d_addr_end(addr, end); kasan_pud_populate(p4dp, addr, next, node, early); - } while (p4dp++, addr = next, addr != end); + } while (p4dp++, addr = next, addr != end && __p4d_none(early, READ_ONCE(*p4dp))); } static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, @@ -218,7 +243,7 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end, asmlinkage void __init kasan_early_init(void) { BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END + 1, PGDIR_SIZE)); } static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval) @@ -233,7 +258,7 @@ static void __init clear_pgds(unsigned long start, unsigned long end) * swapper_pg_dir. pgd_clear() can't be used * here because it's nop on 2,3-level pagetable setups */ - for (; start < end; start += PGDIR_SIZE) + for (; start < end; start = pgd_addr_end(start, end)) kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0)); } @@ -243,6 +268,17 @@ void __init kasan_init(void) phys_addr_t pa_start, pa_end; /* + * If PGDIR_SIZE is too large for cpu_vabits, KASAN_SHADOW_END will + * overflow UINTPTR_MAX and then looks like a user space address. + * For example, PGDIR_SIZE of CONFIG_4KB_4LEVEL is 2^39, which is too + * large for Loongson-2K series whose cpu_vabits = 39. 
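The guard added just below works because KASAN_SHADOW_END is computed modulo 2^64: when PGDIR_SIZE is too large for cpu_vabits, the rounded-up end wraps past ULONG_MAX and compares below vm_map_base, which the code can detect and bail out on instead of mapping garbage. The wrap test in isolation, with made-up addresses:

#include <stdio.h>

int main(void)
{
	unsigned long start = 0xfffff00000000000UL;  /* hypothetical base */
	unsigned long size  = 0x0000200000000000UL;  /* oversized shadow */
	unsigned long end   = start + size;          /* wraps past 2^64 */

	if (end < start)
		puts("shadow end wrapped: disable KASAN instead");
	return 0;
}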
+ */ + if (KASAN_SHADOW_END < vm_map_base) { + pr_warn("PGDIR_SIZE too large for cpu_vabits, KernelAddressSanitizer disabled.\n"); + return; + } + + /* * PGD was populated as invalid_pmd_table or invalid_pud_table * in pagetable_init() which depends on how many levels of page * table you are using, but we had to clean the gpd of kasan diff --git a/arch/loongarch/mm/pageattr.c b/arch/loongarch/mm/pageattr.c new file mode 100644 index 000000000000..ffd8d76021d4 --- /dev/null +++ b/arch/loongarch/mm/pageattr.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include <linux/pagewalk.h> +#include <linux/pgtable.h> +#include <asm/set_memory.h> +#include <asm/tlbflush.h> + +struct pageattr_masks { + pgprot_t set_mask; + pgprot_t clear_mask; +}; + +static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk) +{ + unsigned long new_val = val; + struct pageattr_masks *masks = walk->private; + + new_val &= ~(pgprot_val(masks->clear_mask)); + new_val |= (pgprot_val(masks->set_mask)); + + return new_val; +} + +static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pgd_t val = pgdp_get(pgd); + + if (pgd_leaf(val)) { + val = __pgd(set_pageattr_masks(pgd_val(val), walk)); + set_pgd(pgd, val); + } + + return 0; +} + +static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + p4d_t val = p4dp_get(p4d); + + if (p4d_leaf(val)) { + val = __p4d(set_pageattr_masks(p4d_val(val), walk)); + set_p4d(p4d, val); + } + + return 0; +} + +static int pageattr_pud_entry(pud_t *pud, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pud_t val = pudp_get(pud); + + if (pud_leaf(val)) { + val = __pud(set_pageattr_masks(pud_val(val), walk)); + set_pud(pud, val); + } + + return 0; +} + +static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pmd_t val = pmdp_get(pmd); + + if (pmd_leaf(val)) { + val = __pmd(set_pageattr_masks(pmd_val(val), walk)); + set_pmd(pmd, val); + } + + return 0; +} + +static int pageattr_pte_entry(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t val = ptep_get(pte); + + val = __pte(set_pageattr_masks(pte_val(val), walk)); + set_pte(pte, val); + + return 0; +} + +static int pageattr_pte_hole(unsigned long addr, unsigned long next, + int depth, struct mm_walk *walk) +{ + return 0; +} + +static const struct mm_walk_ops pageattr_ops = { + .pgd_entry = pageattr_pgd_entry, + .p4d_entry = pageattr_p4d_entry, + .pud_entry = pageattr_pud_entry, + .pmd_entry = pageattr_pmd_entry, + .pte_entry = pageattr_pte_entry, + .pte_hole = pageattr_pte_hole, + .walk_lock = PGWALK_RDLOCK, +}; + +static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask) +{ + int ret; + unsigned long start = addr; + unsigned long end = start + PAGE_SIZE * numpages; + struct pageattr_masks masks = { + .set_mask = set_mask, + .clear_mask = clear_mask + }; + + if (!numpages) + return 0; + + mmap_write_lock(&init_mm); + ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, &masks); + mmap_write_unlock(&init_mm); + + flush_tlb_kernel_range(start, end); + + return ret; +} + +int set_memory_x(unsigned long addr, int numpages) +{ + if (addr < vm_map_base) + return 0; + + return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_NO_EXEC)); +} + +int set_memory_nx(unsigned 
long addr, int numpages) +{ + if (addr < vm_map_base) + return 0; + + return __set_memory(addr, numpages, __pgprot(_PAGE_NO_EXEC), __pgprot(0)); +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + if (addr < vm_map_base) + return 0; + + return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_WRITE | _PAGE_DIRTY)); +} + +int set_memory_rw(unsigned long addr, int numpages) +{ + if (addr < vm_map_base) + return 0; + + return __set_memory(addr, numpages, __pgprot(_PAGE_WRITE | _PAGE_DIRTY), __pgprot(0)); +} + +bool kernel_page_present(struct page *page) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long addr = (unsigned long)page_address(page); + + if (addr < vm_map_base) + return true; + + pgd = pgd_offset_k(addr); + if (pgd_none(pgdp_get(pgd))) + return false; + if (pgd_leaf(pgdp_get(pgd))) + return true; + + p4d = p4d_offset(pgd, addr); + if (p4d_none(p4dp_get(p4d))) + return false; + if (p4d_leaf(p4dp_get(p4d))) + return true; + + pud = pud_offset(p4d, addr); + if (pud_none(pudp_get(pud))) + return false; + if (pud_leaf(pudp_get(pud))) + return true; + + pmd = pmd_offset(pud, addr); + if (pmd_none(pmdp_get(pmd))) + return false; + if (pmd_leaf(pmdp_get(pmd))) + return true; + + pte = pte_offset_kernel(pmd, addr); + return pte_present(ptep_get(pte)); +} + +int set_direct_map_default_noflush(struct page *page) +{ + unsigned long addr = (unsigned long)page_address(page); + + if (addr < vm_map_base) + return 0; + + return __set_memory(addr, 1, PAGE_KERNEL, __pgprot(0)); +} + +int set_direct_map_invalid_noflush(struct page *page) +{ + unsigned long addr = (unsigned long)page_address(page); + + if (addr < vm_map_base) + return 0; + + return __set_memory(addr, 1, __pgprot(0), __pgprot(_PAGE_PRESENT | _PAGE_VALID)); +} diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index eb6a29b491a7..3fa69b23ff84 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -116,6 +116,26 @@ void pud_init(void *addr) EXPORT_SYMBOL_GPL(pud_init); #endif +void kernel_pte_init(void *addr) +{ + unsigned long *p, *end; + + p = (unsigned long *)addr; + end = p + PTRS_PER_PTE; + + do { + p[0] = _PAGE_GLOBAL; + p[1] = _PAGE_GLOBAL; + p[2] = _PAGE_GLOBAL; + p[3] = _PAGE_GLOBAL; + p[4] = _PAGE_GLOBAL; + p += 8; + p[-3] = _PAGE_GLOBAL; + p[-2] = _PAGE_GLOBAL; + p[-1] = _PAGE_GLOBAL; + } while (p != end); +} + pmd_t mk_pmd(struct page *page, pgprot_t prot) { pmd_t pmd; diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index 365f7de771cb..1da4dc46df43 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -225,6 +225,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) if (bus) { memcpy(bus->sysdata, info->cfg, sizeof(struct pci_config_window)); kfree(info); + kfree(root_ops); } else { struct pci_bus *child; diff --git a/arch/loongarch/vdso/vgetrandom-chacha.S b/arch/loongarch/vdso/vgetrandom-chacha.S index 7e86a50f6e85..c2733e6c3a8d 100644 --- a/arch/loongarch/vdso/vgetrandom-chacha.S +++ b/arch/loongarch/vdso/vgetrandom-chacha.S @@ -9,23 +9,11 @@ .text -/* Salsa20 quarter-round */ -.macro QR a b c d - add.w \a, \a, \b - xor \d, \d, \a - rotri.w \d, \d, 16 - - add.w \c, \c, \d - xor \b, \b, \c - rotri.w \b, \b, 20 - - add.w \a, \a, \b - xor \d, \d, \a - rotri.w \d, \d, 24 - - add.w \c, \c, \d - xor \b, \b, \c - rotri.w \b, \b, 25 +.macro OP_4REG op d0 d1 d2 d3 s0 s1 s2 s3 + \op \d0, \d0, \s0 + \op \d1, \d1, \s1 + \op \d2, \d2, \s2 + \op \d3, \d3, \s3 .endm /* @@ -74,6 +62,23 @@ 
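In the vgetrandom-chacha.S rewrite below, OP_4REG runs each quarter-round step across a whole "line" of the 4x4 ChaCha state before moving on; since the four columns are independent, this interleaving is behaviour-preserving while exposing more instruction-level parallelism. Note the rotri.w amounts 16/20/24/25 are right-rotations, equivalent to ChaCha's left-rotations by 16/12/8/7. The reference quarter-round in C for comparison:

#include <stdint.h>

static inline uint32_t rotl32(uint32_t x, int r)
{
	return (x << r) | (x >> (32 - r));  /* rotl r == rotr (32 - r) */
}

/* one ChaCha20 quarter-round; OP_4REG applies each statement to all
 * four columns of the state before advancing to the next statement */
static void quarter_round(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
	*a += *b; *d ^= *a; *d = rotl32(*d, 16);  /* rotri.w ..., 16 */
	*c += *d; *b ^= *c; *b = rotl32(*b, 12);  /* rotri.w ..., 20 */
	*a += *b; *d ^= *a; *d = rotl32(*d, 8);   /* rotri.w ..., 24 */
	*c += *d; *b ^= *c; *b = rotl32(*b, 7);   /* rotri.w ..., 25 */
}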
SYM_FUNC_START(__arch_chacha20_blocks_nostack) /* Reuse i as copy3 */ #define copy3 i +/* Packs to be used with OP_4REG */ +#define line0 state0, state1, state2, state3 +#define line1 state4, state5, state6, state7 +#define line2 state8, state9, state10, state11 +#define line3 state12, state13, state14, state15 + +#define line1_perm state5, state6, state7, state4 +#define line2_perm state10, state11, state8, state9 +#define line3_perm state15, state12, state13, state14 + +#define copy copy0, copy1, copy2, copy3 + +#define _16 16, 16, 16, 16 +#define _20 20, 20, 20, 20 +#define _24 24, 24, 24, 24 +#define _25 25, 25, 25, 25 + /* * The ABI requires s0-s9 saved, and sp aligned to 16-byte. * This does not violate the stack-less requirement: no sensitive data @@ -126,16 +131,38 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) li.w i, 10 .Lpermute: /* odd round */ - QR state0, state4, state8, state12 - QR state1, state5, state9, state13 - QR state2, state6, state10, state14 - QR state3, state7, state11, state15 + OP_4REG add.w line0, line1 + OP_4REG xor line3, line0 + OP_4REG rotri.w line3, _16 + + OP_4REG add.w line2, line3 + OP_4REG xor line1, line2 + OP_4REG rotri.w line1, _20 + + OP_4REG add.w line0, line1 + OP_4REG xor line3, line0 + OP_4REG rotri.w line3, _24 + + OP_4REG add.w line2, line3 + OP_4REG xor line1, line2 + OP_4REG rotri.w line1, _25 /* even round */ - QR state0, state5, state10, state15 - QR state1, state6, state11, state12 - QR state2, state7, state8, state13 - QR state3, state4, state9, state14 + OP_4REG add.w line0, line1_perm + OP_4REG xor line3_perm, line0 + OP_4REG rotri.w line3_perm, _16 + + OP_4REG add.w line2_perm, line3_perm + OP_4REG xor line1_perm, line2_perm + OP_4REG rotri.w line1_perm, _20 + + OP_4REG add.w line0, line1_perm + OP_4REG xor line3_perm, line0 + OP_4REG rotri.w line3_perm, _24 + + OP_4REG add.w line2_perm, line3_perm + OP_4REG xor line1_perm, line2_perm + OP_4REG rotri.w line1_perm, _25 addi.w i, i, -1 bnez i, .Lpermute @@ -147,10 +174,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) li.w copy3, 0x6b206574 /* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */ - add.w state0, state0, copy0 - add.w state1, state1, copy1 - add.w state2, state2, copy2 - add.w state3, state3, copy3 + OP_4REG add.w line0, copy st.w state0, output, 0 st.w state1, output, 4 st.w state2, output, 8 @@ -165,10 +189,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) ld.w state3, key, 12 /* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */ - add.w state4, state4, state0 - add.w state5, state5, state1 - add.w state6, state6, state2 - add.w state7, state7, state3 + OP_4REG add.w line1, line0 st.w state4, output, 16 st.w state5, output, 20 st.w state6, output, 24 @@ -181,10 +202,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) ld.w state3, key, 28 /* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */ - add.w state8, state8, state0 - add.w state9, state9, state1 - add.w state10, state10, state2 - add.w state11, state11, state3 + OP_4REG add.w line2, line0 st.w state8, output, 32 st.w state9, output, 36 st.w state10, output, 40 diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h index 79a749f4ad04..edff4306fa70 100644 --- a/arch/microblaze/include/asm/flat.h +++ b/arch/microblaze/include/asm/flat.h @@ -8,7 +8,7 @@ #ifndef _ASM_MICROBLAZE_FLAT_H #define _ASM_MICROBLAZE_FLAT_H -#include <asm/unaligned.h> +#include <linux/unaligned.h> /* * Microblaze works a little differently from other arches, because diff --git 
a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index adb6d5b0e6eb..90021c6a8cab 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -16,7 +16,7 @@ #include <linux/libfdt.h> #include <asm/addrspace.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm-generic/vmlinux.lds.h> #include "decompress.h" diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c index 2a59b85f88aa..a7a1d43a1b2c 100644 --- a/arch/mips/crypto/crc32-mips.c +++ b/arch/mips/crypto/crc32-mips.c @@ -14,7 +14,7 @@ #include <linux/module.h> #include <linux/string.h> #include <asm/mipsregs.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/internal/hash.h> diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c index 867728ee535a..c03ad0bbe69c 100644 --- a/arch/mips/crypto/poly1305-glue.c +++ b/arch/mips/crypto/poly1305-glue.c @@ -5,7 +5,7 @@ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/algapi.h> #include <crypto/internal/hash.h> #include <crypto/internal/poly1305.h> diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 6743a57c1ab4..f7222eb594ea 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -728,8 +728,8 @@ struct kvm_mips_callbacks { int (*handle_fpe)(struct kvm_vcpu *vcpu); int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); int (*handle_guest_exit)(struct kvm_vcpu *vcpu); - int (*hardware_enable)(void); - void (*hardware_disable)(void); + int (*enable_virtualization_cpu)(void); + void (*disable_virtualization_cpu)(void); int (*check_extension)(struct kvm *kvm, long ext); int (*vcpu_init)(struct kvm_vcpu *vcpu); void (*vcpu_uninit)(struct kvm_vcpu *vcpu); diff --git a/arch/mips/kernel/cmpxchg.c b/arch/mips/kernel/cmpxchg.c index e974a4954df8..c371def2302d 100644 --- a/arch/mips/kernel/cmpxchg.c +++ b/arch/mips/kernel/cmpxchg.c @@ -102,3 +102,4 @@ unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, return old; } } +EXPORT_SYMBOL(__cmpxchg_small); diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index b5de770b092e..60b43ea85c12 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -125,14 +125,14 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return 1; } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { - return kvm_mips_callbacks->hardware_enable(); + return kvm_mips_callbacks->enable_virtualization_cpu(); } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { - kvm_mips_callbacks->hardware_disable(); + kvm_mips_callbacks->disable_virtualization_cpu(); } int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 99d5a71e4300..ccab4d76b126 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -2869,7 +2869,7 @@ static unsigned int kvm_vz_resize_guest_vtlb(unsigned int size) return ret + 1; } -static int kvm_vz_hardware_enable(void) +static int kvm_vz_enable_virtualization_cpu(void) { unsigned int mmu_size, guest_mmu_size, ftlb_size; u64 guest_cvmctl, cvmvmconfig; @@ -2983,7 +2983,7 @@ static int kvm_vz_hardware_enable(void) return 0; } -static void kvm_vz_hardware_disable(void) +static void kvm_vz_disable_virtualization_cpu(void) { u64 cvmvmconfig; unsigned int mmu_size; @@ -3280,8 +3280,8 @@ static struct 
kvm_mips_callbacks kvm_vz_callbacks = { .handle_msa_disabled = kvm_trap_vz_handle_msa_disabled, .handle_guest_exit = kvm_trap_vz_handle_guest_exit, - .hardware_enable = kvm_vz_hardware_enable, - .hardware_disable = kvm_vz_hardware_disable, + .enable_virtualization_cpu = kvm_vz_enable_virtualization_cpu, + .disable_virtualization_cpu = kvm_vz_disable_virtualization_cpu, .check_extension = kvm_vz_check_extension, .vcpu_init = kvm_vz_vcpu_init, .vcpu_uninit = kvm_vz_vcpu_uninit, diff --git a/arch/nios2/kernel/misaligned.c b/arch/nios2/kernel/misaligned.c index 23e0544e117c..2f2862eab3c6 100644 --- a/arch/nios2/kernel/misaligned.c +++ b/arch/nios2/kernel/misaligned.c @@ -23,7 +23,7 @@ #include <linux/seq_file.h> #include <asm/traps.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> /* instructions we emulate */ #define INST_LDHU 0x0b diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c index d389359e22ac..9c83bd06ef15 100644 --- a/arch/parisc/boot/compressed/misc.c +++ b/arch/parisc/boot/compressed/misc.c @@ -6,7 +6,7 @@ #include <linux/uaccess.h> #include <linux/elf.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/page.h> #include "sizes.h" diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h index 89b6beeda0b8..663f587dc789 100644 --- a/arch/parisc/include/asm/mman.h +++ b/arch/parisc/include/asm/mman.h @@ -2,6 +2,7 @@ #ifndef __ASM_MMAN_H__ #define __ASM_MMAN_H__ +#include <linux/fs.h> #include <uapi/asm/mman.h> /* PARISC cannot allow mdwe as it needs writable stacks */ @@ -11,7 +12,7 @@ static inline bool arch_memory_deny_write_exec_supported(void) } #define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported -static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) +static inline unsigned long arch_calc_vm_flag_bits(struct file *file, unsigned long flags) { /* * The stack on parisc grows upwards, so if userspace requests memory @@ -23,6 +24,6 @@ static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) return 0; } -#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags) +#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags) #endif /* __ASM_MMAN_H__ */ diff --git a/arch/parisc/include/asm/unaligned.h b/arch/parisc/include/asm/unaligned.h deleted file mode 100644 index c0621295100d..000000000000 --- a/arch/parisc/include/asm/unaligned.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_PARISC_UNALIGNED_H -#define _ASM_PARISC_UNALIGNED_H - -#include <asm-generic/unaligned.h> - -struct pt_regs; -void handle_unaligned(struct pt_regs *regs); -int check_unaligned(struct pt_regs *regs); - -#endif /* _ASM_PARISC_UNALIGNED_H */ diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index b0f0816879df..5e8e37a722ef 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -466,7 +466,6 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } static const struct file_operations perf_fops = { - .llseek = no_llseek, .read = perf_read, .write = perf_write, .unlocked_ioctl = perf_ioctl, diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 294b0e026c9a..b9b3d527bc90 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -36,7 +36,7 @@ #include <asm/io.h> #include <asm/irq.h> #include <asm/traps.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/atomic.h> #include 
<asm/smp.h> #include <asm/pdc.h> @@ -47,6 +47,8 @@ #include <linux/kgdb.h> #include <linux/kprobes.h> +#include "unaligned.h" + #if defined(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK) #include <asm/spinlock.h> #endif diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 3e79e40e361d..f4626943633a 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -12,9 +12,10 @@ #include <linux/ratelimit.h> #include <linux/uaccess.h> #include <linux/sysctl.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/hardirq.h> #include <asm/traps.h> +#include "unaligned.h" /* #define DEBUG_UNALIGNED 1 */ diff --git a/arch/parisc/kernel/unaligned.h b/arch/parisc/kernel/unaligned.h new file mode 100644 index 000000000000..c1aa4b12e284 --- /dev/null +++ b/arch/parisc/kernel/unaligned.h @@ -0,0 +1,3 @@ +struct pt_regs; +void handle_unaligned(struct pt_regs *regs); +int check_unaligned(struct pt_regs *regs); diff --git a/arch/powerpc/crypto/aes-gcm-p10-glue.c b/arch/powerpc/crypto/aes-gcm-p10-glue.c index f62ee54076c0..f66ad56e765f 100644 --- a/arch/powerpc/crypto/aes-gcm-p10-glue.c +++ b/arch/powerpc/crypto/aes-gcm-p10-glue.c @@ -5,7 +5,7 @@ * Copyright 2022- IBM Inc. All rights reserved */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/simd.h> #include <asm/switch_to.h> #include <crypto/aes.h> diff --git a/arch/powerpc/crypto/poly1305-p10-glue.c b/arch/powerpc/crypto/poly1305-p10-glue.c index 95dd708573ee..369686e9370b 100644 --- a/arch/powerpc/crypto/poly1305-p10-glue.c +++ b/arch/powerpc/crypto/poly1305-p10-glue.c @@ -14,7 +14,7 @@ #include <crypto/internal/poly1305.h> #include <crypto/internal/simd.h> #include <linux/cpufeature.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/simd.h> #include <asm/switch_to.h> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 0e59b8fd9bc6..83fe99861eb1 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1574,6 +1574,104 @@ static int proc_eeh_show(struct seq_file *m, void *v) } #endif /* CONFIG_PROC_FS */ +static int eeh_break_device(struct pci_dev *pdev) +{ + struct resource *bar = NULL; + void __iomem *mapped; + u16 old, bit; + int i, pos; + + /* Do we have an MMIO BAR to disable? */ + for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { + struct resource *r = &pdev->resource[i]; + + if (!r->flags || !r->start) + continue; + if (r->flags & IORESOURCE_IO) + continue; + if (r->flags & IORESOURCE_UNSET) + continue; + + bar = r; + break; + } + + if (!bar) { + pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n"); + return -ENXIO; + } + + pci_err(pdev, "Going to break: %pR\n", bar); + + if (pdev->is_virtfn) { +#ifndef CONFIG_PCI_IOV + return -ENXIO; +#else + /* + * VFs don't have a per-function COMMAND register, so the best + * we can do is clear the Memory Space Enable bit in the PF's + * SRIOV control reg. + * + * Unfortunately, this requires that we have a PF (i.e doesn't + * work for a passed-through VF) and it has the potential side + * effect of also causing an EEH on every other VF under the + * PF. Oh well. + */ + pdev = pdev->physfn; + if (!pdev) + return -ENXIO; /* passed through VFs have no PF */ + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + pos += PCI_SRIOV_CTRL; + bit = PCI_SRIOV_CTRL_MSE; +#endif /* !CONFIG_PCI_IOV */ + } else { + bit = PCI_COMMAND_MEMORY; + pos = PCI_COMMAND; + } + + /* + * Process here is: + * + * 1. Disable Memory space. + * + * 2. 
Perform an MMIO to the device. This should result in an error
+ *    (CA / UR) being raised by the device, which results in an EEH
+ *    PE freeze. Using the in_8() accessor skips the EEH detection
+ *    hook, so the EEH detection machinery won't be triggered here.
+ *    This is to match the usual behaviour of EEH, where the HW will
+ *    asynchronously freeze a PE and it's up to the kernel to notice
+ *    and deal with it.
+ *
+ * 3. Turn Memory space back on. This is more important for VFs
+ *    since recovery will probably fail if we don't. For normal
+ *    devices the COMMAND register is reset as a part of
+ *    re-initialising the device.
+ *
+ * Breaking stuff is the point, so who cares if it's racy ;)
+ */
+	pci_read_config_word(pdev, pos, &old);
+
+	mapped = ioremap(bar->start, PAGE_SIZE);
+	if (!mapped) {
+		pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
+		return -ENXIO;
+	}
+
+	pci_write_config_word(pdev, pos, old & ~bit);
+	in_8(mapped);
+	pci_write_config_word(pdev, pos, old);
+
+	iounmap(mapped);
+
+	return 0;
+}
+
+int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
+{
+	return eeh_break_device(pdev);
+}
+
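/*
 * For illustration only, not part of the patch above: stripped of the
 * BAR discovery and the SR-IOV special case, eeh_break_device() boils
 * down to a read-modify-write of PCI_COMMAND around one MMIO load.
 * Here "mapped" is a hypothetical ioremap() of any memory BAR of the
 * device.
 */
static void eeh_break_sketch(struct pci_dev *pdev, void __iomem *mapped)
{
	u16 cmd;

	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
	/* MMIO read while memory space is disabled -> CA/UR -> PE freeze */
	pci_write_config_word(pdev, PCI_COMMAND, cmd & ~PCI_COMMAND_MEMORY);
	in_8(mapped);
	/* restore memory space so later recovery can succeed */
	pci_write_config_word(pdev, PCI_COMMAND, cmd);
}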
 #ifdef CONFIG_DEBUG_FS
@@ -1725,99 +1823,6 @@ static const struct file_operations eeh_dev_check_fops = {
 	.read = eeh_debugfs_dev_usage,
 };
 
-static int eeh_debugfs_break_device(struct pci_dev *pdev)
-{
-	struct resource *bar = NULL;
-	void __iomem *mapped;
-	u16 old, bit;
-	int i, pos;
-
-	/* Do we have an MMIO BAR to disable? */
-	for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
-		struct resource *r = &pdev->resource[i];
-
-		if (!r->flags || !r->start)
-			continue;
-		if (r->flags & IORESOURCE_IO)
-			continue;
-		if (r->flags & IORESOURCE_UNSET)
-			continue;
-
-		bar = r;
-		break;
-	}
-
-	if (!bar) {
-		pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
-		return -ENXIO;
-	}
-
-	pci_err(pdev, "Going to break: %pR\n", bar);
-
-	if (pdev->is_virtfn) {
-#ifndef CONFIG_PCI_IOV
-		return -ENXIO;
-#else
-		/*
-		 * VFs don't have a per-function COMMAND register, so the best
-		 * we can do is clear the Memory Space Enable bit in the PF's
-		 * SRIOV control reg.
-		 *
-		 * Unfortunately, this requires that we have a PF (i.e doesn't
-		 * work for a passed-through VF) and it has the potential side
-		 * effect of also causing an EEH on every other VF under the
-		 * PF. Oh well.
-		 */
-		pdev = pdev->physfn;
-		if (!pdev)
-			return -ENXIO; /* passed through VFs have no PF */
-
-		pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
-		pos += PCI_SRIOV_CTRL;
-		bit = PCI_SRIOV_CTRL_MSE;
-#endif /* !CONFIG_PCI_IOV */
-	} else {
-		bit = PCI_COMMAND_MEMORY;
-		pos = PCI_COMMAND;
-	}
-
-	/*
-	 * Process here is:
-	 *
-	 * 1. Disable Memory space.
-	 *
-	 * 2. Perform an MMIO to the device. This should result in an error
-	 *    (CA / UR) being raised by the device which results in an EEH
-	 *    PE freeze. Using the in_8() accessor skips the eeh detection hook
-	 *    so the freeze hook so the EEH Detection machinery won't be
-	 *    triggered here. This is to match the usual behaviour of EEH
-	 *    where the HW will asynchronously freeze a PE and it's up to
-	 *    the kernel to notice and deal with it.
-	 *
-	 * 3. Turn Memory space back on. This is more important for VFs
-	 *    since recovery will probably fail if we don't. For normal
-	 *    the COMMAND register is reset as a part of re-initialising
-	 *    the device.
-	 *
-	 * Breaking stuff is the point so who cares if it's racy ;)
-	 */
-	pci_read_config_word(pdev, pos, &old);
-
-	mapped = ioremap(bar->start, PAGE_SIZE);
-	if (!mapped) {
-		pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
-		return -ENXIO;
-	}
-
-	pci_write_config_word(pdev, pos, old & ~bit);
-	in_8(mapped);
-	pci_write_config_word(pdev, pos, old);
-
-	iounmap(mapped);
-
-	return 0;
-}
-
 static ssize_t eeh_dev_break_write(struct file *filp,
 				const char __user *user_buf,
 				size_t count, loff_t *ppos)
@@ -1829,7 +1834,7 @@ static ssize_t eeh_dev_break_write(struct file *filp,
 	if (IS_ERR(pdev))
 		return PTR_ERR(pdev);
 
-	ret = eeh_debugfs_break_device(pdev);
+	ret = eeh_break_device(pdev);
 	pci_dev_put(pdev);
 
 	if (ret < 0)
@@ -1844,11 +1849,6 @@ static const struct file_operations eeh_dev_break_fops = {
 	.read = eeh_debugfs_dev_usage,
 };
 
-int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
-{
-	return eeh_debugfs_break_device(pdev);
-}
-
 static ssize_t eeh_dev_can_recover(struct file *filp,
 				const char __user *user_buf,
 				size_t count, loff_t *ppos)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 811a7130505c..56c5ebe21b99 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -494,6 +494,7 @@ FixupDAR:/* Entry point for dcbx workaround. */
 	bctr			/* jump into table */
 152:	mfdar	r11
+	mtdar	r10
 	mtctr	r11	/* restore ctr reg from DAR */
 	mfspr	r11, SPRN_SPRG_THREAD
 	stw	r10, DAR(r11)
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
index 56fb1633529a..31ca5a547004 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -22,7 +22,7 @@ endif
 
 ifneq ($(c-getrandom-y),)
   CFLAGS_vgetrandom-32.o += -include $(c-getrandom-y)
-  CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y) $(call cc-option, -ffixed-r30)
+  CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y)
 endif
 
 # Build rules
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ba0492f9de65..ad8dc4ccdaab 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4898,6 +4898,18 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 					     BOOK3S_INTERRUPT_EXTERNAL, 0);
 		else
 			lpcr |= LPCR_MER;
+	} else {
+		/*
+		 * L1's copy of L2's LPCR (vcpu->arch.vcore->lpcr) can get its MER bit
+		 * unexpectedly set, e.g. during NMI handling when all register
+		 * states are synchronized from L0 to L1. L1 needs to inform L0 about
+		 * MER=1 only when there are pending external interrupts.
+		 * In the above if check, the MER bit is set if there are pending
+		 * external interrupts. Hence, explicitly mask off the MER bit
+		 * here, as otherwise it may generate spurious interrupts in L2 KVM,
+		 * causing an endless loop which results in the L2 guest getting hung.
+		 */
+		lpcr &= ~LPCR_MER;
 	}
 } else if (vcpu->arch.pending_exceptions ||
 	   vcpu->arch.doorbell_request ||
diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S
index 7e5e1c28e56a..8967903c15e9 100644
--- a/arch/powerpc/lib/crtsavres.S
+++ b/arch/powerpc/lib/crtsavres.S
@@ -46,7 +46,7 @@
 
 	.section ".text"
 
-#ifndef CONFIG_PPC64
+#ifndef __powerpc64__
 
 /* Routines for saving integer registers, called by the compiler. 
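   (Aside on the guard above: __powerpc64__ is defined by the compiler
   itself whenever it generates 64-bit PowerPC code, whereas CONFIG_PPC64
   only reflects the kernel configuration. Guarding on the compiler macro
   keeps these 32-bit save/restore routines available to any 32-bit
   objects built within a 64-bit kernel, such as the compat VDSO.)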
*/ /* Called with r11 pointing to the stack header word of the caller of the */ diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index 56a1f7ce78d2..d92759c21fae 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -282,6 +282,7 @@ int __init opal_event_init(void) name, NULL); if (rc) { pr_warn("Error %d requesting OPAL irq %d\n", rc, (int)r->start); + kfree(name); continue; } } diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index d95e03b3d3e3..9e297f88adc5 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -19,7 +19,7 @@ #include <uapi/linux/papr_pdsm.h> #include <linux/papr_scm.h> #include <asm/mce.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/perf_event.h> #define BIND_ANY_ADDR (~0ul) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index b6d515db869b..f4c570538d55 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -177,7 +177,7 @@ config RISCV select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RETHOOK if !XIP_KERNEL select HAVE_RSEQ - select HAVE_RUST if 64BIT + select HAVE_RUST if RUSTC_SUPPORTS_RISCV && CC_IS_CLANG select HAVE_SAMPLE_FTRACE_DIRECT select HAVE_SAMPLE_FTRACE_DIRECT_MULTI select HAVE_STACKPROTECTOR @@ -209,6 +209,13 @@ config RISCV select USER_STACKTRACE_SUPPORT select ZONE_DMA32 if 64BIT +config RUSTC_SUPPORTS_RISCV + def_bool y + depends on 64BIT + # Shadow call stack requires rustc version 1.82+ due to use of the + # -Zsanitizer=shadow-call-stack flag. + depends on !SHADOW_CALL_STACK || RUSTC_VERSION >= 108200 + config CLANG_SUPPORTS_DYNAMIC_FTRACE def_bool CC_IS_CLANG # https://github.com/ClangBuiltLinux/linux/issues/1817 @@ -770,8 +777,7 @@ config IRQ_STACKS config THREAD_SIZE_ORDER int "Kernel stack size (in power-of-two numbers of page size)" if VMAP_STACK && EXPERT range 0 4 - default 1 if 32BIT && !KASAN - default 3 if 64BIT && KASAN + default 1 if 32BIT default 2 help Specify the Pages of thread stack size (from 4KB to 64KB), which also diff --git a/arch/riscv/boot/dts/sophgo/sg2042.dtsi b/arch/riscv/boot/dts/sophgo/sg2042.dtsi index 4e5fa6591623..e62ac51ac55a 100644 --- a/arch/riscv/boot/dts/sophgo/sg2042.dtsi +++ b/arch/riscv/boot/dts/sophgo/sg2042.dtsi @@ -112,7 +112,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; @@ -134,7 +134,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; @@ -156,7 +156,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi index c7771b3b6475..d6c55f1cc96a 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi @@ -128,7 +128,6 @@ assigned-clocks = <&ispcrg JH7110_ISPCLK_DOM4_APB_FUNC>, <&ispcrg JH7110_ISPCLK_MIPI_RX0_PXL>; assigned-clock-rates = <49500000>, <198000000>; - status = "okay"; ports { #address-cells = <1>; @@ -151,7 +150,6 @@ &csi2rx { assigned-clocks = <&ispcrg JH7110_ISPCLK_VIN_SYS>; assigned-clock-rates = <297000000>; 
- status = "okay"; ports { #address-cells = <1>; diff --git a/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts b/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts index b720cdd15ed6..8e39fdc73ecb 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts +++ b/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts @@ -44,8 +44,7 @@ }; &phy0 { - rx-internal-delay-ps = <1900>; - tx-internal-delay-ps = <1500>; + rx-internal-delay-ps = <1500>; motorcomm,rx-clk-drv-microamp = <2910>; motorcomm,rx-data-drv-microamp = <2910>; motorcomm,tx-clk-adj-enabled; diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile index 8a2739485123..f0da9d7b39c3 100644 --- a/arch/riscv/errata/Makefile +++ b/arch/riscv/errata/Makefile @@ -2,6 +2,12 @@ ifdef CONFIG_RELOCATABLE KBUILD_CFLAGS += -fno-pie endif +ifdef CONFIG_RISCV_ALTERNATIVE_EARLY +ifdef CONFIG_FORTIFY_SOURCE +KBUILD_CFLAGS += -D__NO_FORTIFY +endif +endif + obj-$(CONFIG_ERRATA_ANDES) += andes/ obj-$(CONFIG_ERRATA_SIFIVE) += sifive/ obj-$(CONFIG_ERRATA_THEAD) += thead/ diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index ebe52f96da34..9c10fb180f43 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -13,7 +13,12 @@ #include <linux/sizes.h> /* thread information allocation */ -#define THREAD_SIZE_ORDER CONFIG_THREAD_SIZE_ORDER +#ifdef CONFIG_KASAN +#define KASAN_STACK_ORDER 1 +#else +#define KASAN_STACK_ORDER 0 +#endif +#define THREAD_SIZE_ORDER (CONFIG_THREAD_SIZE_ORDER + KASAN_STACK_ORDER) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) /* diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 7f88cc4931f5..69dc8aaab3fb 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -36,6 +36,11 @@ KASAN_SANITIZE_alternative.o := n KASAN_SANITIZE_cpufeature.o := n KASAN_SANITIZE_sbi_ecall.o := n endif +ifdef CONFIG_FORTIFY_SOURCE +CFLAGS_alternative.o += -D__NO_FORTIFY +CFLAGS_cpufeature.o += -D__NO_FORTIFY +CFLAGS_sbi_ecall.o += -D__NO_FORTIFY +endif endif extra-y += vmlinux.lds diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index 6e0d333f57e5..2fd29695a788 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -210,7 +210,7 @@ void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size) if (!size) return NULL; - return early_ioremap(phys, size); + return early_memremap(phys, size); } void __init __acpi_unmap_table(void __iomem *map, unsigned long size) @@ -218,7 +218,7 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) if (!map || !size) return; - early_iounmap(map, size); + early_memunmap(map, size); } void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index e94180ba432f..c2f3129a8e5c 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -4,8 +4,6 @@ * Copyright (C) 2017 SiFive */ -#define GENERATING_ASM_OFFSETS - #include <linux/kbuild.h> #include <linux/mm.h> #include <linux/sched.h> diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index b320b1d9aa01..2d40736fc37c 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -80,8 +80,7 @@ int populate_cache_leaves(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; - struct device_node *np = of_cpu_device_node_get(cpu); 
- struct device_node *prev = NULL; + struct device_node *np, *prev; int levels = 1, level = 1; if (!acpi_disabled) { @@ -105,6 +104,10 @@ int populate_cache_leaves(unsigned int cpu) return 0; } + np = of_cpu_device_node_get(cpu); + if (!np) + return -ENOENT; + if (of_property_read_bool(np, "cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); if (of_property_read_bool(np, "i-cache-size")) diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index 28b58fc5ad19..a1e38ecfc8be 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -58,7 +58,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) if (cpu_ops->cpu_is_stopped) ret = cpu_ops->cpu_is_stopped(cpu); if (ret) - pr_warn("CPU%d may not have stopped: %d\n", cpu, ret); + pr_warn("CPU%u may not have stopped: %d\n", cpu, ret); } /* diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S index 515b2dfbca75..c5f17c2710b5 100644 --- a/arch/riscv/kernel/efi-header.S +++ b/arch/riscv/kernel/efi-header.S @@ -64,7 +64,7 @@ extra_header_fields: .long efi_header_end - _start // SizeOfHeaders .long 0 // CheckSum .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem - .short 0 // DllCharacteristics + .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics .quad 0 // SizeOfStackReserve .quad 0 // SizeOfStackCommit .quad 0 // SizeOfHeapReserve diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile index d5bf1bc7de62..81d69d45c06c 100644 --- a/arch/riscv/kernel/pi/Makefile +++ b/arch/riscv/kernel/pi/Makefile @@ -16,8 +16,12 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) KBUILD_CFLAGS += -mcmodel=medany CFLAGS_cmdline_early.o += -D__NO_FORTIFY -CFLAGS_lib-fdt_ro.o += -D__NO_FORTIFY CFLAGS_fdt_early.o += -D__NO_FORTIFY +# lib/string.c already defines __NO_FORTIFY +CFLAGS_ctype.o += -D__NO_FORTIFY +CFLAGS_lib-fdt.o += -D__NO_FORTIFY +CFLAGS_lib-fdt_ro.o += -D__NO_FORTIFY +CFLAGS_archrandom_early.o += -D__NO_FORTIFY $(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \ --remove-section=.note.gnu.property \ diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index d4fd8af7aaf5..1b9867136b61 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -136,8 +136,6 @@ #define REG_PTR(insn, pos, regs) \ (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)) -#define GET_RM(insn) (((insn) >> 12) & 7) - #define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs)) #define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs)) #define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 960feb1526ca..3f1c4b2d0b06 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -18,6 +18,7 @@ obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o ccflags-y := -fno-stack-protector ccflags-y += -DDISABLE_BRANCH_PROFILING +ccflags-y += -fno-builtin ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index 0a1e859323b4..a8085cd8215e 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -55,7 +55,7 @@ struct imsic { /* IMSIC SW-file */ struct imsic_mrif *swfile; phys_addr_t swfile_pa; - spinlock_t swfile_extirq_lock; + raw_spinlock_t swfile_extirq_lock; }; #define imsic_vs_csr_read(__c) \ @@ -622,7 +622,7 @@ static void 
imsic_swfile_extirq_update(struct kvm_vcpu *vcpu) * interruptions between reading topei and updating pending status. */ - spin_lock_irqsave(&imsic->swfile_extirq_lock, flags); + raw_spin_lock_irqsave(&imsic->swfile_extirq_lock, flags); if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) && imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis)) @@ -630,7 +630,7 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu) else kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT); - spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags); + raw_spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags); } static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear, @@ -1051,7 +1051,7 @@ int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu) } imsic->swfile = page_to_virt(swfile_page); imsic->swfile_pa = page_to_phys(swfile_page); - spin_lock_init(&imsic->swfile_extirq_lock); + raw_spin_lock_init(&imsic->swfile_extirq_lock); /* Setup IO device */ kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops); diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index bab2ec34cd87..f3427f6de608 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -20,7 +20,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT); csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT); @@ -35,7 +35,7 @@ int kvm_arch_hardware_enable(void) return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { kvm_riscv_aia_disable(); diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 99f34409fb60..4cc631fa7039 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -18,6 +18,7 @@ #define RV_MAX_REG_ARGS 8 #define RV_FENTRY_NINSNS 2 #define RV_FENTRY_NBYTES (RV_FENTRY_NINSNS * 4) +#define RV_KCFI_NINSNS (IS_ENABLED(CONFIG_CFI_CLANG) ? 1 : 0) /* imm that allows emit_imm to emit max count insns */ #define RV_MAX_COUNT_IMM 0x7FFF7FF7FF7FF7FF @@ -271,7 +272,8 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) if (!is_tail_call) emit_addiw(RV_REG_A0, RV_REG_A5, 0, ctx); emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, - is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */ + /* kcfi, fentry and TCC init insns will be skipped on tailcall */ + is_tail_call ? (RV_KCFI_NINSNS + RV_FENTRY_NINSNS + 1) * 4 : 0, ctx); } @@ -548,8 +550,8 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64, rv_lr_w(r0, 0, rd, 0, 0), ctx); jmp_offset = ninsns_rvoff(8); emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx); - emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 0) : - rv_sc_w(RV_REG_T3, rs, rd, 0, 0), ctx); + emit(is64 ? 
rv_sc_d(RV_REG_T3, rs, rd, 0, 1) : + rv_sc_w(RV_REG_T3, rs, rd, 0, 1), ctx); jmp_offset = ninsns_rvoff(-6); emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx); emit(rv_fence(0x3, 0x3), ctx); diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 7ec1b8cd0de9..fb0e9a1d9be2 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -50,7 +50,6 @@ CONFIG_NUMA=y CONFIG_HZ_100=y CONFIG_CERT_STORE=y CONFIG_EXPOLINE=y -# CONFIG_EXPOLINE_EXTERN is not set CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m @@ -59,6 +58,7 @@ CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_S390_HYPFS_FS=y CONFIG_KVM=m +CONFIG_KVM_S390_UCONTROL=y CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m CONFIG_S390_MODULES_SANITY_TEST=m @@ -94,6 +94,7 @@ CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y +CONFIG_SLAB_BUCKETS=y CONFIG_SLUB_STATS=y # CONFIG_COMPAT_BRK is not set CONFIG_MEMORY_HOTPLUG=y @@ -425,6 +426,13 @@ CONFIG_DEVTMPFS_SAFE=y # CONFIG_FW_LOADER is not set CONFIG_CONNECTOR=y CONFIG_ZRAM=y +CONFIG_ZRAM_BACKEND_LZ4=y +CONFIG_ZRAM_BACKEND_LZ4HC=y +CONFIG_ZRAM_BACKEND_ZSTD=y +CONFIG_ZRAM_BACKEND_DEFLATE=y +CONFIG_ZRAM_BACKEND_842=y +CONFIG_ZRAM_BACKEND_LZO=y +CONFIG_ZRAM_DEF_COMP_DEFLATE=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m @@ -485,6 +493,7 @@ CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y +CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING=y CONFIG_DM_SWITCH=m CONFIG_DM_INTEGRITY=m CONFIG_DM_VDO=m @@ -534,6 +543,7 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y +# CONFIG_NET_VENDOR_META is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -694,6 +704,7 @@ CONFIG_NFSD=m CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_V4_SECURITY_LABEL=y +# CONFIG_NFSD_LEGACY_CLIENT_TRACKING is not set CONFIG_CIFS=m CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y @@ -739,7 +750,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index df4addd1834a..88be0a734b60 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -48,7 +48,6 @@ CONFIG_NUMA=y CONFIG_HZ_100=y CONFIG_CERT_STORE=y CONFIG_EXPOLINE=y -# CONFIG_EXPOLINE_EXTERN is not set CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m @@ -89,6 +88,7 @@ CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y +CONFIG_SLAB_BUCKETS=y # CONFIG_COMPAT_BRK is not set CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y @@ -416,6 +416,13 @@ CONFIG_DEVTMPFS_SAFE=y # CONFIG_FW_LOADER is not set CONFIG_CONNECTOR=y CONFIG_ZRAM=y +CONFIG_ZRAM_BACKEND_LZ4=y +CONFIG_ZRAM_BACKEND_LZ4HC=y +CONFIG_ZRAM_BACKEND_ZSTD=y +CONFIG_ZRAM_BACKEND_DEFLATE=y +CONFIG_ZRAM_BACKEND_842=y +CONFIG_ZRAM_BACKEND_LZO=y +CONFIG_ZRAM_DEF_COMP_DEFLATE=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m @@ -476,6 +483,7 @@ CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y +CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING=y CONFIG_DM_SWITCH=m CONFIG_DM_INTEGRITY=m CONFIG_DM_VDO=m @@ -525,6 +533,7 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y +# 
CONFIG_NET_VENDOR_META is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -682,6 +691,7 @@ CONFIG_NFSD=m CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_V4_SECURITY_LABEL=y +# CONFIG_NFSD_LEGACY_CLIENT_TRACKING is not set CONFIG_CIFS=m CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y @@ -726,7 +736,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m @@ -767,6 +776,7 @@ CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_ZSTD=m CONFIG_CRYPTO_ANSI_CPRNG=m +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 8c2b61363bab..bcbaa069de96 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -49,6 +49,7 @@ CONFIG_ZFCP=y # CONFIG_HVC_IUCV is not set # CONFIG_HW_RANDOM_S390 is not set # CONFIG_HMC_DRV is not set +# CONFIG_S390_UV_UAPI is not set # CONFIG_S390_TAPE is not set # CONFIG_VMCP is not set # CONFIG_MONWRITER is not set diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index 0e855c5e91c5..5d9effb0867c 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -76,7 +76,6 @@ static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static const struct file_operations dbfs_ops = { .read = dbfs_read, - .llseek = no_llseek, .unlocked_ioctl = dbfs_ioctl, }; diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 858beaf4a8cb..d428635abf08 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -443,7 +443,6 @@ static const struct file_operations hypfs_file_ops = { .release = hypfs_release, .read_iter = hypfs_read_iter, .write_iter = hypfs_write_iter, - .llseek = no_llseek, }; static struct file_system_type hypfs_type = { diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 0fbc992d7a5e..fc9933a743d6 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -16,8 +16,10 @@ #include <asm/pci_io.h> #define xlate_dev_mem_ptr xlate_dev_mem_ptr +#define kc_xlate_dev_mem_ptr xlate_dev_mem_ptr void *xlate_dev_mem_ptr(phys_addr_t phys); #define unxlate_dev_mem_ptr unxlate_dev_mem_ptr +#define kc_unxlate_dev_mem_ptr unxlate_dev_mem_ptr void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define IO_SPACE_LIMIT 0 diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 66200d4a2134..29ee289108c5 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -49,6 +49,7 @@ struct perf_sf_sde_regs { }; #define perf_arch_fetch_caller_regs(regs, __ip) do { \ + (regs)->psw.mask = 0; \ (regs)->psw.addr = (__ip); \ (regs)->gprs[15] = (unsigned long)__builtin_frame_address(0) - \ offsetof(struct stack_frame, back_chain); \ diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index bce50ca75ea7..e62bea9ab21e 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -163,7 +163,6 @@ static const struct file_operations debug_file_ops = { .write = debug_input, .open = debug_open, .release = debug_close, - .llseek = no_llseek, }; static struct dentry *debug_debugfs_root_entry; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 
18b0d025f3a2..e2e0aa463fbd 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1698,7 +1698,6 @@ static const struct file_operations cfset_fops = { .release = cfset_release, .unlocked_ioctl = cfset_ioctl, .compat_ioctl = cfset_ioctl, - .llseek = no_llseek }; static struct miscdevice cfset_dev = { diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 2be30a96696a..88055f58fbda 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -498,7 +498,6 @@ static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \ .open = stsi_open_##fc##_##s1##_##s2, \ .release = stsi_release, \ .read = stsi_read, \ - .llseek = no_llseek, \ }; static int stsi_release(struct inode *inode, struct file *file) diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S index e26e68675c08..aa06c85bcbd3 100644 --- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S +++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S @@ -13,10 +13,7 @@ * for details. */ .macro vdso_func func - .globl __kernel_\func - .type __kernel_\func,@function - __ALIGN -__kernel_\func: +SYM_FUNC_START(__kernel_\func) CFI_STARTPROC aghi %r15,-STACK_FRAME_VDSO_OVERHEAD CFI_DEF_CFA_OFFSET (STACK_FRAME_USER_OVERHEAD + STACK_FRAME_VDSO_OVERHEAD) @@ -32,7 +29,7 @@ __kernel_\func: CFI_RESTORE 15 br %r14 CFI_ENDPROC - .size __kernel_\func,.-__kernel_\func +SYM_FUNC_END(__kernel_\func) .endm vdso_func gettimeofday @@ -41,16 +38,13 @@ vdso_func clock_gettime vdso_func getcpu .macro vdso_syscall func,syscall - .globl __kernel_\func - .type __kernel_\func,@function - __ALIGN -__kernel_\func: +SYM_FUNC_START(__kernel_\func) CFI_STARTPROC svc \syscall /* Make sure we notice when a syscall returns, which shouldn't happen */ .word 0 CFI_ENDPROC - .size __kernel_\func,.-__kernel_\func +SYM_FUNC_END(__kernel_\func) .endm vdso_syscall restart_syscall,__NR_restart_syscall diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso64/vgetrandom-chacha.S index d802b0a96f41..09c034c2f853 100644 --- a/arch/s390/kernel/vdso64/vgetrandom-chacha.S +++ b/arch/s390/kernel/vdso64/vgetrandom-chacha.S @@ -1,7 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/stringify.h> #include <linux/linkage.h> #include <asm/alternative.h> +#include <asm/dwarf.h> #include <asm/fpu-insn.h> #define STATE0 %v0 @@ -12,9 +14,6 @@ #define COPY1 %v5 #define COPY2 %v6 #define COPY3 %v7 -#define PERM4 %v16 -#define PERM8 %v17 -#define PERM12 %v18 #define BEPERM %v19 #define TMP0 %v20 #define TMP1 %v21 @@ -23,13 +22,11 @@ .section .rodata - .balign 128 -.Lconstants: + .balign 32 +SYM_DATA_START_LOCAL(chacha20_constants) .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral - .long 0x04050607,0x08090a0b,0x0c0d0e0f,0x00010203 # rotl 4 bytes - .long 0x08090a0b,0x0c0d0e0f,0x00010203,0x04050607 # rotl 8 bytes - .long 0x0c0d0e0f,0x00010203,0x04050607,0x08090a0b # rotl 12 bytes .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap +SYM_DATA_END(chacha20_constants) .text /* @@ -43,13 +40,14 @@ * size_t nblocks) */ SYM_FUNC_START(__arch_chacha20_blocks_nostack) - larl %r1,.Lconstants + CFI_STARTPROC + larl %r1,chacha20_constants /* COPY0 = "expand 32-byte k" */ VL COPY0,0,,%r1 - /* PERM4-PERM12,BEPERM = byte selectors for VPERM */ - VLM PERM4,BEPERM,16,%r1 + /* BEPERM = byte selectors for VPERM */ + ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148) /* COPY1,COPY2 = key */ VLM COPY1,COPY2,0,%r3 @@ -89,11 +87,11 
@@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) VERLLF STATE1,STATE1,7 /* STATE1[0,1,2,3] = STATE1[1,2,3,0] */ - VPERM STATE1,STATE1,STATE1,PERM4 + VSLDB STATE1,STATE1,STATE1,4 /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */ - VPERM STATE2,STATE2,STATE2,PERM8 + VSLDB STATE2,STATE2,STATE2,8 /* STATE3[0,1,2,3] = STATE3[3,0,1,2] */ - VPERM STATE3,STATE3,STATE3,PERM12 + VSLDB STATE3,STATE3,STATE3,12 /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */ VAF STATE0,STATE0,STATE1 @@ -116,32 +114,38 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) VERLLF STATE1,STATE1,7 /* STATE1[0,1,2,3] = STATE1[3,0,1,2] */ - VPERM STATE1,STATE1,STATE1,PERM12 + VSLDB STATE1,STATE1,STATE1,12 /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */ - VPERM STATE2,STATE2,STATE2,PERM8 + VSLDB STATE2,STATE2,STATE2,8 /* STATE3[0,1,2,3] = STATE3[1,2,3,0] */ - VPERM STATE3,STATE3,STATE3,PERM4 + VSLDB STATE3,STATE3,STATE3,4 brctg %r0,.Ldoubleround - /* OUTPUT0 = STATE0 + STATE0 */ + /* OUTPUT0 = STATE0 + COPY0 */ VAF STATE0,STATE0,COPY0 - /* OUTPUT1 = STATE1 + STATE1 */ + /* OUTPUT1 = STATE1 + COPY1 */ VAF STATE1,STATE1,COPY1 - /* OUTPUT2 = STATE2 + STATE2 */ + /* OUTPUT2 = STATE2 + COPY2 */ VAF STATE2,STATE2,COPY2 - /* OUTPUT2 = STATE3 + STATE3 */ + /* OUTPUT3 = STATE3 + COPY3 */ VAF STATE3,STATE3,COPY3 - /* - * 32 bit wise little endian store to OUTPUT. If the vector - * enhancement facility 2 is not installed use the slow path. - */ - ALTERNATIVE "brc 0xf,.Lstoreslow", "nop", ALT_FACILITY(148) - VSTBRF STATE0,0,,%r2 - VSTBRF STATE1,16,,%r2 - VSTBRF STATE2,32,,%r2 - VSTBRF STATE3,48,,%r2 -.Lstoredone: + ALTERNATIVE \ + __stringify( \ + /* Convert STATE to little endian and store to OUTPUT */\ + VPERM TMP0,STATE0,STATE0,BEPERM; \ + VPERM TMP1,STATE1,STATE1,BEPERM; \ + VPERM TMP2,STATE2,STATE2,BEPERM; \ + VPERM TMP3,STATE3,STATE3,BEPERM; \ + VSTM TMP0,TMP3,0,%r2), \ + __stringify( \ + /* 32 bit wise little endian store to OUTPUT */ \ + VSTBRF STATE0,0,,%r2; \ + VSTBRF STATE1,16,,%r2; \ + VSTBRF STATE2,32,,%r2; \ + VSTBRF STATE3,48,,%r2; \ + brcl 0,0), \ + ALT_FACILITY(148) /* ++COPY3.COUNTER */ /* alsih %r3,1 */ @@ -173,13 +177,5 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) VZERO TMP3 br %r14 - -.Lstoreslow: - /* Convert STATE to little endian format and store to OUTPUT */ - VPERM TMP0,STATE0,STATE0,BEPERM - VPERM TMP1,STATE1,STATE1,BEPERM - VPERM TMP2,STATE2,STATE2,BEPERM - VPERM TMP3,STATE3,STATE3,BEPERM - VSTM TMP0,TMP3,0,%r2 - j .Lstoredone + CFI_ENDPROC SYM_FUNC_END(__arch_chacha20_blocks_nostack) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index ae5d0a9d6911..377b9aaf8c92 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -191,8 +191,7 @@ SECTIONS . 
= ALIGN(PAGE_SIZE);
 	INIT_DATA_SECTION(0x100)
 
-	RUNTIME_CONST(shift, d_hash_shift)
-	RUNTIME_CONST(ptr, dentry_hashtable)
+	RUNTIME_CONST_VARIABLES
 
 	PERCPU_SECTION(0x100)
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 2a32438e09ce..74f73141f9b9 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -77,7 +77,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 	vcpu->stat.instruction_diagnose_258++;
 	if (vcpu->run->s.regs.gprs[rx] & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
+	rc = read_guest_real(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index e65f597e3044..a688351f4ab5 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -828,6 +828,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
 	const gfn_t gfn = gpa_to_gfn(gpa);
 	int rc;
 
+	if (!gfn_to_memslot(kvm, gfn))
+		return PGM_ADDRESSING;
 	if (mode == GACC_STORE)
 		rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
 	else
@@ -985,6 +987,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
 		gra += fragment_len;
 		data += fragment_len;
 	}
+	if (rc > 0)
+		vcpu->arch.pgm.code = rc;
 	return rc;
 }
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index b320d12aa049..3fde45a151f2 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -405,11 +405,12 @@ int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
  * @len: number of bytes to copy
  *
  * Copy @len bytes from @data (kernel space) to @gra (guest real address).
- * It is up to the caller to ensure that the entire guest memory range is
- * valid memory before calling this function.
  * Guest low address and key protection are not checked.
  *
- * Returns zero on success or -EFAULT on error.
+ * Returns zero on success, -EFAULT when copying from @data failed, or
+ * PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
+ * is also stored to allow injecting into the guest (if applicable) using
+ * kvm_s390_inject_prog_cond().
  *
 * If an error occurs data may have been copied partially to guest memory.
 */
@@ -428,11 +429,12 @@ int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
  * @len: number of bytes to copy
  *
  * Copy @len bytes from @gra (guest real address) to @data (kernel space).
- * It is up to the caller to ensure that the entire guest memory range is
- * valid memory before calling this function.
  * Guest key protection is not checked.
  *
- * Returns zero on success or -EFAULT on error.
+ * Returns zero on success, -EFAULT when copying to @data failed, or
+ * PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
+ * is also stored to allow injecting into the guest (if applicable) using
+ * kvm_s390_inject_prog_cond().
  *
 * If an error occurs data may have been copied partially to kernel space. 
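 *
 * Illustrative caller sketch (editorial, mirroring the diag.c hunk
 * above; "gra" and "parm" are stand-ins): both error classes funnel
 * into the same injection helper.
 *
 *	rc = read_guest_real(vcpu, gra, &parm, sizeof(parm));
 *	if (rc)
 *		return kvm_s390_inject_prog_cond(vcpu, rc);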
*/ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0fd96860fc45..bb7134faaebf 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -348,20 +348,29 @@ static inline int plo_test_bit(unsigned char nr) return cc == 0; } -static __always_inline void __insn32_query(unsigned int opcode, u8 *query) +static __always_inline void __sortl_query(u8 (*query)[32]) { asm volatile( " lghi 0,0\n" - " lgr 1,%[query]\n" + " la 1,%[query]\n" /* Parameter registers are ignored */ - " .insn rrf,%[opc] << 16,2,4,6,0\n" + " .insn rre,0xb9380000,2,4\n" + : [query] "=R" (*query) : - : [query] "d" ((unsigned long)query), [opc] "i" (opcode) - : "cc", "memory", "0", "1"); + : "cc", "0", "1"); } -#define INSN_SORTL 0xb938 -#define INSN_DFLTCC 0xb939 +static __always_inline void __dfltcc_query(u8 (*query)[32]) +{ + asm volatile( + " lghi 0,0\n" + " la 1,%[query]\n" + /* Parameter registers are ignored */ + " .insn rrf,0xb9390000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} static void __init kvm_s390_cpu_feat_init(void) { @@ -415,10 +424,10 @@ static void __init kvm_s390_cpu_feat_init(void) kvm_s390_available_subfunc.kdsa); if (test_facility(150)) /* SORTL */ - __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl); + __sortl_query(&kvm_s390_available_subfunc.sortl); if (test_facility(151)) /* DFLTCC */ - __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc); + __dfltcc_query(&kvm_s390_available_subfunc.dfltcc); if (MACHINE_HAS_ESOP) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 688abc65c79e..7a96623a9d2e 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -62,7 +62,7 @@ EXPORT_SYMBOL(zero_page_mask); static void __init setup_zero_pages(void) { - unsigned long total_pages = PHYS_PFN(memblock_phys_mem_size() - memblock_reserved_size()); + unsigned long total_pages = memblock_estimated_nr_free_pages(); unsigned int order; struct page *page; int i; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index ee90a91ed888..6f55a59a0871 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -657,7 +657,6 @@ static const struct file_operations clp_misc_fops = { .release = clp_misc_release, .unlocked_ioctl = clp_misc_ioctl, .compat_ioctl = clp_misc_ioctl, - .llseek = no_llseek, }; static struct miscdevice clp_misc_device = { diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index dbe95ec5917e..d4f19d33914c 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -280,18 +280,19 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) goto no_pdev; switch (ccdf->pec) { - case 0x003a: /* Service Action or Error Recovery Successful */ + case 0x002a: /* Error event concerns FMB */ + case 0x002b: + case 0x002c: + break; + case 0x0040: /* Service Action or Error Recovery Failed */ + case 0x003b: + zpci_event_io_failure(pdev, pci_channel_io_perm_failure); + break; + default: /* PCI function left in the error state attempt to recover */ ers_res = zpci_event_attempt_error_recovery(pdev); if (ers_res != PCI_ERS_RESULT_RECOVERED) zpci_event_io_failure(pdev, pci_channel_io_perm_failure); break; - default: - /* - * Mark as frozen not permanently failed because the device - * could be subsequently recovered by the platform. 
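   (Net effect of the pci_event.c hunk above: FMB-related events
   0x002a-0x002c are now ignored, PEC codes 0x003b and 0x0040 are
   treated as permanent failure, and every other code first attempts
   driver recovery instead of unconditionally freezing the function.)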
- */ - zpci_event_io_failure(pdev, pci_channel_io_frozen); - break; } pci_dev_put(pdev); no_pdev: diff --git a/arch/sh/include/asm/flat.h b/arch/sh/include/asm/flat.h index fee4f25555cb..70752c7bc55f 100644 --- a/arch/sh/include/asm/flat.h +++ b/arch/sh/include/asm/flat.h @@ -9,7 +9,7 @@ #ifndef __ASM_SH_FLAT_H #define __ASM_SH_FLAT_H -#include <asm/unaligned.h> +#include <linux/unaligned.h> static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, u32 *addr) diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h index 0f384b1f45ca..53fc18a3d4c2 100644 --- a/arch/sh/include/asm/irq.h +++ b/arch/sh/include/asm/irq.h @@ -14,12 +14,6 @@ #define NO_IRQ_IGNORE ((unsigned int)-1) /* - * Simple Mask Register Support - */ -extern void make_maskreg_irq(unsigned int irq); -extern unsigned short *irq_mask_register; - -/* * PINT IRQs */ void make_imask_irq(unsigned int irq); diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index 45c8ae20d109..a1b54bedc929 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c @@ -24,7 +24,7 @@ #include <asm/dwarf.h> #include <asm/unwinder.h> #include <asm/sections.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/stacktrace.h> /* Reserve enough memory for two stack frames */ diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index b9cee98a754e..a469a80840d3 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c @@ -18,7 +18,7 @@ #include <linux/fs.h> #include <linux/string.h> #include <linux/kernel.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/dwarf.h> int apply_relocate_add(Elf32_Shdr *sechdrs, diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c index 688db0dcb97d..913b9a09e885 100644 --- a/arch/sparc/crypto/crc32c_glue.c +++ b/arch/sparc/crypto/crc32c_glue.c @@ -20,7 +20,7 @@ #include <asm/pstate.h> #include <asm/elf.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "opcodes.h" diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c index ec61ff1f96b7..1dc9b3d70eda 100644 --- a/arch/sparc/mm/leon_mm.c +++ b/arch/sparc/mm/leon_mm.c @@ -39,12 +39,10 @@ unsigned long leon_swprobe(unsigned long vaddr, unsigned long *paddr) unsigned int ctxtbl; unsigned int pgd, pmd, ped; unsigned int ptr; - unsigned int lvl, pte, paddrbase; + unsigned int lvl, pte; unsigned int ctx; unsigned int paddr_calc; - paddrbase = 0; - if (srmmu_swprobe_trace) printk(KERN_INFO "swprobe: trace on\n"); @@ -73,7 +71,6 @@ unsigned long leon_swprobe(unsigned long vaddr, unsigned long *paddr) printk(KERN_INFO "swprobe: pgd is entry level 3\n"); lvl = 3; pte = pgd; - paddrbase = pgd & _SRMMU_PTE_PMASK_LEON; goto ready; } if (((pgd & SRMMU_ET_MASK) != SRMMU_ET_PTD)) { @@ -96,7 +93,6 @@ unsigned long leon_swprobe(unsigned long vaddr, unsigned long *paddr) printk(KERN_INFO "swprobe: pmd is entry level 2\n"); lvl = 2; pte = pmd; - paddrbase = pmd & _SRMMU_PTE_PMASK_LEON; goto ready; } if (((pmd & SRMMU_ET_MASK) != SRMMU_ET_PTD)) { @@ -124,7 +120,6 @@ unsigned long leon_swprobe(unsigned long vaddr, unsigned long *paddr) printk(KERN_INFO "swprobe: ped is entry level 1\n"); lvl = 1; pte = ped; - paddrbase = ped & _SRMMU_PTE_PMASK_LEON; goto ready; } if (((ped & SRMMU_ET_MASK) != SRMMU_ET_PTD)) { @@ -147,7 +142,6 @@ unsigned long leon_swprobe(unsigned long vaddr, unsigned long *paddr) printk(KERN_INFO "swprobe: ptr is entry level 0\n"); lvl = 0; pte = ptr; - paddrbase = ptr & _SRMMU_PTE_PMASK_LEON; goto ready; } if 
(srmmu_swprobe_trace) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index dca84fd6d00a..c89575d05021 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -11,7 +11,6 @@ config UML select ARCH_HAS_KCOV select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER - select ARCH_NO_PREEMPT_DYNAMIC select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_KASAN if X86_64 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c index 99a7144b229f..819aabb4ecdc 100644 --- a/arch/um/drivers/harddog_kern.c +++ b/arch/um/drivers/harddog_kern.c @@ -164,7 +164,6 @@ static const struct file_operations harddog_fops = { .compat_ioctl = compat_ptr_ioctl, .open = harddog_open, .release = harddog_release, - .llseek = no_llseek, }; static struct miscdevice harddog_miscdev = { diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index c42b793bce65..9d228878cea2 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -291,7 +291,6 @@ static int hostmixer_release(struct inode *inode, struct file *file) static const struct file_operations hostaudio_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = hostaudio_read, .write = hostaudio_write, .poll = hostaudio_poll, @@ -304,7 +303,6 @@ static const struct file_operations hostaudio_fops = { static const struct file_operations hostmixer_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = hostmixer_ioctl_mixdev, .open = hostmixer_open_mixdev, .release = hostmixer_release, diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 2d473282ab51..c992da83268d 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -22,6 +22,7 @@ #include <linux/interrupt.h> #include <linux/firmware.h> #include <linux/fs.h> +#include <asm/atomic.h> #include <uapi/linux/filter.h> #include <init.h> #include <irq_kern.h> @@ -102,18 +103,33 @@ static const struct { static void vector_reset_stats(struct vector_private *vp) { + /* We reuse the existing queue locks for stats */ + + /* RX stats are modified with RX head_lock held + * in vector_poll. + */ + + spin_lock(&vp->rx_queue->head_lock); vp->estats.rx_queue_max = 0; vp->estats.rx_queue_running_average = 0; - vp->estats.tx_queue_max = 0; - vp->estats.tx_queue_running_average = 0; vp->estats.rx_encaps_errors = 0; + vp->estats.sg_ok = 0; + vp->estats.sg_linearized = 0; + spin_unlock(&vp->rx_queue->head_lock); + + /* TX stats are modified with TX head_lock held + * in vector_send. + */ + + spin_lock(&vp->tx_queue->head_lock); vp->estats.tx_timeout_count = 0; vp->estats.tx_restart_queue = 0; vp->estats.tx_kicks = 0; vp->estats.tx_flow_control_xon = 0; vp->estats.tx_flow_control_xoff = 0; - vp->estats.sg_ok = 0; - vp->estats.sg_linearized = 0; + vp->estats.tx_queue_max = 0; + vp->estats.tx_queue_running_average = 0; + spin_unlock(&vp->tx_queue->head_lock); } static int get_mtu(struct arglist *def) @@ -232,12 +248,6 @@ static int get_transport_options(struct arglist *def) static char *drop_buffer; -/* Array backed queues optimized for bulk enqueue/dequeue and - * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios. - * For more details and full design rationale see - * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt - */ - /* * Advance the mmsg queue head by n = advance. 
Resets the queue to @@ -247,27 +257,13 @@ static char *drop_buffer; static int vector_advancehead(struct vector_queue *qi, int advance) { - int queue_depth; - qi->head = (qi->head + advance) % qi->max_depth; - spin_lock(&qi->tail_lock); - qi->queue_depth -= advance; - - /* we are at 0, use this to - * reset head and tail so we can use max size vectors - */ - - if (qi->queue_depth == 0) { - qi->head = 0; - qi->tail = 0; - } - queue_depth = qi->queue_depth; - spin_unlock(&qi->tail_lock); - return queue_depth; + atomic_sub(advance, &qi->queue_depth); + return atomic_read(&qi->queue_depth); } /* Advance the queue tail by n = advance. @@ -277,16 +273,11 @@ static int vector_advancehead(struct vector_queue *qi, int advance) static int vector_advancetail(struct vector_queue *qi, int advance) { - int queue_depth; - qi->tail = (qi->tail + advance) % qi->max_depth; - spin_lock(&qi->head_lock); - qi->queue_depth += advance; - queue_depth = qi->queue_depth; - spin_unlock(&qi->head_lock); - return queue_depth; + atomic_add(advance, &qi->queue_depth); + return atomic_read(&qi->queue_depth); } static int prep_msg(struct vector_private *vp, @@ -339,9 +330,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb) int iov_count; spin_lock(&qi->tail_lock); - spin_lock(&qi->head_lock); - queue_depth = qi->queue_depth; - spin_unlock(&qi->head_lock); + queue_depth = atomic_read(&qi->queue_depth); if (skb) packet_len = skb->len; @@ -360,6 +349,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb) mmsg_vector->msg_hdr.msg_iovlen = iov_count; mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr; mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size; + wmb(); /* Make the packet visible to the NAPI poll thread */ queue_depth = vector_advancetail(qi, 1); } else goto drop; @@ -398,7 +388,7 @@ static int consume_vector_skbs(struct vector_queue *qi, int count) } /* - * Generic vector deque via sendmmsg with support for forming headers + * Generic vector dequeue via sendmmsg with support for forming headers * using transport specific callback. Allows GRE, L2TPv3, RAW and * other transports to use a common dequeue procedure in vector mode */ @@ -408,69 +398,64 @@ static int vector_send(struct vector_queue *qi) { struct vector_private *vp = netdev_priv(qi->dev); struct mmsghdr *send_from; - int result = 0, send_len, queue_depth = qi->max_depth; + int result = 0, send_len; if (spin_trylock(&qi->head_lock)) { - if (spin_trylock(&qi->tail_lock)) { - /* update queue_depth to current value */ - queue_depth = qi->queue_depth; - spin_unlock(&qi->tail_lock); - while (queue_depth > 0) { - /* Calculate the start of the vector */ - send_len = queue_depth; - send_from = qi->mmsg_vector; - send_from += qi->head; - /* Adjust vector size if wraparound */ - if (send_len + qi->head > qi->max_depth) - send_len = qi->max_depth - qi->head; - /* Try to TX as many packets as possible */ - if (send_len > 0) { - result = uml_vector_sendmmsg( - vp->fds->tx_fd, - send_from, - send_len, - 0 - ); - vp->in_write_poll = - (result != send_len); - } - /* For some of the sendmmsg error scenarios - * we may end being unsure in the TX success - * for all packets. It is safer to declare - * them all TX-ed and blame the network. - */ - if (result < 0) { - if (net_ratelimit()) - netdev_err(vp->dev, "sendmmsg err=%i\n", - result); - vp->in_error = true; - result = send_len; - } - if (result > 0) { - queue_depth = - consume_vector_skbs(qi, result); - /* This is equivalent to an TX IRQ. 
- * Restart the upper layers to feed us - * more packets. - */ - if (result > vp->estats.tx_queue_max) - vp->estats.tx_queue_max = result; - vp->estats.tx_queue_running_average = - (vp->estats.tx_queue_running_average + result) >> 1; - } - netif_wake_queue(qi->dev); - /* if TX is busy, break out of the send loop, - * poll write IRQ will reschedule xmit for us + /* update queue_depth to current value */ + while (atomic_read(&qi->queue_depth) > 0) { + /* Calculate the start of the vector */ + send_len = atomic_read(&qi->queue_depth); + send_from = qi->mmsg_vector; + send_from += qi->head; + /* Adjust vector size if wraparound */ + if (send_len + qi->head > qi->max_depth) + send_len = qi->max_depth - qi->head; + /* Try to TX as many packets as possible */ + if (send_len > 0) { + result = uml_vector_sendmmsg( + vp->fds->tx_fd, + send_from, + send_len, + 0 + ); + vp->in_write_poll = + (result != send_len); + } + /* For some of the sendmmsg error scenarios + * we may end up being unsure of the TX success + * for all packets. It is safer to declare + * them all TX-ed and blame the network. + */ + if (result < 0) { + if (net_ratelimit()) + netdev_err(vp->dev, "sendmmsg err=%i\n", + result); + vp->in_error = true; + result = send_len; + } + if (result > 0) { + consume_vector_skbs(qi, result); + /* This is equivalent to a TX IRQ. + * Restart the upper layers to feed us + * more packets. */ - if (result != send_len) { - vp->estats.tx_restart_queue++; - break; - } + if (result > vp->estats.tx_queue_max) + vp->estats.tx_queue_max = result; + vp->estats.tx_queue_running_average = + (vp->estats.tx_queue_running_average + result) >> 1; + } + netif_wake_queue(qi->dev); + /* if TX is busy, break out of the send loop, + * poll write IRQ will reschedule xmit for us. + */ + if (result != send_len) { + vp->estats.tx_restart_queue++; + break; } } spin_unlock(&qi->head_lock); } - return queue_depth; + return atomic_read(&qi->queue_depth); } /* Queue destructor. Deliberately stateless so we can use @@ -589,7 +574,7 @@ static struct vector_queue *create_queue( } spin_lock_init(&result->head_lock); spin_lock_init(&result->tail_lock); - result->queue_depth = 0; + atomic_set(&result->queue_depth, 0); result->head = 0; result->tail = 0; return result; @@ -668,18 +653,27 @@ done: } -/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/ +/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs */ static void prep_queue_for_rx(struct vector_queue *qi) { struct vector_private *vp = netdev_priv(qi->dev); struct mmsghdr *mmsg_vector = qi->mmsg_vector; void **skbuff_vector = qi->skbuff_vector; - int i; + int i, queue_depth; + + queue_depth = atomic_read(&qi->queue_depth); - if (qi->queue_depth == 0) + if (queue_depth == 0) return; - for (i = 0; i < qi->queue_depth; i++) { + + /* RX is always emptied 100% during each cycle, so we do not + * have to do the tail wraparound math for it. + */
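These hunks convert vector_queue's queue_depth from an int guarded by both queue locks into an atomic_t, so the producer (vector_enqueue, under tail_lock) and the consumer (vector_send and prep_queue_for_rx, under head_lock) can each adjust the depth without taking the other side's lock. A minimal userspace sketch of the same split-lock pattern, using C11 atomics and pthread mutexes in place of the kernel's atomic_t and spinlocks (the queue type, names, and sizes here are illustrative, not taken from the driver):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define MAX_DEPTH 64

struct queue {
	pthread_mutex_t head_lock;	/* serializes the consumer side */
	pthread_mutex_t tail_lock;	/* serializes the producer side */
	atomic_int queue_depth;		/* the only state shared by both sides */
	int head, tail;
	int slots[MAX_DEPTH];
};

/* Producer: holds only tail_lock, adjusts the depth atomically. */
static int enqueue(struct queue *q, int val)
{
	pthread_mutex_lock(&q->tail_lock);
	if (atomic_load(&q->queue_depth) == MAX_DEPTH) {
		pthread_mutex_unlock(&q->tail_lock);
		return -1;			/* full */
	}
	q->slots[q->tail] = val;
	q->tail = (q->tail + 1) % MAX_DEPTH;
	/* Release ordering plays the role of the wmb() added in
	 * vector_enqueue(): the slot write becomes visible before the
	 * depth increment that publishes it. */
	atomic_fetch_add_explicit(&q->queue_depth, 1, memory_order_release);
	pthread_mutex_unlock(&q->tail_lock);
	return 0;
}

/* Consumer: holds only head_lock. */
static int dequeue(struct queue *q, int *val)
{
	pthread_mutex_lock(&q->head_lock);
	if (atomic_load_explicit(&q->queue_depth, memory_order_acquire) == 0) {
		pthread_mutex_unlock(&q->head_lock);
		return -1;			/* empty */
	}
	*val = q->slots[q->head];
	q->head = (q->head + 1) % MAX_DEPTH;
	atomic_fetch_sub(&q->queue_depth, 1);
	pthread_mutex_unlock(&q->head_lock);
	return 0;
}

int main(void)
{
	struct queue q = {
		.head_lock = PTHREAD_MUTEX_INITIALIZER,
		.tail_lock = PTHREAD_MUTEX_INITIALIZER,
	};
	int v;

	enqueue(&q, 42);
	if (dequeue(&q, &v) == 0)
		printf("dequeued %d\n", v);
	return 0;
}

The depth can be read without the peer's lock because each side only ever moves its own index; the atomic counter is what keeps the two views consistent.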
+ + qi->head = qi->tail = 0; + + for (i = 0; i < queue_depth; i++) { /* it is OK if allocation fails - recvmmsg with NULL data in * iov argument still performs an RX, just drops the packet * This allows us to stop faffing around with a "drop buffer" @@ -689,7 +683,7 @@ static void prep_queue_for_rx(struct vector_queue *qi) skbuff_vector++; mmsg_vector++; } - qi->queue_depth = 0; + atomic_set(&qi->queue_depth, 0); } static struct vector_device *find_device(int n) @@ -972,7 +966,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget) budget = qi->max_depth; packet_count = uml_vector_recvmmsg( - vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0); + vp->fds->rx_fd, qi->mmsg_vector, budget, 0); if (packet_count < 0) vp->in_error = true; @@ -985,7 +979,7 @@ * many do we need to prep the next time prep_queue_for_rx() is called. */ - qi->queue_depth = packet_count; + atomic_add(packet_count, &qi->queue_depth); for (i = 0; i < packet_count; i++) { skb = (*skbuff_vector); @@ -1172,6 +1166,7 @@ static int vector_poll(struct napi_struct *napi, int budget) if ((vp->options & VECTOR_TX) != 0) tx_enqueued = (vector_send(vp->tx_queue) > 0); + spin_lock(&vp->rx_queue->head_lock); if ((vp->options & VECTOR_RX) > 0) err = vector_mmsg_rx(vp, budget); else { @@ -1179,12 +1174,13 @@ if (err > 0) err = 1; } + spin_unlock(&vp->rx_queue->head_lock); if (err > 0) work_done += err; if (tx_enqueued || err > 0) napi_schedule(napi); - if (work_done < budget) + if (work_done <= budget) napi_complete_done(napi, work_done); return work_done; } @@ -1225,7 +1221,7 @@ static int vector_net_open(struct net_device *dev) vp->rx_header_size, MAX_IOV_SIZE ); - vp->rx_queue->queue_depth = get_depth(vp->parsed); + atomic_set(&vp->rx_queue->queue_depth, get_depth(vp->parsed)); } else { vp->header_rxbuffer = kmalloc( vp->rx_header_size, @@ -1467,7 +1463,17 @@ static void vector_get_ethtool_stats(struct net_device *dev, { struct vector_private *vp = netdev_priv(dev); + /* Stats are modified in the dequeue portions of + * rx/tx which are protected by the head locks; + * grabbing these locks here ensures they are up + * to date.
+ */ + + spin_lock(&vp->tx_queue->head_lock); + spin_lock(&vp->rx_queue->head_lock); memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats)); + spin_unlock(&vp->rx_queue->head_lock); + spin_unlock(&vp->tx_queue->head_lock); } static int vector_get_coalesce(struct net_device *netdev, diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h index 806df551be0b..417834793658 100644 --- a/arch/um/drivers/vector_kern.h +++ b/arch/um/drivers/vector_kern.h @@ -14,6 +14,7 @@ #include <linux/ctype.h> #include <linux/workqueue.h> #include <linux/interrupt.h> +#include <asm/atomic.h> #include "vector_user.h" @@ -44,7 +45,8 @@ struct vector_queue { struct net_device *dev; spinlock_t head_lock; spinlock_t tail_lock; - int queue_depth, head, tail, max_depth, max_iov_frags; + atomic_t queue_depth; + int head, tail, max_depth, max_iov_frags; short options; }; diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c index b16a5e5619d3..2ea67e6fd067 100644 --- a/arch/um/drivers/vector_user.c +++ b/arch/um/drivers/vector_user.c @@ -46,6 +46,9 @@ #define TRANS_FD "fd" #define TRANS_FD_LEN strlen(TRANS_FD) +#define TRANS_VDE "vde" +#define TRANS_VDE_LEN strlen(TRANS_VDE) + #define VNET_HDR_FAIL "could not enable vnet headers on fd %d" #define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s" #define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i" @@ -434,6 +437,84 @@ fd_cleanup: return NULL; } +/* enough char to store an int type */ +#define ENOUGH(type) ((CHAR_BIT * sizeof(type) - 1) / 3 + 2) +#define ENOUGH_OCTAL(type) ((CHAR_BIT * sizeof(type) + 2) / 3) +/* vde_plug --descr xx --port2 xx --mod2 xx --group2 xx seqpacket://NN vnl (NULL) */ +#define VDE_MAX_ARGC 12 +#define VDE_SEQPACKET_HEAD "seqpacket://" +#define VDE_SEQPACKET_HEAD_LEN (sizeof(VDE_SEQPACKET_HEAD) - 1) +#define VDE_DEFAULT_DESCRIPTION "UML" + +static struct vector_fds *user_init_vde_fds(struct arglist *ifspec) +{ + char seqpacketvnl[VDE_SEQPACKET_HEAD_LEN + ENOUGH(int) + 1]; + char *argv[VDE_MAX_ARGC] = {"vde_plug"}; + int argc = 1; + int rv; + int sv[2]; + struct vector_fds *result = NULL; + + char *vnl = uml_vector_fetch_arg(ifspec,"vnl"); + char *descr = uml_vector_fetch_arg(ifspec,"descr"); + char *port = uml_vector_fetch_arg(ifspec,"port"); + char *mode = uml_vector_fetch_arg(ifspec,"mode"); + char *group = uml_vector_fetch_arg(ifspec,"group"); + if (descr == NULL) descr = VDE_DEFAULT_DESCRIPTION; + + argv[argc++] = "--descr"; + argv[argc++] = descr; + if (port != NULL) { + argv[argc++] = "--port2"; + argv[argc++] = port; + } + if (mode != NULL) { + argv[argc++] = "--mod2"; + argv[argc++] = mode; + } + if (group != NULL) { + argv[argc++] = "--group2"; + argv[argc++] = group; + } + argv[argc++] = seqpacketvnl; + argv[argc++] = vnl; + argv[argc++] = NULL; + + rv = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv); + if (rv < 0) { + printk(UM_KERN_ERR "vde: seqpacket socketpair err %d", -errno); + return NULL; + } + rv = os_set_exec_close(sv[0]); + if (rv < 0) { + printk(UM_KERN_ERR "vde: seqpacket socketpair cloexec err %d", -errno); + goto vde_cleanup_sv; + } + snprintf(seqpacketvnl, sizeof(seqpacketvnl), VDE_SEQPACKET_HEAD "%d", sv[1]); + + run_helper(NULL, NULL, argv); + + close(sv[1]); + + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result == NULL) { + printk(UM_KERN_ERR "fd open: allocation failed"); + goto vde_cleanup; + } + + result->rx_fd = sv[0]; + result->tx_fd = sv[0]; + result->remote_addr_size = 0; + result->remote_addr = NULL; + return result; + 
+vde_cleanup_sv: + close(sv[1]); +vde_cleanup: + close(sv[0]); + return NULL; +} + static struct vector_fds *user_init_raw_fds(struct arglist *ifspec) { int rxfd = -1, txfd = -1; @@ -673,6 +754,8 @@ struct vector_fds *uml_vector_user_open( return user_init_unix_fds(parsed, ID_BESS); if (strncmp(transport, TRANS_FD, TRANS_FD_LEN) == 0) return user_init_fd_fds(parsed); + if (strncmp(transport, TRANS_VDE, TRANS_VDE_LEN) == 0) + return user_init_vde_fds(parsed); return NULL; } diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index 6100819681b5..744e7f31e8ef 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -14,7 +14,7 @@ #include <linux/virtio-uml.h> #include <linux/delay.h> #include <linux/msi.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <irq_kern.h> #define MAX_DEVICES 8 diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 5bb397b65efb..83373c9963e7 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -359,11 +359,4 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return pte; } -/* Clear a kernel PTE and flush it from the TLB */ -#define kpte_clear_flush(ptep, vaddr) \ -do { \ - pte_clear(&init_mm, (vaddr), (ptep)); \ - __flush_tlb_one((vaddr)); \ -} while (0) - #endif diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 5a7c05275aa7..bce4595798da 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -28,20 +28,10 @@ struct thread_struct { struct arch_thread arch; jmp_buf switch_buf; struct { - int op; - union { - struct { - int pid; - } fork, exec; - struct { - int (*proc)(void *); - void *arg; - } thread; - struct { - void (*proc)(void *); - void *arg; - } cb; - } u; + struct { + int (*proc)(void *); + void *arg; + } thread; } request; }; @@ -51,7 +41,7 @@ struct thread_struct { .fault_addr = NULL, \ .prev_sched = NULL, \ .arch = INIT_ARCH_THREAD, \ - .request = { 0 } \ + .request = { } \ } /* diff --git a/arch/um/include/asm/sysrq.h b/arch/um/include/asm/sysrq.h deleted file mode 100644 index 8fc8c65cd357..000000000000 --- a/arch/um/include/asm/sysrq.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_SYSRQ_H -#define __UM_SYSRQ_H - -struct task_struct; -extern void show_trace(struct task_struct* task, unsigned long *stack); - -#endif diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h index 7d9d60e41e4e..1d4b6bbc1b65 100644 --- a/arch/um/include/asm/uaccess.h +++ b/arch/um/include/asm/uaccess.h @@ -8,7 +8,7 @@ #define __UM_UACCESS_H #include <asm/elf.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #define __under_task_size(addr, size) \ (((unsigned long) (addr) < TASK_SIZE) && \ diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h index 1e76ba40feba..140388c282f6 100644 --- a/arch/um/include/shared/skas/mm_id.h +++ b/arch/um/include/shared/skas/mm_id.h @@ -7,10 +7,7 @@ #define __MM_ID_H struct mm_id { - union { - int mm_fd; - int pid; - } u; + int pid; unsigned long stack; int syscall_data_len; }; diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h index ebaa116de30b..85c50122ab98 100644 --- a/arch/um/include/shared/skas/skas.h +++ b/arch/um/include/shared/skas/skas.h @@ -10,10 +10,8 @@ extern int userspace_pid[]; -extern int user_thread(unsigned long stack, int flags); extern void new_thread_handler(void); extern void 
handle_syscall(struct uml_pt_regs *regs); -extern long execute_syscall_skas(void *r); extern unsigned long current_stub_stack(void); extern struct mm_id *current_mm_id(void); extern void current_mm_sync(void); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 2c15bb2c104c..cb8b5cd9285c 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -35,8 +35,5 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; clear_thread_flag(TIF_SINGLESTEP); -#ifdef SUBARCH_EXECVE1 - SUBARCH_EXECVE1(regs->regs); -#endif } EXPORT_SYMBOL(start_thread); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index f36b63f53bab..be2856af6d4c 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -109,8 +109,8 @@ void new_thread_handler(void) schedule_tail(current->thread.prev_sched); current->thread.prev_sched = NULL; - fn = current->thread.request.u.thread.proc; - arg = current->thread.request.u.thread.arg; + fn = current->thread.request.thread.proc; + arg = current->thread.request.thread.arg; /* * callback returns only if the kernel thread execs a process @@ -158,8 +158,8 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args) arch_copy_thread(&current->thread.arch, &p->thread.arch); } else { get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); - p->thread.request.u.thread.proc = args->fn; - p->thread.request.u.thread.arg = args->fn_arg; + p->thread.request.thread.proc = args->fn; + p->thread.request.thread.arg = args->fn_arg; handler = new_thread_handler; } diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 3736bca626ba..680bce4bd8fa 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -29,7 +29,7 @@ static void kill_off_processes(void) t = find_lock_task_mm(p); if (!t) continue; - pid = t->mm->context.id.u.pid; + pid = t->mm->context.id.pid; task_unlock(t); os_kill_ptraced_process(pid, 1); } diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 47f98d87ea3c..886ed5e65674 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -32,11 +32,11 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) new_id->stack = stack; block_signals_trace(); - new_id->u.pid = start_userspace(stack); + new_id->pid = start_userspace(stack); unblock_signals_trace(); - if (new_id->u.pid < 0) { - ret = new_id->u.pid; + if (new_id->pid < 0) { + ret = new_id->pid; goto out_free; } @@ -83,12 +83,12 @@ void destroy_context(struct mm_struct *mm) * whole UML suddenly dying. Also, cover negative and * 1 cases, since they shouldn't happen either.
*/ - if (mmu->id.u.pid < 2) { + if (mmu->id.pid < 2) { printk(KERN_ERR "corrupt mm_context - pid = %d\n", - mmu->id.u.pid); + mmu->id.pid); return; } - os_kill_ptraced_process(mmu->id.u.pid, 1); + os_kill_ptraced_process(mmu->id.pid, 1); free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 5f9c1c5f36e2..68657988c8d1 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -39,8 +39,8 @@ int __init start_uml(void) init_new_thread_signals(); - init_task.thread.request.u.thread.proc = start_kernel_proc; - init_task.thread.request.u.thread.arg = NULL; + init_task.thread.request.thread.proc = start_kernel_proc; + init_task.thread.request.thread.arg = NULL; return start_idle_thread(task_stack_page(&init_task), &init_task.thread.switch_buf); } diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 9ee19e566da3..b09e85279d2b 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -12,23 +12,13 @@ #include <sysdep/syscalls.h> #include <linux/time-internal.h> #include <asm/unistd.h> +#include <asm/delay.h> void handle_syscall(struct uml_pt_regs *r) { struct pt_regs *regs = container_of(r, struct pt_regs, regs); int syscall; - /* - * If we have infinite CPU resources, then make every syscall also a - * preemption point, since we don't have any other preemption in this - * case, and kernel threads would basically never run until userspace - * went to sleep, even if said userspace interacts with the kernel in - * various ways. - */ - if (time_travel_mode == TT_MODE_INFCPU || - time_travel_mode == TT_MODE_EXTERNAL) - schedule(); - /* Initialize the syscall number and default return value. */ UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS); @@ -41,9 +31,25 @@ void handle_syscall(struct uml_pt_regs *r) goto out; syscall = UPT_SYSCALL_NR(r); - if (syscall >= 0 && syscall < __NR_syscalls) - PT_REGS_SET_SYSCALL_RETURN(regs, - EXECUTE_SYSCALL(syscall, regs)); + if (syscall >= 0 && syscall < __NR_syscalls) { + unsigned long ret = EXECUTE_SYSCALL(syscall, regs); + + PT_REGS_SET_SYSCALL_RETURN(regs, ret); + + /* + * An error value here can be some form of -ERESTARTSYS + * and then we'd just loop. Make any error syscalls take + * some time, so that it won't just loop if something is + * not ready, and hopefully other things will make some + * progress. 
+ */ + if (IS_ERR_VALUE(ret) && + (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL)) { + um_udelay(1); + schedule(); + } + } out: syscall_trace_leave(regs); diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 746715379f12..4bb8622dc512 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -11,7 +11,6 @@ #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> -#include <asm/sysrq.h> #include <asm/stacktrace.h> #include <os.h> diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 47b9f5e63566..29b27b90581f 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -839,7 +839,7 @@ static irqreturn_t um_timer(int irq, void *dev) if (get_current()->mm != NULL) { /* userspace - relay signal, results in correct userspace timers */ - os_alarm_process(get_current()->mm->context.id.u.pid); + os_alarm_process(get_current()->mm->context.id.pid); } (*timer_clockevent.event_handler)(&timer_clockevent); diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 44c6fc697f3a..548af31d4111 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -82,16 +82,12 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, (x ? UM_PROT_EXEC : 0)); if (pte_newpage(*pte)) { if (pte_present(*pte)) { - if (pte_newpage(*pte)) { - __u64 offset; - unsigned long phys = - pte_val(*pte) & PAGE_MASK; - int fd = phys_mapping(phys, &offset); - - ret = ops->mmap(ops->mm_idp, addr, - PAGE_SIZE, prot, fd, - offset); - } + __u64 offset; + unsigned long phys = pte_val(*pte) & PAGE_MASK; + int fd = phys_mapping(phys, &offset); + + ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE, + prot, fd, offset); } else ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); } else if (pte_newprot(*pte)) diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 5adf8f630049..f1d03cf3957f 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -528,7 +528,8 @@ int os_shutdown_socket(int fd, int r, int w) ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds, void *data, size_t data_len) { - char buf[CMSG_SPACE(sizeof(*fds) * n_fds)]; +#define MAX_RCV_FDS 2 + char buf[CMSG_SPACE(sizeof(*fds) * MAX_RCV_FDS)]; struct cmsghdr *cmsg; struct iovec iov = { .iov_base = data, @@ -538,10 +539,13 @@ ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds, .msg_iov = &iov, .msg_iovlen = 1, .msg_control = buf, - .msg_controllen = sizeof(buf), + .msg_controllen = CMSG_SPACE(sizeof(*fds) * n_fds), }; int n; + if (n_fds > MAX_RCV_FDS) + return -EINVAL; + n = recvmsg(fd, &msg, 0); if (n < 0) return -errno; diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index c55430775efd..9a13ac23c606 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -78,7 +78,7 @@ static inline long do_syscall_stub(struct mm_id *mm_idp) { struct stub_data *proc_data = (void *)mm_idp->stack; int n, i; - int err, pid = mm_idp->u.pid; + int err, pid = mm_idp->pid; n = ptrace_setregs(pid, syscall_regs); if (n < 0) { diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index f7088345b3fc..b6f656bcffb1 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -588,5 +588,5 @@ void reboot_skas(void) void __switch_mm(struct mm_id *mm_idp) { - userspace_pid[0] = mm_idp->u.pid; + userspace_pid[0] = mm_idp->pid; } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2852fcd82cbd..16354dfa6d96 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2257,6 +2257,7 @@ 
config RANDOMIZE_MEMORY_PHYSICAL_PADDING config ADDRESS_MASKING bool "Linear Address Masking support" depends on X86_64 + depends on COMPILE_TEST || !CPU_MITIGATIONS # wait for LASS help Linear Address Masking (LAM) modifies the checking that is applied to 64-bit linear addresses, allowing software to use the diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 801fd85c3ef6..cd75e78a06c1 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -24,11 +24,15 @@ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix) ifdef CONFIG_MITIGATION_RETHUNK RETHUNK_CFLAGS := -mfunction-return=thunk-extern +RETHUNK_RUSTFLAGS := -Zfunction-return=thunk-extern RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) +RETPOLINE_RUSTFLAGS += $(RETHUNK_RUSTFLAGS) endif export RETHUNK_CFLAGS +export RETHUNK_RUSTFLAGS export RETPOLINE_CFLAGS +export RETPOLINE_RUSTFLAGS export RETPOLINE_VDSO_CFLAGS # For gcc stack alignment is specified with -mpreferred-stack-boundary, @@ -218,9 +222,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_MITIGATION_RETPOLINE KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) + KBUILD_RUSTFLAGS += $(RETPOLINE_RUSTFLAGS) # Additionally, avoid generating expensive indirect jumps which # are subject to retpolines for small number of switch cases. - # clang turns off jump table generation by default when under + # LLVM turns off jump table generation by default when under # retpoline builds, however, gcc does not for x86. This has # only been fixed starting from gcc stable version 8.4.0 and # onwards, but not for older ones. See gcc bug #86952. @@ -237,6 +242,10 @@ ifdef CONFIG_CALL_PADDING PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES) KBUILD_CFLAGS += $(PADDING_CFLAGS) export PADDING_CFLAGS + +PADDING_RUSTFLAGS := -Zpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES) +KBUILD_RUSTFLAGS += $(PADDING_RUSTFLAGS) +export PADDING_RUSTFLAGS endif KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index da8b66dce0da..327c45c5013f 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -16,6 +16,7 @@ #include <asm/insn-eval.h> #include <asm/pgtable.h> #include <asm/set_memory.h> +#include <asm/traps.h> /* MMIO direction */ #define EPT_READ 0 @@ -433,6 +434,11 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) return -EINVAL; } + if (!fault_in_kernel_space(ve->gla)) { + WARN_ONCE(1, "Access to userspace address is not supported"); + return -EINVAL; + } + /* * Reject EPT violation #VEs that split pages.
* diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index d45e9c0c42ac..f110708c8038 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -8,7 +8,7 @@ * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 700ecaee9a08..41bc02e48916 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -19,7 +19,7 @@ #include <crypto/internal/simd.h> #include <asm/cpu_device_id.h> #include <asm/simd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index d9feadffa972..324686bca368 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -9,6 +9,8 @@ #include <asm/unwind_hints.h> #include <asm/segment.h> #include <asm/cache.h> +#include <asm/cpufeatures.h> +#include <asm/nospec-branch.h> #include "calling.h" @@ -19,6 +21,9 @@ SYM_FUNC_START(entry_ibpb) movl $PRED_CMD_IBPB, %eax xorl %edx, %edx wrmsr + + /* Make sure IBPB clears return stack predictions too. */ + FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_BUG_IBPB_NO_RET RET SYM_FUNC_END(entry_ibpb) /* For KVM */ diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index d3a814efbff6..20be5758c2d2 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -871,6 +871,8 @@ SYM_FUNC_START(entry_SYSENTER_32) /* Now ready to switch the cr3 */ SWITCH_TO_USER_CR3 scratch_reg=%eax + /* Clobbers ZF */ + CLEAR_CPU_BUFFERS /* * Restore all flags except IF. (We restore IF separately because @@ -881,7 +883,6 @@ BUG_IF_WRONG_CR3 no_user_check=1 popfl popl %eax - CLEAR_CPU_BUFFERS /* * Return back to the vDSO, which will pop ecx and edx. @@ -1144,7 +1145,6 @@ SYM_CODE_START(asm_exc_nmi) /* Not on SYSENTER stack.
*/ call exc_nmi - CLEAR_CPU_BUFFERS jmp .Lnmi_return .Lnmi_from_sysenter_stack: @@ -1165,6 +1165,7 @@ SYM_CODE_START(asm_exc_nmi) CHECK_AND_APPLY_ESPFIX RESTORE_ALL_NMI cr3_reg=%edi pop=4 + CLEAR_CPU_BUFFERS jmp .Lirq_return #ifdef CONFIG_X86_ESPFIX32 @@ -1206,6 +1207,7 @@ SYM_CODE_START(asm_exc_nmi) * 1 - orig_ax */ lss (1+5+6)*4(%esp), %esp # back to espfix stack + CLEAR_CPU_BUFFERS jmp .Lirq_return #endif SYM_CODE_END(asm_exc_nmi) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 6f3b6aef47ba..d0caac26533f 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -116,7 +116,10 @@ static inline bool amd_gart_present(void) #define amd_nb_num(x) 0 #define amd_nb_has_feature(x) false -#define node_to_amd_nb(x) NULL +static inline struct amd_northbridge *node_to_amd_nb(int node) +{ + return NULL; +} #define amd_gart_present(x) false #endif diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 8db2ec4d6cda..1f650b4dde50 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -163,20 +163,18 @@ static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) } #define arch_atomic64_dec_return arch_atomic64_dec_return -static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { __alternative_atomic64(add, add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), ASM_NO_INPUT_CLOBBER("memory")); - return i; } -static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), ASM_NO_INPUT_CLOBBER("memory")); - return i; } static __always_inline void arch_atomic64_inc(atomic64_t *v) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index dd4682857c12..913fd3a7bac6 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -215,7 +215,7 @@ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. 
*/ #define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ #define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */ -#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier without a guaranteed RSB flush */ #define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */ @@ -348,6 +348,7 @@ #define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */ #define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ #define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */ +#define X86_FEATURE_AMD_IBPB_RET (13*32+30) /* IBPB clears return address predictor */ #define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ @@ -523,4 +524,5 @@ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ +#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h index 80cc6386d7b1..ca4243318aad 100644 --- a/arch/x86/include/asm/cpuid.h +++ b/arch/x86/include/asm/cpuid.h @@ -179,6 +179,7 @@ static __always_inline bool cpuid_function_is_indexed(u32 function) case 0x1d: case 0x1e: case 0x1f: + case 0x24: case 0x8000001d: return true; } diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 0152a81d9b4a..b4d719de2c84 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_FTRACE_H #define _ASM_X86_FTRACE_H +#include <asm/ptrace.h> + #ifdef CONFIG_FUNCTION_TRACER #ifndef CC_USING_FENTRY # error Compiler does not support fentry? diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 44949f972826..1a42f829667a 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -135,6 +135,8 @@ #define INTEL_LUNARLAKE_M IFM(6, 0xBD) +#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) + /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */ @@ -178,4 +180,7 @@ #define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */ #define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */ +/* Family 19 */ +#define INTEL_PANTHERCOVE_X IFM(19, 0x01) /* Diamond Rapids */ + #endif /* _ASM_X86_INTEL_FAMILY_H */ diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 68ad4f923664..861d080ed4c6 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -14,8 +14,8 @@ BUILD_BUG_ON(1) * be __static_call_return0. 
*/ KVM_X86_OP(check_processor_compatibility) -KVM_X86_OP(hardware_enable) -KVM_X86_OP(hardware_disable) +KVM_X86_OP(enable_virtualization_cpu) +KVM_X86_OP(disable_virtualization_cpu) KVM_X86_OP(hardware_unsetup) KVM_X86_OP(has_emulated_msr) KVM_X86_OP(vcpu_after_set_cpuid) @@ -125,7 +125,7 @@ KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from) KVM_X86_OP_OPTIONAL(vm_move_enc_context_from) KVM_X86_OP_OPTIONAL(guest_memory_reclaimed) -KVM_X86_OP(get_msr_feature) +KVM_X86_OP(get_feature_msr) KVM_X86_OP(check_emulate_instruction) KVM_X86_OP(apic_init_signal_blocked) KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4a68cb3eba78..6d9f763a7bb9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -36,6 +36,7 @@ #include <asm/kvm_page_track.h> #include <asm/kvm_vcpu_regs.h> #include <asm/hyperv-tlfs.h> +#include <asm/reboot.h> #define __KVM_HAVE_ARCH_VCPU_DEBUGFS @@ -211,6 +212,7 @@ enum exit_fastpath_completion { EXIT_FASTPATH_NONE, EXIT_FASTPATH_REENTER_GUEST, EXIT_FASTPATH_EXIT_HANDLED, + EXIT_FASTPATH_EXIT_USERSPACE, }; typedef enum exit_fastpath_completion fastpath_t; @@ -280,10 +282,6 @@ enum x86_intercept_stage; #define PFERR_PRIVATE_ACCESS BIT_ULL(49) #define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS) -#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ - PFERR_WRITE_MASK | \ - PFERR_PRESENT_MASK) - /* apic attention bits */ #define KVM_APIC_CHECK_VAPIC 0 /* @@ -1629,8 +1627,10 @@ struct kvm_x86_ops { int (*check_processor_compatibility)(void); - int (*hardware_enable)(void); - void (*hardware_disable)(void); + int (*enable_virtualization_cpu)(void); + void (*disable_virtualization_cpu)(void); + cpu_emergency_virt_cb *emergency_disable_virtualization_cpu; + void (*hardware_unsetup)(void); bool (*has_emulated_msr)(struct kvm *kvm, u32 index); void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); @@ -1727,6 +1727,8 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); + + const bool x2apic_icr_is_split; const unsigned long required_apicv_inhibits; bool allow_apicv_in_x2apic_without_x2apic_virtualization; void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); @@ -1806,7 +1808,7 @@ struct kvm_x86_ops { int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd); void (*guest_memory_reclaimed)(struct kvm *kvm); - int (*get_msr_feature)(struct kvm_msr_entry *entry); + int (*get_feature_msr)(u32 msr, u64 *data); int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, void *insn, int insn_len); @@ -2060,6 +2062,8 @@ void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); +int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data); int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); @@ -2136,7 +2140,15 @@ int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu); void kvm_update_dr7(struct kvm_vcpu *vcpu); -int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); +bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t 
cr2_or_gpa, + bool always_retry); + +static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, + gpa_t cr2_or_gpa) +{ + return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, false); +} + void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, ulong roots_to_free); void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu); @@ -2254,6 +2266,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); +int kvm_cpu_get_extint(struct kvm_vcpu *v); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); @@ -2345,7 +2358,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); KVM_X86_QUIRK_OUT_7E_INC_RIP | \ KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \ KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \ - KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS) + KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \ + KVM_X86_QUIRK_SLOT_ZAP_ALL) /* * KVM previously used a u32 field in kvm_run to indicate the hypercall was diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a7c06a46fb76..3ae84c3b8e6d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -36,6 +36,20 @@ #define EFER_FFXSR (1<<_EFER_FFXSR) #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) +/* + * Architectural memory types that are common to MTRRs, PAT, VMX MSRs, etc. + * Most MSRs support/allow only a subset of memory types, but the values + * themselves are common across all relevant MSRs. + */ +#define X86_MEMTYPE_UC 0ull /* Uncacheable, a.k.a. Strong Uncacheable */ +#define X86_MEMTYPE_WC 1ull /* Write Combining */ +/* RESERVED 2 */ +/* RESERVED 3 */ +#define X86_MEMTYPE_WT 4ull /* Write Through */ +#define X86_MEMTYPE_WP 5ull /* Write Protected */ +#define X86_MEMTYPE_WB 6ull /* Write Back */ +#define X86_MEMTYPE_UC_MINUS 7ull /* Weak Uncacheable (PAT only) */ + /* FRED MSRs */ #define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */ #define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */ @@ -365,6 +379,12 @@ #define MSR_IA32_CR_PAT 0x00000277 +#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7) \ + ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \ + (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \ + (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \ + (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56)) + #define MSR_IA32_DEBUGCTLMSR 0x000001d9 #define MSR_IA32_LASTBRANCHFROMIP 0x000001db #define MSR_IA32_LASTBRANCHTOIP 0x000001dc @@ -1159,15 +1179,6 @@ #define MSR_IA32_VMX_VMFUNC 0x00000491 #define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 -/* VMX_BASIC bits and bitmasks */ -#define VMX_BASIC_VMCS_SIZE_SHIFT 32 -#define VMX_BASIC_TRUE_CTLS (1ULL << 55) -#define VMX_BASIC_64 0x0001000000000000LLU -#define VMX_BASIC_MEM_TYPE_SHIFT 50 -#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU -#define VMX_BASIC_MEM_TYPE_WB 6LLU -#define VMX_BASIC_INOUT 0x0040000000000000LLU - /* Resctrl MSRs: */ /* - Intel: */ #define MSR_IA32_L3_QOS_CFG 0xc81 @@ -1185,11 +1196,6 @@ #define MSR_IA32_SMBA_BW_BASE 0xc0000280 #define MSR_IA32_EVT_CFG_BASE 0xc0000400 -/* MSR_IA32_VMX_MISC bits */ -#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) -#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) -#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F - /* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 #define MSR_VM_IGNNE 0xc0010115
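The PAT_VALUE() macro above packs eight of the common memory-type encodings into the 64-bit MSR_IA32_CR_PAT layout, one entry per byte. As a quick sanity check, here is a small standalone program; the defines are copied from the hunk, and the chosen entry order (WB, WT, UC-, UC, repeated) is the architectural power-on default of the PAT MSR:

#include <stdio.h>
#include <inttypes.h>

#define X86_MEMTYPE_UC		0ull
#define X86_MEMTYPE_WC		1ull
#define X86_MEMTYPE_WT		4ull
#define X86_MEMTYPE_WP		5ull
#define X86_MEMTYPE_WB		6ull
#define X86_MEMTYPE_UC_MINUS	7ull

#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7)			\
	((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) |		\
	 (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) |	\
	 (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) |	\
	 (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56))

int main(void)
{
	uint64_t pat = PAT_VALUE(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC);

	/* prints 0x0007040600070406: PA0=WB(6), PA1=WT(4), PA2=UC-(7),
	 * PA3=UC(0), then the same pattern again for PA4-PA7 */
	printf("PAT = %#018" PRIx64 "\n", pat);
	return 0;
}

Token pasting against the X86_MEMTYPE_ prefix is what lets callers name the types symbolically while the MSR layout stays a plain byte-per-entry encoding.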
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index ff5f1ecc7d1e..96b410b1d4e8 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -323,7 +323,16 @@ * Note: Only the memory operand variant of VERW clears the CPU buffers. */ .macro CLEAR_CPU_BUFFERS - ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF +#ifdef CONFIG_X86_64 + ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF +#else + /* + * In 32bit mode, the memory operand must be a %cs reference. The data + * segments may not be usable (vm86 mode), and the stack segment may not + * be flat (ESPFIX32). + */ + ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF +#endif .endm #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 7e9db77231ac..d1426b64c1b9 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -270,5 +270,26 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end) #include <asm/pgtable-invert.h> -#endif /* !__ASSEMBLY__ */ +#else /* __ASSEMBLY__ */ + +#define l4_index(x) (((x) >> 39) & 511) +#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + +L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) +L4_START_KERNEL = l4_index(__START_KERNEL_map) + +L3_START_KERNEL = pud_index(__START_KERNEL_map) + +#define SYM_DATA_START_PAGE_ALIGNED(name) \ + SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) + +/* Automate the creation of 1 to 1 mapping pmd entries */ +#define PMDS(START, PERM, COUNT) \ + i = 0 ; \ + .rept (COUNT) ; \ + .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ + i = i + 1 ; \ + .endr + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 6536873f8fc0..ecd58ea9a837 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,14 +25,16 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 -#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD) typedef void (cpu_emergency_virt_cb)(void); +#if IS_ENABLED(CONFIG_KVM_X86) void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback); void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback); void cpu_emergency_disable_virtualization(void); #else +static inline void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) {} +static inline void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) {} static inline void cpu_emergency_disable_virtualization(void) {} -#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */ +#endif /* CONFIG_KVM_X86 */ typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_shootdown_cpus(nmi_shootdown_cb callback); diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h index 24e3a53ca255..6652ebddfd02 100644 --- a/arch/x86/include/asm/runtime-const.h +++ b/arch/x86/include/asm/runtime-const.h @@ -6,7 +6,7 @@ typeof(sym) __ret; \ asm_inline("mov %1,%0\n1:\n" \ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ - ".long 1b - %c2 - .\n\t" \ + ".long 1b - %c2 - .\n" \ ".popsection" \ :"=r" (__ret) \ :"i" ((unsigned long)0x0123456789abcdefull), \ @@ -20,7 +20,7 @@ typeof(0u+(val)) __ret = (val); \ asm_inline("shrl $12,%k0\n1:\n" \ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ - ".long 1b - 1 - .\n\t" \ + ".long 1b - 1 - .\n" \
".popsection" \ :"+r" (__ret)); \ __ret; }) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f0dea3750ca9..2b59b9951c90 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -516,6 +516,20 @@ struct ghcb { u32 ghcb_usage; } __packed; +struct vmcb { + struct vmcb_control_area control; + union { + struct vmcb_save_area save; + + /* + * For SEV-ES VMs, the save area in the VMCB is used only to + * save/load host state. Guest state resides in a separate + * page, the aptly named VM Save Area (VMSA), that is encrypted + * with the guest's private key. + */ + struct sev_es_save_area host_sev_es_save; + }; +} __packed; #define EXPECTED_VMCB_SAVE_AREA_SIZE 744 #define EXPECTED_GHCB_SAVE_AREA_SIZE 1032 @@ -532,6 +546,7 @@ static inline void __unused_size_checks(void) BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE); BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE); BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE); + BUILD_BUG_ON(offsetof(struct vmcb, save) != EXPECTED_VMCB_CONTROL_AREA_SIZE); BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE); /* Check offsets of reserved fields */ @@ -568,11 +583,6 @@ static inline void __unused_size_checks(void) BUILD_BUG_RESERVED_OFFSET(ghcb, 0xff0); } -struct vmcb { - struct vmcb_control_area control; - struct vmcb_save_area save; -} __packed; - #define SVM_CPUID_FUNC 0x8000000a #define SVM_SELECTOR_S_SHIFT 4 diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index aef70336d624..92f3664dd933 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -305,9 +305,4 @@ static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled extern void arch_scale_freq_tick(void); #define arch_scale_freq_tick arch_scale_freq_tick -#ifdef CONFIG_ACPI_CPPC_LIB -void init_freq_invariance_cppc(void); -#define arch_init_invariance_cppc init_freq_invariance_cppc -#endif - #endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index afce8ee5d7b7..b0a887209400 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -12,6 +12,13 @@ #include <asm/cpufeatures.h> #include <asm/page.h> #include <asm/percpu.h> +#include <asm/runtime-const.h> + +/* + * Virtual variable: there's no actual backing store for this, + * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)' + */ +extern unsigned long USER_PTR_MAX; #ifdef CONFIG_ADDRESS_MASKING /* @@ -46,19 +53,24 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, #endif -/* - * The virtual address space space is logically divided into a kernel - * half and a user half. When cast to a signed type, user pointers - * are positive and kernel pointers are negative. - */ -#define valid_user_address(x) ((__force long)(x) >= 0) +#define valid_user_address(x) \ + ((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX)) /* * Masking the user address is an alternative to a conditional * user_access_begin that can avoid the fencing. This only works * for dense accesses starting at the address. 
*/ -#define mask_user_address(x) ((typeof(x))((long)(x)|((long)(x)>>63))) +static inline void __user *mask_user_address(const void __user *ptr) +{ + unsigned long mask; + asm("cmp %1,%0\n\t" + "sbb %0,%0" + :"=r" (mask) + :"r" (ptr), + "0" (runtime_const_ptr(USER_PTR_MAX))); + return (__force void __user *)(mask | (__force unsigned long)ptr); +} #define masked_user_access_begin(x) ({ \ __auto_type __masked_ptr = (x); \ __masked_ptr = mask_user_address(__masked_ptr); \ @@ -69,23 +81,16 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, * arbitrary values in those bits rather than masking them off. * * Enforce two rules: - * 1. 'ptr' must be in the user half of the address space + * 1. 'ptr' must be in the user part of the address space * 2. 'ptr+size' must not overflow into kernel addresses * - * Note that addresses around the sign change are not valid addresses, - * and will GP-fault even with LAM enabled if the sign bit is set (see - * "CR3.LAM_SUP" that can narrow the canonicality check if we ever - * enable it, but not remove it entirely). - * - * So the "overflow into kernel addresses" does not imply some sudden - * exact boundary at the sign bit, and we can allow a lot of slop on the - * size check. + * Note that we always have at least one guard page between the + * max user address and the non-canonical gap, allowing us to + * ignore small sizes entirely. * * In fact, we could probably remove the size check entirely, since * any kernel accesses will be in increasing address order starting - * at 'ptr', and even if the end might be in kernel space, we'll - * hit the GP faults for non-canonical accesses before we ever get - * there. + * at 'ptr'. * * That's a separate optimization, for now just handle the small * constant case.
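For reference, the cmp/sbb pair in the new mask_user_address() computes an all-ones mask exactly when the pointer is above the runtime USER_PTR_MAX, so an out-of-range pointer is turned into an all-ones (non-canonical) address that is guaranteed to fault, with no conditional branch for speculation to bypass. A portable userspace sketch of the same idea, with a made-up compile-time limit standing in for the runtime-patched constant:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the runtime const; not the kernel's value. */
#define FAKE_USER_PTR_MAX	((1UL << 47) - 4096 - 1)

static uintptr_t mask_user_address(uintptr_t ptr)
{
	/* mask is ~0 when ptr is out of range, 0 otherwise; compilers
	 * typically lower this to the same cmp/sbb (or setcc/neg) idiom
	 * that the inline asm spells out explicitly. */
	uintptr_t mask = 0UL - (uintptr_t)(ptr > FAKE_USER_PTR_MAX);

	return ptr | mask;	/* bad pointers become all ones and fault */
}

int main(void)
{
	printf("%#lx\n", (unsigned long)mask_user_address(0x1234));	/* unchanged */
	printf("%#lx\n", (unsigned long)mask_user_address(~0UL - 8));	/* all ones */
	return 0;
}

Because the result is data-dependent on the comparison rather than control-dependent, even a mispredicted path dereferences either the original valid pointer or a guaranteed-faulting one.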
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index d77a31039f24..f7fd4369b821 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -122,19 +122,17 @@ #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff -#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f -#define VMX_MISC_SAVE_EFER_LMA 0x00000020 -#define VMX_MISC_ACTIVITY_HLT 0x00000040 -#define VMX_MISC_ACTIVITY_WAIT_SIPI 0x00000100 -#define VMX_MISC_ZERO_LEN_INS 0x40000000 -#define VMX_MISC_MSR_LIST_MULTIPLIER 512 - /* VMFUNC functions */ #define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28) #define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING) #define VMFUNC_EPTP_ENTRIES 512 +#define VMX_BASIC_32BIT_PHYS_ADDR_ONLY BIT_ULL(48) +#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49) +#define VMX_BASIC_INOUT BIT_ULL(54) +#define VMX_BASIC_TRUE_CTLS BIT_ULL(55) + static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) { return vmx_basic & GENMASK_ULL(30, 0); @@ -145,9 +143,30 @@ static inline u32 vmx_basic_vmcs_size(u64 vmx_basic) return (vmx_basic & GENMASK_ULL(44, 32)) >> 32; } +static inline u32 vmx_basic_vmcs_mem_type(u64 vmx_basic) +{ + return (vmx_basic & GENMASK_ULL(53, 50)) >> 50; +} + +static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype) +{ + return revision | ((u64)size << 32) | ((u64)memtype << 50); +} + +#define VMX_MISC_SAVE_EFER_LMA BIT_ULL(5) +#define VMX_MISC_ACTIVITY_HLT BIT_ULL(6) +#define VMX_MISC_ACTIVITY_SHUTDOWN BIT_ULL(7) +#define VMX_MISC_ACTIVITY_WAIT_SIPI BIT_ULL(8) +#define VMX_MISC_INTEL_PT BIT_ULL(14) +#define VMX_MISC_RDMSR_IN_SMM BIT_ULL(15) +#define VMX_MISC_VMXOFF_BLOCK_SMI BIT_ULL(28) +#define VMX_MISC_VMWRITE_SHADOW_RO_FIELDS BIT_ULL(29) +#define VMX_MISC_ZERO_LEN_INS BIT_ULL(30) +#define VMX_MISC_MSR_LIST_MULTIPLIER 512 + static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc) { - return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; + return vmx_misc & GENMASK_ULL(4, 0); } static inline int vmx_misc_cr3_count(u64 vmx_misc) @@ -508,9 +527,10 @@ enum vmcs_field { #define VMX_EPTP_PWL_4 0x18ull #define VMX_EPTP_PWL_5 0x20ull #define VMX_EPTP_AD_ENABLE_BIT (1ull << 6) +/* The EPTP memtype is encoded in bits 2:0, i.e. doesn't need to be shifted. 
*/ #define VMX_EPTP_MT_MASK 0x7ull -#define VMX_EPTP_MT_WB 0x6ull -#define VMX_EPTP_MT_UC 0x0ull +#define VMX_EPTP_MT_WB X86_MEMTYPE_WB +#define VMX_EPTP_MT_UC X86_MEMTYPE_UC #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index bf57a824f722..a8debbf2f702 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -439,6 +439,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) #define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5) #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) +#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 956984054bf3..aab9d0570841 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -110,7 +110,7 @@ static void amd_set_max_freq_ratio(void) static DEFINE_MUTEX(freq_invariance_lock); -void init_freq_invariance_cppc(void) +static inline void init_freq_invariance_cppc(void) { static bool init_done; @@ -127,6 +127,11 @@ void init_freq_invariance_cppc(void) mutex_unlock(&freq_invariance_lock); } +void acpi_processor_init_invariance_cppc(void) +{ + init_freq_invariance_cppc(); +} + /* * Get the highest performance register value. * @cpu: CPU from which to get highest performance. diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index dc5d3216af24..9fe9972d2071 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -44,6 +44,7 @@ #define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4 #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4 0x12c4 +#define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F4 0x16fc #define PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4 0x124c #define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4 0x12bc #define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4 @@ -127,6 +128,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) }, diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 6513c53c9459..c5fb28e6451a 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -440,7 +440,19 @@ static int lapic_timer_shutdown(struct clock_event_device *evt) v = apic_read(APIC_LVTT); v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write(APIC_LVTT, v); - apic_write(APIC_TMICT, 0); + + /* + * Setting APIC_LVT_MASKED (above) should be enough to tell + * the hardware that this timer will never fire. But AMD + * erratum 411 and some Intel CPU behavior circa 2024 say + * otherwise. 
Time for belt and suspenders programming: mask + * the timer _and_ zero the counter registers: + */ + if (v & APIC_LVT_TIMER_TSCDEADLINE) + wrmsrl(MSR_IA32_TSC_DEADLINE, 0); + else + apic_write(APIC_TMICT, 0); + return 0; } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 015971adadfc..fab5caec0b72 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1202,5 +1202,6 @@ void amd_check_microcode(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return; - on_each_cpu(zenbleed_check_cpu, NULL, 1); + if (cpu_feature_enabled(X86_FEATURE_ZEN2)) + on_each_cpu(zenbleed_check_cpu, NULL, 1); } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d1915427b4ff..47a01d4028f6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1115,8 +1115,25 @@ do_cmd_auto: case RETBLEED_MITIGATION_IBPB: setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + + /* + * IBPB on entry already obviates the need for + * software-based untraining so clear those in case some + * other mitigation like SRSO has selected them. + */ + setup_clear_cpu_cap(X86_FEATURE_UNRET); + setup_clear_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); mitigate_smt = true; + + /* + * There is no need for RSB filling: entry_ibpb() ensures + * all predictions, including the RSB, are invalidated, + * regardless of IBPB implementation. + */ + setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT); + break; case RETBLEED_MITIGATION_STUFF: @@ -2627,6 +2644,14 @@ static void __init srso_select_mitigation(void) if (has_microcode) { setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); srso_mitigation = SRSO_MITIGATION_IBPB; + + /* + * IBPB on entry already obviates the need for + * software-based untraining so clear those in case some + * other mitigation like Retbleed has selected them. + */ + setup_clear_cpu_cap(X86_FEATURE_UNRET); + setup_clear_cpu_cap(X86_FEATURE_RETHUNK); } } else { pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); @@ -2638,6 +2663,13 @@ static void __init srso_select_mitigation(void) if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) { setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; + + /* + * There is no need for RSB filling: entry_ibpb() ensures + * all predictions, including the RSB, are invalidated, + * regardless of IBPB implementation. 
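One subtlety in the retbleed and SRSO hunks above: the same cap-clearing block now appears in both mitigation paths, because either one can run first. The toy model below compresses that interplay into a bitmask to show the end state is order-independent; the FEAT_* names are invented for the sketch, and the kernel's setup_force_cpu_cap()/setup_clear_cpu_cap() machinery is reduced to plain bit operations.

/* Toy model of the cap juggling above: selecting IBPB-on-entry both forces
 * the IBPB caps and clears the now-redundant software untraining and RSB
 * stuffing caps, even if another mitigation (SRSO, retbleed) set them
 * first.  A plain bitmask stands in for the kernel's cpu-cap machinery. */
#include <assert.h>
#include <stdint.h>

enum {
        FEAT_ENTRY_IBPB     = 1u << 0,
        FEAT_UNRET          = 1u << 1,
        FEAT_RETHUNK        = 1u << 2,
        FEAT_IBPB_ON_VMEXIT = 1u << 3,
        FEAT_RSB_VMEXIT     = 1u << 4,
};

static uint32_t select_ibpb_mitigation(uint32_t caps)
{
        caps |= FEAT_ENTRY_IBPB | FEAT_IBPB_ON_VMEXIT;
        /* entry_ibpb() invalidates all predictions, including the RSB. */
        caps &= ~(FEAT_UNRET | FEAT_RETHUNK | FEAT_RSB_VMEXIT);
        return caps;
}

int main(void)
{
        /* Another mitigation previously selected the untraining thunks... */
        uint32_t caps = FEAT_UNRET | FEAT_RETHUNK;

        /* ...but IBPB-on-entry supersedes them. */
        caps = select_ibpb_mitigation(caps);
        assert(caps == (FEAT_ENTRY_IBPB | FEAT_IBPB_ON_VMEXIT));
        return 0;
}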
+ */ + setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT); } } else { pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n"); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 07a34d723505..a5f221ea5688 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -69,6 +69,7 @@ #include <asm/sev.h> #include <asm/tdx.h> #include <asm/posted_intr.h> +#include <asm/runtime-const.h> #include "cpu.h" @@ -1443,6 +1444,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) boot_cpu_has(X86_FEATURE_HYPERVISOR))) setup_force_cpu_bug(X86_BUG_BHI); + if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET)) + setup_force_cpu_bug(X86_BUG_IBPB_NO_RET); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; @@ -2386,6 +2390,15 @@ void __init arch_cpu_finalize_init(void) alternative_instructions(); if (IS_ENABLED(CONFIG_X86_64)) { + unsigned long USER_PTR_MAX = TASK_SIZE_MAX-1; + + /* + * Enable this when LAM is gated on LASS support + if (cpu_feature_enabled(X86_FEATURE_LAM)) + USER_PTR_MAX = (1ul << 63) - PAGE_SIZE - 1; + */ + runtime_const_init(ptr, USER_PTR_MAX); + /* * Make sure the first 2MB area is not mapped by huge pages * There are typically fixed size MTRRs in there and overlapping diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c index a3aa0199222e..af44fd5dbd7c 100644 --- a/arch/x86/kernel/cpu/mce/dev-mcelog.c +++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c @@ -331,7 +331,6 @@ static const struct file_operations mce_chrdev_ops = { .poll = mce_chrdev_poll, .unlocked_ioctl = mce_chrdev_ioctl, .compat_ioctl = compat_ptr_ioctl, - .llseek = no_llseek, }; static struct miscdevice mce_chrdev_device = { diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index f63b051f25a0..31a73715d755 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -584,7 +584,7 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_ native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy); } -static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); +static enum ucode_state _load_microcode_amd(u8 family, const u8 *data, size_t size); static int __init save_microcode_in_initrd(void) { @@ -605,7 +605,7 @@ static int __init save_microcode_in_initrd(void) if (!desc.mc) return -EINVAL; - ret = load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size); + ret = _load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size); if (ret > UCODE_UPDATED) return -EINVAL; @@ -613,16 +613,19 @@ static int __init save_microcode_in_initrd(void) } early_initcall(save_microcode_in_initrd); -static inline bool patch_cpus_equivalent(struct ucode_patch *p, struct ucode_patch *n) +static inline bool patch_cpus_equivalent(struct ucode_patch *p, + struct ucode_patch *n, + bool ignore_stepping) { /* Zen and newer hardcode the f/m/s in the patch ID */ if (x86_family(bsp_cpuid_1_eax) >= 0x17) { union cpuid_1_eax p_cid = ucode_rev_to_cpuid(p->patch_id); union cpuid_1_eax n_cid = ucode_rev_to_cpuid(n->patch_id); - /* Zap stepping */ - p_cid.stepping = 0; - n_cid.stepping = 0; + if (ignore_stepping) { + p_cid.stepping = 0; + n_cid.stepping = 0; + } return p_cid.full == n_cid.full; } else { @@ -644,13 +647,13 @@ static struct ucode_patch *cache_find_patch(struct ucode_cpu_info *uci, u16 equi WARN_ON_ONCE(!n.patch_id); list_for_each_entry(p, &microcode_cache, plist) - if
(patch_cpus_equivalent(p, &n, false)) return p; return NULL; } -static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n) +static inline int patch_newer(struct ucode_patch *p, struct ucode_patch *n) { /* Zen and newer hardcode the f/m/s in the patch ID */ if (x86_family(bsp_cpuid_1_eax) >= 0x17) { @@ -659,6 +662,9 @@ static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n) zp.ucode_rev = p->patch_id; zn.ucode_rev = n->patch_id; + if (zn.stepping != zp.stepping) + return -1; + return zn.rev > zp.rev; } else { return n->patch_id > p->patch_id; @@ -668,10 +674,14 @@ static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n) static void update_cache(struct ucode_patch *new_patch) { struct ucode_patch *p; + int ret; list_for_each_entry(p, &microcode_cache, plist) { - if (patch_cpus_equivalent(p, new_patch)) { - if (!patch_newer(p, new_patch)) { + if (patch_cpus_equivalent(p, new_patch, true)) { + ret = patch_newer(p, new_patch); + if (ret < 0) + continue; + else if (!ret) { /* we already have the latest patch */ kfree(new_patch->data); kfree(new_patch); @@ -944,21 +954,30 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, return UCODE_OK; } -static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size) +static enum ucode_state _load_microcode_amd(u8 family, const u8 *data, size_t size) { - struct cpuinfo_x86 *c; - unsigned int nid, cpu; - struct ucode_patch *p; enum ucode_state ret; /* free old equiv table */ free_equiv_cpu_table(); ret = __load_microcode_amd(family, data, size); - if (ret != UCODE_OK) { + if (ret != UCODE_OK) cleanup(); + + return ret; +} + +static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size) +{ + struct cpuinfo_x86 *c; + unsigned int nid, cpu; + struct ucode_patch *p; + enum ucode_state ret; + + ret = _load_microcode_amd(family, data, size); + if (ret != UCODE_OK) return ret; - } for_each_node(nid) { cpu = cpumask_first(cpumask_of_node(nid)); diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 2a2fc14955cd..989d368be04f 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -55,6 +55,12 @@ #include "mtrr.h" +static_assert(X86_MEMTYPE_UC == MTRR_TYPE_UNCACHABLE); +static_assert(X86_MEMTYPE_WC == MTRR_TYPE_WRCOMB); +static_assert(X86_MEMTYPE_WT == MTRR_TYPE_WRTHROUGH); +static_assert(X86_MEMTYPE_WP == MTRR_TYPE_WRPROT); +static_assert(X86_MEMTYPE_WB == MTRR_TYPE_WRBACK); + /* arch_phys_wc_add returns an MTRR register index plus this offset.
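The stepping handling in the microcode hunk above is subtle: update_cache() matches cache slots with ignore_stepping=true so that one slot exists per family/model, while patch_newer() refuses to compare revisions across steppings. A minimal userspace model of the stepping-aware comparison is sketched below; the cpuid_1_eax bitfield layout mirrors CPUID leaf 1 EAX, and ucode_rev_to_cpuid() is approximated by a plain cast, which glosses over the real Zen patch-ID unpacking.

/* Sketch of the stepping-aware patch matching introduced above.  The
 * sample patch IDs are hypothetical and the cast stands in for the real
 * ucode_rev_to_cpuid() conversion. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

union cpuid_1_eax {
        struct {
                uint32_t stepping  : 4;
                uint32_t model     : 4;
                uint32_t family    : 4;
                uint32_t __resvd0  : 4;
                uint32_t ext_model : 4;
                uint32_t ext_fam   : 8;
                uint32_t __resvd1  : 4;
        };
        uint32_t full;
};

static bool patch_cpus_equivalent(uint32_t p_id, uint32_t n_id,
                                  bool ignore_stepping)
{
        union cpuid_1_eax p = { .full = p_id }; /* stand-in for ucode_rev_to_cpuid() */
        union cpuid_1_eax n = { .full = n_id };

        if (ignore_stepping) {
                p.stepping = 0;
                n.stepping = 0;
        }
        return p.full == n.full;
}

int main(void)
{
        uint32_t a = 0x00a20f10;        /* hypothetical f/m/s, stepping 0 */
        uint32_t b = 0x00a20f12;        /* same part, stepping 2 */

        assert(patch_cpus_equivalent(a, b, true));      /* same cache slot */
        assert(!patch_cpus_equivalent(a, b, false));    /* different patch */
        return 0;
}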
*/ #define MTRR_TO_PHYS_WC_OFFSET 1000 diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 8591d53c144b..b681c2e07dbf 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -207,7 +207,7 @@ static inline bool rdt_get_mb_table(struct rdt_resource *r) return false; } -static bool __get_mem_config_intel(struct rdt_resource *r) +static __init bool __get_mem_config_intel(struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); union cpuid_0x10_3_eax eax; @@ -241,7 +241,7 @@ static bool __get_mem_config_intel(struct rdt_resource *r) return true; } -static bool __rdt_get_mem_config_amd(struct rdt_resource *r) +static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); u32 eax, ebx, ecx, edx, subleaf; diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 50fa1fe9a073..200d89a64027 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -29,10 +29,10 @@ * hardware. The allocated bandwidth percentage is rounded to the next * control step available on the hardware. */ -static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) +static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) { - unsigned long bw; int ret; + u32 bw; /* * Only linear delay values is supported for current Intel SKUs. @@ -42,16 +42,21 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) return false; } - ret = kstrtoul(buf, 10, &bw); + ret = kstrtou32(buf, 10, &bw); if (ret) { - rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf); + rdt_last_cmd_printf("Invalid MB value %s\n", buf); return false; } - if ((bw < r->membw.min_bw || bw > r->default_ctrl) && - !is_mba_sc(r)) { - rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw, - r->membw.min_bw, r->default_ctrl); + /* Nothing else to do if software controller is enabled. */ + if (is_mba_sc(r)) { + *data = bw; + return true; + } + + if (bw < r->membw.min_bw || bw > r->default_ctrl) { + rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n", + bw, r->membw.min_bw, r->default_ctrl); return false; } @@ -65,7 +70,7 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, struct resctrl_staged_config *cfg; u32 closid = data->rdtgrp->closid; struct rdt_resource *r = s->res; - unsigned long bw_val; + u32 bw_val; cfg = &d->staged_config[s->conf_type]; if (cfg->have_new_ctrl) { diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index e69489d48625..972e6b6b0481 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -1567,7 +1567,6 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) static const struct file_operations pseudo_lock_dev_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = NULL, .write = NULL, .open = pseudo_lock_dev_open, diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 330922b328bf..16752b8dfa89 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -32,13 +32,6 @@ * We are not able to switch in one step to the final KERNEL ADDRESS SPACE * because we need identity-mapped pages. 
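Back in the resctrl hunk above, bw_validate() now parses straight into a u32 and returns early for the software controller before doing the range check. A userspace approximation follows; kstrtou32 is kernel-only, so strtoul plus explicit checks stands in for it, the limits are invented for the demo, and the final round-up implements the "rounded to the next control step" behavior described in the comment at the top of that file.

/* Userspace model of bw_validate() above.  min_bw/default_ctrl/bw_gran
 * are made-up values; the kernel reads them from CPUID. */
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define ROUNDUP(x, y)   ((((x) + (y) - 1) / (y)) * (y))

struct membw { uint32_t min_bw, default_ctrl, bw_gran; };

static bool bw_validate(const char *buf, uint32_t *data, const struct membw *r)
{
        char *end;
        unsigned long bw;

        errno = 0;
        bw = strtoul(buf, &end, 10);
        if (errno || *end || bw > UINT32_MAX) {
                fprintf(stderr, "Invalid MB value %s\n", buf);
                return false;
        }

        if (bw < r->min_bw || bw > r->default_ctrl) {
                fprintf(stderr, "MB value %lu out of range [%u,%u]\n",
                        bw, r->min_bw, r->default_ctrl);
                return false;
        }

        *data = ROUNDUP((uint32_t)bw, r->bw_gran);      /* next control step */
        return true;
}

int main(void)
{
        struct membw r = { .min_bw = 10, .default_ctrl = 100, .bw_gran = 10 };
        uint32_t bw;

        if (bw_validate("42", &bw, &r))
                printf("accepted, rounded to %u\n", bw); /* prints 50 */
        return 0;
}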
*/ -#define l4_index(x) (((x) >> 39) & 511) -#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) - -L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) -L4_START_KERNEL = l4_index(__START_KERNEL_map) - -L3_START_KERNEL = pud_index(__START_KERNEL_map) __HEAD .code64 @@ -577,9 +570,6 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb) SYM_CODE_END(vc_no_ghcb) #endif -#define SYM_DATA_START_PAGE_ALIGNED(name) \ - SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) - #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * Each PGD needs to be 8k long and 8k aligned. We do not @@ -601,14 +591,6 @@ SYM_CODE_END(vc_no_ghcb) #define PTI_USER_PGD_FILL 0 #endif -/* Automate the creation of 1 to 1 mapping pmd entries */ -#define PMDS(START, PERM, COUNT) \ - i = 0 ; \ - .rept (COUNT) ; \ - .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ - i = i + 1 ; \ - .endr - __INITDATA .balign 4 @@ -708,8 +690,6 @@ SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt) .endr SYM_DATA_END(level1_fixmap_pgt) -#undef PMDS - .data .align 16 diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 263f8aed4e2c..21e9e4845354 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -37,6 +37,7 @@ #include <asm/apic.h> #include <asm/apicdef.h> #include <asm/hypervisor.h> +#include <asm/mtrr.h> #include <asm/tlb.h> #include <asm/cpuidle_haltpoll.h> #include <asm/ptrace.h> @@ -980,6 +981,9 @@ static void __init kvm_init_platform(void) } kvmclock_init(); x86_platform.apic_post_init = kvm_apic_init; + + /* Set WB as the default cache mode for SEV-SNP and TDX */ + mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); } #if defined(CONFIG_AMD_MEM_ENCRYPT) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 0e0a4cf6b5eb..615922838c51 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -530,7 +530,7 @@ static inline void kb_wait(void) static inline void nmi_shootdown_cpus_on_restart(void); -#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD) +#if IS_ENABLED(CONFIG_KVM_X86) /* RCU-protected callback to disable virtualization prior to reboot. */ static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback; @@ -600,7 +600,7 @@ static void emergency_reboot_disable_virtualization(void) } #else static void emergency_reboot_disable_virtualization(void) { } -#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */ +#endif /* CONFIG_KVM_X86 */ void __attribute__((weak)) mach_reboot_fixups(void) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d05392db5d0f..2dbadf347b5f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -261,12 +261,6 @@ static noinstr bool handle_bug(struct pt_regs *regs) int ud_type; u32 imm; - /* - * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug() - * is a rare case that uses @regs without passing them to - * irqentry_enter(). - */ - kmsan_unpoison_entry_regs(regs); ud_type = decode_bug(regs->ip, &imm); if (ud_type == BUG_NONE) return handled; @@ -276,6 +270,12 @@ static noinstr bool handle_bug(struct pt_regs *regs) */ instrumentation_begin(); /* + * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug() + * is a rare case that uses @regs without passing them to + * irqentry_enter(). + */ + kmsan_unpoison_entry_regs(regs); + /* * Since we're emulating a CALL with exceptions, restore the interrupt * state to what it was at the exception site. 
*/ diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 6e73403e874f..b8c5741d2fb4 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -357,8 +357,8 @@ SECTIONS PERCPU_SECTION(INTERNODE_CACHE_BYTES) #endif - RUNTIME_CONST(shift, d_hash_shift) - RUNTIME_CONST(ptr, dentry_hashtable) + RUNTIME_CONST_VARIABLES + RUNTIME_CONST(ptr, USER_PTR_MAX) . = ALIGN(PAGE_SIZE); diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 730c2f34d347..f09f13c01c6b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -17,8 +17,8 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION -config KVM - tristate "Kernel-based Virtual Machine (KVM) support" +config KVM_X86 + def_tristate KVM if KVM_INTEL || KVM_AMD depends on X86_LOCAL_APIC select KVM_COMMON select KVM_GENERIC_MMU_NOTIFIER @@ -44,7 +44,11 @@ config KVM select HAVE_KVM_PM_NOTIFIER if PM select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_PRE_FAULT_MEMORY + select KVM_GENERIC_PRIVATE_MEM if KVM_SW_PROTECTED_VM select KVM_WERROR if WERROR + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" help Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent @@ -77,7 +81,6 @@ config KVM_SW_PROTECTED_VM bool "Enable support for KVM software-protected VMs" depends on EXPERT depends on KVM && X86_64 - select KVM_GENERIC_PRIVATE_MEM help Enable support for KVM software-protected VMs. Currently, software- protected VMs are purely a development and testing vehicle for diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 5494669a055a..f9dddb8cb466 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -32,7 +32,7 @@ kvm-intel-y += vmx/vmx_onhyperv.o vmx/hyperv_evmcs.o kvm-amd-y += svm/svm_onhyperv.o endif -obj-$(CONFIG_KVM) += kvm.o +obj-$(CONFIG_KVM_X86) += kvm.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o obj-$(CONFIG_KVM_AMD) += kvm-amd.o diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2617be544480..41786b834b16 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -705,7 +705,7 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) | - F(AMX_COMPLEX) + F(AMX_COMPLEX) | F(AVX10) ); kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, @@ -721,6 +721,10 @@ void kvm_set_cpu_caps(void) SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) ); + kvm_cpu_cap_init_kvm_defined(CPUID_24_0_EBX, + F(AVX10_128) | F(AVX10_256) | F(AVX10_512) + ); + kvm_cpu_cap_mask(CPUID_8000_0001_ECX, F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | @@ -949,7 +953,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) switch (function) { case 0: /* Limited to the highest leaf implemented in KVM. */ - entry->eax = min(entry->eax, 0x1fU); + entry->eax = min(entry->eax, 0x24U); break; case 1: cpuid_entry_override(entry, CPUID_1_EDX); @@ -1174,6 +1178,28 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) break; } break; + case 0x24: { + u8 avx10_version; + + if (!kvm_cpu_cap_has(X86_FEATURE_AVX10)) { + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; + break; + } + + /* + * The AVX10 version is encoded in EBX[7:0]. Note, the version + * is guaranteed to be >=1 if AVX10 is supported. Note #2, the + * version needs to be captured before overriding EBX features! 
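Since the new leaf is plain CPUID, the guest-visible result is easy to eyeball from userspace. Below is a quick probe matching the encoding KVM virtualizes here, CPUID.7.1:EDX[19] for AVX10 and leaf 0x24 EBX[7:0]/EBX[18:16] for version and vector lengths; it builds with GCC or Clang on x86, and most current hardware will simply report no AVX10.

/* Userspace probe for the AVX10 enumeration virtualized above. */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int a, b, c, d;

        if (!__get_cpuid_count(7, 1, &a, &b, &c, &d) || !(d & (1u << 19))) {
                puts("AVX10 not enumerated");
                return 0;
        }

        if (!__get_cpuid_count(0x24, 0, &a, &b, &c, &d))
                return 1;

        printf("AVX10 version %u, 128:%d 256:%d 512:%d\n",
               b & 0xff,
               !!(b & (1u << 16)), !!(b & (1u << 17)), !!(b & (1u << 18)));
        return 0;
}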
+ */ + avx10_version = min_t(u8, entry->ebx & 0xff, 1); + cpuid_entry_override(entry, CPUID_24_0_EBX); + entry->ebx |= avx10_version; + + entry->eax = 0; + entry->ecx = 0; + entry->edx = 0; + break; + } case KVM_CPUID_SIGNATURE: { const u32 *sigptr = (const u32 *)KVM_SIGNATURE; entry->eax = KVM_CPUID_FEATURES; diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 3d7eb11d0e45..63f66c51975a 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -108,7 +108,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); * Read pending interrupt(from non-APIC source) * vector and intack. */ -static int kvm_cpu_get_extint(struct kvm_vcpu *v) +int kvm_cpu_get_extint(struct kvm_vcpu *v) { if (!kvm_cpu_has_extint(v)) { WARN_ON(!lapic_in_kernel(v)); @@ -131,6 +131,7 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v) } else return kvm_pic_read_irq(v->kvm); /* PIC */ } +EXPORT_SYMBOL_GPL(kvm_cpu_get_extint); /* * Read pending interrupt vector and intack. @@ -141,9 +142,12 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) if (vector != -1) return vector; /* PIC */ - return kvm_get_apic_interrupt(v); /* APIC */ + vector = kvm_apic_has_interrupt(v); /* APIC */ + if (vector != -1) + kvm_apic_ack_interrupt(v, vector); + + return vector; } -EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5bb481aefcbc..95c6beb8ce27 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1944,7 +1944,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) u64 ns = 0; ktime_t expire; struct kvm_vcpu *vcpu = apic->vcpu; - unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; + u32 this_tsc_khz = vcpu->arch.virtual_tsc_khz; unsigned long flags; ktime_t now; @@ -2453,6 +2453,43 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); +#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13)) + +int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) +{ + if (data & X2APIC_ICR_RESERVED_BITS) + return 1; + + /* + * The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but + * only AMD requires it to be zero, Intel essentially just ignores the + * bit. And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled, + * the CPU performs the reserved bits checks, i.e. the underlying CPU + * behavior will "win". Arbitrarily clear the BUSY bit, as there is no + * sane way to provide consistent behavior with respect to hardware. + */ + data &= ~APIC_ICR_BUSY; + + kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); + if (kvm_x86_ops.x2apic_icr_is_split) { + kvm_lapic_set_reg(apic, APIC_ICR, data); + kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32); + } else { + kvm_lapic_set_reg64(apic, APIC_ICR, data); + } + trace_kvm_apic_write(APIC_ICR, data); + return 0; +} + +static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic) +{ + if (kvm_x86_ops.x2apic_icr_is_split) + return (u64)kvm_lapic_get_reg(apic, APIC_ICR) | + (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32; + + return kvm_lapic_get_reg64(apic, APIC_ICR); +} + /* emulate APIC access in a trap manner */ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) { @@ -2470,7 +2507,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) * maybe-unecessary write, and both are in the noise anyways. 
*/ if (apic_x2apic_mode(apic) && offset == APIC_ICR) - kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR)); + WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic))); else kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); } @@ -2592,19 +2629,26 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (apic->apicv_active) { - /* irr_pending is always true when apicv is activated. */ - apic->irr_pending = true; + /* + * When APICv is enabled, KVM must always search the IRR for a pending + * IRQ, as other vCPUs and devices can set IRR bits even if the vCPU + * isn't running. If APICv is disabled, KVM _should_ search the IRR + * for a pending IRQ. But KVM currently doesn't ensure *all* hardware, + * e.g. CPUs and IOMMUs, has seen the change in state, i.e. searching + * the IRR at this time could race with IRQ delivery from hardware that + * still sees APICv as being enabled. + * + * FIXME: Ensure other vCPUs and devices observe the change in APICv + * state prior to updating KVM's metadata caches, so that KVM + * can safely search the IRR and set irr_pending accordingly. + */ + apic->irr_pending = true; + + if (apic->apicv_active) apic->isr_count = 1; - } else { - /* - * Don't clear irr_pending, searching the IRR can race with - * updates from the CPU as APICv is still active from hardware's - * perspective. The flag will be cleared as appropriate when - * KVM injects the interrupt. - */ + else apic->isr_count = count_vectors(apic->regs + APIC_ISR); - } + apic->highest_isr_cache = -1; } @@ -2922,14 +2966,13 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) } } -int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) +void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector) { - int vector = kvm_apic_has_interrupt(vcpu); struct kvm_lapic *apic = vcpu->arch.apic; u32 ppr; - if (vector == -1) - return -1; + if (WARN_ON_ONCE(vector < 0 || !apic)) + return; /* * We get here even with APIC virtualization enabled, if doing @@ -2957,8 +3000,8 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) __apic_update_ppr(apic, &ppr); } - return vector; } +EXPORT_SYMBOL_GPL(kvm_apic_ack_interrupt); static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s, bool set) @@ -2990,18 +3033,22 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, /* * In x2APIC mode, the LDR is fixed and based on the id. And - * ICR is internally a single 64-bit register, but needs to be - * split to ICR+ICR2 in userspace for backwards compatibility. + * if the ICR is _not_ split, ICR is internally a single 64-bit + * register, but needs to be split to ICR+ICR2 in userspace for + * backwards compatibility. 
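A standalone model of the split-ICR bookkeeping above may help: when kvm_x86_ops.x2apic_icr_is_split is true, the 64-bit x2APIC ICR value must be scattered to the legacy ICR/ICR2 pair on write and gathered again on read. The register file below is a bare array rather than the real vAPIC page.

/* Minimal sketch of the ICR split/join done above. */
#include <assert.h>
#include <stdint.h>

enum { APIC_ICR, APIC_ICR2, NR_REGS };

static uint32_t regs[NR_REGS];

static void icr_write_split(uint64_t data)
{
        regs[APIC_ICR]  = (uint32_t)data;
        regs[APIC_ICR2] = (uint32_t)(data >> 32);
}

static uint64_t icr_read_split(void)
{
        return (uint64_t)regs[APIC_ICR] | ((uint64_t)regs[APIC_ICR2] << 32);
}

int main(void)
{
        /* Destination 0xff in the high byte, fixed vector 0x30 low. */
        uint64_t icr = ((uint64_t)0xff << 56) | 0x30;

        icr_write_split(icr);
        assert(icr_read_split() == icr);
        return 0;
}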
*/ - if (set) { + if (set) *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id); - icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | - (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; - __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); - } else { - icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); - __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); + if (!kvm_x86_ops.x2apic_icr_is_split) { + if (set) { + icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | + (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; + __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); + } else { + icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); + __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); + } } } @@ -3194,22 +3241,12 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) return 0; } -int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) -{ - data &= ~APIC_ICR_BUSY; - - kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); - kvm_lapic_set_reg64(apic, APIC_ICR, data); - trace_kvm_apic_write(APIC_ICR, data); - return 0; -} - static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data) { u32 low; if (reg == APIC_ICR) { - *data = kvm_lapic_get_reg64(apic, APIC_ICR); + *data = kvm_x2apic_icr_read(apic); return 0; } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 7ef8ae73e82d..1b8ef9856422 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -88,15 +88,14 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu); void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); +void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector); int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); -int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); int kvm_apic_accept_events(struct kvm_vcpu *vcpu); void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); -u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); void kvm_recalculate_apic_map(struct kvm *kvm); void kvm_apic_set_version(struct kvm_vcpu *vcpu); void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 4341e0e28571..9dc5dd43ae7f 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -223,8 +223,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, bool kvm_mmu_may_ignore_guest_pat(void); -int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); - int kvm_mmu_post_init_vm(struct kvm *kvm); void kvm_mmu_pre_destroy_vm(struct kvm *kvm); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 7813d28b082f..8e853a5fc867 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -614,32 +614,6 @@ static u64 mmu_spte_get_lockless(u64 *sptep) return __get_spte_lockless(sptep); } -/* Returns the Accessed status of the PTE and resets it at the same time. */ -static bool mmu_spte_age(u64 *sptep) -{ - u64 spte = mmu_spte_get_lockless(sptep); - - if (!is_accessed_spte(spte)) - return false; - - if (spte_ad_enabled(spte)) { - clear_bit((ffs(shadow_accessed_mask) - 1), - (unsigned long *)sptep); - } else { - /* - * Capture the dirty status of the page, so that it doesn't get - * lost when the SPTE is marked for access tracking. 
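The rmap hunks just below replace the bare '1' and '~1ul' magic with a named KVM_RMAP_MANY bit. The trick they encode: an rmap head holding a single SPTE stores the pointer directly, while multi-SPTE heads store a pointer to a descriptor with bit 0 set, which is safe because descriptors are at least pointer-aligned. A standalone sketch, with uintptr_t in place of unsigned long:

/* Tagged-pointer scheme behind KVM_RMAP_MANY (see the hunks below).
 * Works because descriptors are pointer-aligned, leaving bit 0 free. */
#include <assert.h>
#include <stdint.h>

#define KVM_RMAP_MANY   ((uintptr_t)1)

struct pte_list_desc { uint64_t *sptes[14]; int count; };

static int rmap_is_many(uintptr_t head)
{
        return head & KVM_RMAP_MANY;
}

static struct pte_list_desc *rmap_desc(uintptr_t head)
{
        assert(rmap_is_many(head));
        return (struct pte_list_desc *)(head & ~KVM_RMAP_MANY);
}

int main(void)
{
        static uint64_t spte;
        static struct pte_list_desc desc = { .sptes = { &spte }, .count = 1 };

        uintptr_t one  = (uintptr_t)&spte;              /* single mapping */
        uintptr_t many = (uintptr_t)&desc | KVM_RMAP_MANY;

        assert(!rmap_is_many(one));
        assert(rmap_desc(many)->sptes[0] == &spte);
        return 0;
}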
- */ - if (is_writable_pte(spte)) - kvm_set_pfn_dirty(spte_to_pfn(spte)); - - spte = mark_spte_for_access_track(spte); - mmu_spte_update_no_track(sptep, spte); - } - - return true; -} - static inline bool is_tdp_mmu_active(struct kvm_vcpu *vcpu) { return tdp_mmu_enabled && vcpu->arch.mmu->root_role.direct; @@ -938,6 +912,7 @@ static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct * pte_list_desc containing more mappings. */ +#define KVM_RMAP_MANY BIT(0) /* * Returns the number of pointers in the rmap chain, not counting the new one. @@ -950,16 +925,16 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte, if (!rmap_head->val) { rmap_head->val = (unsigned long)spte; - } else if (!(rmap_head->val & 1)) { + } else if (!(rmap_head->val & KVM_RMAP_MANY)) { desc = kvm_mmu_memory_cache_alloc(cache); desc->sptes[0] = (u64 *)rmap_head->val; desc->sptes[1] = spte; desc->spte_count = 2; desc->tail_count = 0; - rmap_head->val = (unsigned long)desc | 1; + rmap_head->val = (unsigned long)desc | KVM_RMAP_MANY; ++count; } else { - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); count = desc->tail_count + desc->spte_count; /* @@ -968,10 +943,10 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte, */ if (desc->spte_count == PTE_LIST_EXT) { desc = kvm_mmu_memory_cache_alloc(cache); - desc->more = (struct pte_list_desc *)(rmap_head->val & ~1ul); + desc->more = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); desc->spte_count = 0; desc->tail_count = count; - rmap_head->val = (unsigned long)desc | 1; + rmap_head->val = (unsigned long)desc | KVM_RMAP_MANY; } desc->sptes[desc->spte_count++] = spte; } @@ -982,7 +957,7 @@ static void pte_list_desc_remove_entry(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct pte_list_desc *desc, int i) { - struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); int j = head_desc->spte_count - 1; /* @@ -1011,7 +986,7 @@ static void pte_list_desc_remove_entry(struct kvm *kvm, if (!head_desc->more) rmap_head->val = 0; else - rmap_head->val = (unsigned long)head_desc->more | 1; + rmap_head->val = (unsigned long)head_desc->more | KVM_RMAP_MANY; mmu_free_pte_list_desc(head_desc); } @@ -1024,13 +999,13 @@ static void pte_list_remove(struct kvm *kvm, u64 *spte, if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm)) return; - if (!(rmap_head->val & 1)) { + if (!(rmap_head->val & KVM_RMAP_MANY)) { if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm)) return; rmap_head->val = 0; } else { - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); while (desc) { for (i = 0; i < desc->spte_count; ++i) { if (desc->sptes[i] == spte) { @@ -1063,12 +1038,12 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm, if (!rmap_head->val) return false; - if (!(rmap_head->val & 1)) { + if (!(rmap_head->val & KVM_RMAP_MANY)) { mmu_spte_clear_track_bits(kvm, (u64 *)rmap_head->val); goto out; } - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); for (; desc; desc = next) { for (i = 0; i < desc->spte_count; i++) @@ -1088,10 +1063,10 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head) if 
(!rmap_head->val) return 0; - else if (!(rmap_head->val & 1)) + else if (!(rmap_head->val & KVM_RMAP_MANY)) return 1; - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); return desc->tail_count + desc->spte_count; } @@ -1153,13 +1128,13 @@ static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, if (!rmap_head->val) return NULL; - if (!(rmap_head->val & 1)) { + if (!(rmap_head->val & KVM_RMAP_MANY)) { iter->desc = NULL; sptep = (u64 *)rmap_head->val; goto out; } - iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + iter->desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); iter->pos = 0; sptep = iter->desc->sptes[iter->pos]; out: @@ -1307,15 +1282,6 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head, return flush; } -/** - * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages - * @kvm: kvm instance - * @slot: slot to protect - * @gfn_offset: start of the BITS_PER_LONG pages we care about - * @mask: indicates which pages we should protect - * - * Used when we do not need to care about huge page mappings. - */ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) @@ -1339,16 +1305,6 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, } } -/** - * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages, or write - * protect the page if the D-bit isn't supported. - * @kvm: kvm instance - * @slot: slot to clear D-bit - * @gfn_offset: start of the BITS_PER_LONG pages we care about - * @mask: indicates which pages we should clear D-bit - * - * Used for PML to re-log the dirty GPAs after userspace querying dirty_bitmap. - */ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) @@ -1372,24 +1328,16 @@ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, } } -/** - * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected - * PT level pages. - * - * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to - * enable dirty logging for them. - * - * We need to care about huge page mappings: e.g. during dirty logging we may - * have such mappings. - */ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { /* - * Huge pages are NOT write protected when we start dirty logging in - * initially-all-set mode; must write protect them here so that they - * are split to 4K on the first write. + * If the slot was assumed to be "initially all dirty", write-protect + * huge pages to ensure they are split to 4KiB on the first write (KVM + * dirty logs at 4KiB granularity). If eager page splitting is enabled, + * immediately try to split huge pages, e.g. so that vCPUs don't get + * saddled with the cost of splitting. * * The gfn_offset is guaranteed to be aligned to 64, but the base_gfn * of memslot has no such restriction, so the range can cross two large @@ -1411,7 +1359,16 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, PG_LEVEL_2M); } - /* Now handle 4K PTEs. */ + /* + * (Re)Enable dirty logging for all 4KiB SPTEs that map the GFNs in + * mask. If PML is enabled and the GFN doesn't need to be write- + * protected for other reasons, e.g. shadow paging, clear the Dirty bit. + * Otherwise clear the Writable bit. 
+ * + * Note that kvm_mmu_clear_dirty_pt_masked() is called whenever PML is + * enabled but it chooses between clearing the Dirty bit and Writeable + * bit based on the context. + */ if (kvm_x86_ops.cpu_dirty_log_size) kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask); else @@ -1453,16 +1410,10 @@ static bool kvm_vcpu_write_protect_gfn(struct kvm_vcpu *vcpu, u64 gfn) return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn, PG_LEVEL_4K); } -static bool __kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - const struct kvm_memory_slot *slot) -{ - return kvm_zap_all_rmap_sptes(kvm, rmap_head); -} - static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level) + const struct kvm_memory_slot *slot) { - return __kvm_zap_rmap(kvm, rmap_head, slot); + return kvm_zap_all_rmap_sptes(kvm, rmap_head); } struct slot_rmap_walk_iterator { @@ -1513,7 +1464,7 @@ static bool slot_rmap_walk_okay(struct slot_rmap_walk_iterator *iterator) static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) { while (++iterator->rmap <= iterator->end_rmap) { - iterator->gfn += (1UL << KVM_HPAGE_GFN_SHIFT(iterator->level)); + iterator->gfn += KVM_PAGES_PER_HPAGE(iterator->level); if (iterator->rmap->val) return; @@ -1534,31 +1485,92 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) slot_rmap_walk_okay(_iter_); \ slot_rmap_walk_next(_iter_)) -typedef bool (*rmap_handler_t)(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, - int level); +/* The return value indicates if tlb flush on all vcpus is needed. */ +typedef bool (*slot_rmaps_handler) (struct kvm *kvm, + struct kvm_rmap_head *rmap_head, + const struct kvm_memory_slot *slot); -static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm, - struct kvm_gfn_range *range, - rmap_handler_t handler) +static __always_inline bool __walk_slot_rmaps(struct kvm *kvm, + const struct kvm_memory_slot *slot, + slot_rmaps_handler fn, + int start_level, int end_level, + gfn_t start_gfn, gfn_t end_gfn, + bool can_yield, bool flush_on_yield, + bool flush) { struct slot_rmap_walk_iterator iterator; - bool ret = false; - for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, - range->start, range->end - 1, &iterator) - ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn, - iterator.level); + lockdep_assert_held_write(&kvm->mmu_lock); - return ret; + for_each_slot_rmap_range(slot, start_level, end_level, start_gfn, + end_gfn, &iterator) { + if (iterator.rmap) + flush |= fn(kvm, iterator.rmap, slot); + + if (!can_yield) + continue; + + if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { + if (flush && flush_on_yield) { + kvm_flush_remote_tlbs_range(kvm, start_gfn, + iterator.gfn - start_gfn + 1); + flush = false; + } + cond_resched_rwlock_write(&kvm->mmu_lock); + } + } + + return flush; +} + +static __always_inline bool walk_slot_rmaps(struct kvm *kvm, + const struct kvm_memory_slot *slot, + slot_rmaps_handler fn, + int start_level, int end_level, + bool flush_on_yield) +{ + return __walk_slot_rmaps(kvm, slot, fn, start_level, end_level, + slot->base_gfn, slot->base_gfn + slot->npages - 1, + true, flush_on_yield, false); +} + +static __always_inline bool walk_slot_rmaps_4k(struct kvm *kvm, + const struct kvm_memory_slot *slot, + slot_rmaps_handler fn, + bool flush_on_yield) +{ + return walk_slot_rmaps(kvm, slot, fn, PG_LEVEL_4K, PG_LEVEL_4K, flush_on_yield); +} + +static bool 
__kvm_rmap_zap_gfn_range(struct kvm *kvm, + const struct kvm_memory_slot *slot, + gfn_t start, gfn_t end, bool can_yield, + bool flush) +{ + return __walk_slot_rmaps(kvm, slot, kvm_zap_rmap, + PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, + start, end - 1, can_yield, true, flush); } bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) { bool flush = false; + /* + * To prevent races with vCPUs faulting in a gfn using stale data, + * zapping a gfn range must be protected by mmu_invalidate_in_progress + * (and mmu_invalidate_seq). The only exception is memslot deletion; + * in that case, SRCU synchronization ensures that SPTEs are zapped + * after all vCPUs have unlocked SRCU, guaranteeing that vCPUs see the + * invalid slot. + */ + lockdep_assert_once(kvm->mmu_invalidate_in_progress || + lockdep_is_held(&kvm->slots_lock)); + if (kvm_memslots_have_rmaps(kvm)) - flush = kvm_handle_gfn_range(kvm, range, kvm_zap_rmap); + flush = __kvm_rmap_zap_gfn_range(kvm, range->slot, + range->start, range->end, + range->may_block, flush); if (tdp_mmu_enabled) flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush); @@ -1570,31 +1582,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) return flush; } -static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level) -{ - u64 *sptep; - struct rmap_iterator iter; - int young = 0; - - for_each_rmap_spte(rmap_head, &iter, sptep) - young |= mmu_spte_age(sptep); - - return young; -} - -static bool kvm_test_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level) -{ - u64 *sptep; - struct rmap_iterator iter; - - for_each_rmap_spte(rmap_head, &iter, sptep) - if (is_accessed_spte(*sptep)) - return true; - return false; -} - #define RMAP_RECYCLE_THRESHOLD 1000 static void __rmap_add(struct kvm *kvm, @@ -1629,12 +1616,52 @@ static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot, __rmap_add(vcpu->kvm, cache, slot, spte, gfn, access); } +static bool kvm_rmap_age_gfn_range(struct kvm *kvm, + struct kvm_gfn_range *range, bool test_only) +{ + struct slot_rmap_walk_iterator iterator; + struct rmap_iterator iter; + bool young = false; + u64 *sptep; + + for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, + range->start, range->end - 1, &iterator) { + for_each_rmap_spte(iterator.rmap, &iter, sptep) { + u64 spte = *sptep; + + if (!is_accessed_spte(spte)) + continue; + + if (test_only) + return true; + + if (spte_ad_enabled(spte)) { + clear_bit((ffs(shadow_accessed_mask) - 1), + (unsigned long *)sptep); + } else { + /* + * Capture the dirty status of the page, so that + * it doesn't get lost when the SPTE is marked + * for access tracking. 
+ */ + if (is_writable_pte(spte)) + kvm_set_pfn_dirty(spte_to_pfn(spte)); + + spte = mark_spte_for_access_track(spte); + mmu_spte_update_no_track(sptep, spte); + } + young = true; + } + } + return young; +} + bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { bool young = false; if (kvm_memslots_have_rmaps(kvm)) - young = kvm_handle_gfn_range(kvm, range, kvm_age_rmap); + young = kvm_rmap_age_gfn_range(kvm, range, false); if (tdp_mmu_enabled) young |= kvm_tdp_mmu_age_gfn_range(kvm, range); @@ -1647,7 +1674,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) bool young = false; if (kvm_memslots_have_rmaps(kvm)) - young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmap); + young = kvm_rmap_age_gfn_range(kvm, range, true); if (tdp_mmu_enabled) young |= kvm_tdp_mmu_test_age_gfn(kvm, range); @@ -2713,36 +2740,49 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages) write_unlock(&kvm->mmu_lock); } -int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) +bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + bool always_retry) { - struct kvm_mmu_page *sp; + struct kvm *kvm = vcpu->kvm; LIST_HEAD(invalid_list); - int r; + struct kvm_mmu_page *sp; + gpa_t gpa = cr2_or_gpa; + bool r = false; + + /* + * Bail early if there aren't any write-protected shadow pages to avoid + * unnecessarily taking mmu_lock lock, e.g. if the gfn is write-tracked + * by a third party. Reading indirect_shadow_pages without holding + * mmu_lock is safe, as this is purely an optimization, i.e. a false + * positive is benign, and a false negative will simply result in KVM + * skipping the unprotect+retry path, which is also an optimization. + */ + if (!READ_ONCE(kvm->arch.indirect_shadow_pages)) + goto out; + + if (!vcpu->arch.mmu->root_role.direct) { + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); + if (gpa == INVALID_GPA) + goto out; + } - r = 0; write_lock(&kvm->mmu_lock); - for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) { - r = 1; + for_each_gfn_valid_sp_with_gptes(kvm, sp, gpa_to_gfn(gpa)) kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); - } + + /* + * Snapshot the result before zapping, as zapping will remove all list + * entries, i.e. checking the list later would yield a false negative. 
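The kvm_rmap_age_gfn_range() walker above folds what used to be two handlers into one via the test_only flag: the query path returns at the first young SPTE without clearing anything, while the aging path harvests every accessed bit. Distilled into a standalone model, with ACCESSED standing in for shadow_accessed_mask:

/* test_only pattern from kvm_rmap_age_gfn_range(), reduced to an array. */
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define ACCESSED        (1u << 5)       /* stand-in for shadow_accessed_mask */

static bool age_range(uint32_t *sptes, size_t n, bool test_only)
{
        bool young = false;

        for (size_t i = 0; i < n; i++) {
                if (!(sptes[i] & ACCESSED))
                        continue;
                if (test_only)
                        return true;    /* caller only asked "any young?" */
                sptes[i] &= ~ACCESSED;  /* harvest: clear and keep walking */
                young = true;
        }
        return young;
}

int main(void)
{
        uint32_t sptes[] = { 0, ACCESSED, ACCESSED };

        assert(age_range(sptes, 3, true));      /* query: nothing cleared */
        assert(sptes[1] & ACCESSED);
        assert(age_range(sptes, 3, false));     /* age: clears both */
        assert(!age_range(sptes, 3, true));
        return 0;
}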
+ */ + r = !list_empty(&invalid_list); kvm_mmu_commit_zap_page(kvm, &invalid_list); write_unlock(&kvm->mmu_lock); - return r; -} - -static int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) -{ - gpa_t gpa; - int r; - - if (vcpu->arch.mmu->root_role.direct) - return 0; - - gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); - - r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); - +out: + if (r || always_retry) { + vcpu->arch.last_retry_eip = kvm_rip_read(vcpu); + vcpu->arch.last_retry_addr = cr2_or_gpa; + } return r; } @@ -2914,10 +2954,8 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, trace_kvm_mmu_set_spte(level, gfn, sptep); } - if (wrprot) { - if (write_fault) - ret = RET_PF_EMULATE; - } + if (wrprot && write_fault) + ret = RET_PF_WRITE_PROTECTED; if (flush) kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level); @@ -4549,7 +4587,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault return RET_PF_RETRY; if (page_fault_handle_page_track(vcpu, fault)) - return RET_PF_EMULATE; + return RET_PF_WRITE_PROTECTED; r = fast_page_fault(vcpu, fault); if (r != RET_PF_INVALID) @@ -4618,8 +4656,6 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, if (!flags) { trace_kvm_page_fault(vcpu, fault_address, error_code); - if (kvm_event_needs_reinjection(vcpu)) - kvm_mmu_unprotect_page_virt(vcpu, fault_address); r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn, insn_len); } else if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) { @@ -4642,7 +4678,7 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu, int r; if (page_fault_handle_page_track(vcpu, fault)) - return RET_PF_EMULATE; + return RET_PF_WRITE_PROTECTED; r = fast_page_fault(vcpu, fault); if (r != RET_PF_INVALID) @@ -4719,6 +4755,7 @@ static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code, switch (r) { case RET_PF_FIXED: case RET_PF_SPURIOUS: + case RET_PF_WRITE_PROTECTED: return 0; case RET_PF_EMULATE: @@ -5963,6 +6000,106 @@ void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, write_unlock(&vcpu->kvm->mmu_lock); } +static bool is_write_to_guest_page_table(u64 error_code) +{ + const u64 mask = PFERR_GUEST_PAGE_MASK | PFERR_WRITE_MASK | PFERR_PRESENT_MASK; + + return (error_code & mask) == mask; +} + +static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + u64 error_code, int *emulation_type) +{ + bool direct = vcpu->arch.mmu->root_role.direct; + + /* + * Do not try to unprotect and retry if the vCPU re-faulted on the same + * RIP with the same address that was previously unprotected, as doing + * so will likely put the vCPU into an infinite loop. E.g. if the vCPU + * uses a non-page-table modifying instruction on the PDE that points to + * the instruction, then unprotecting the gfn will unmap the instruction's + * code, i.e. make it impossible for the instruction to ever complete. + */ + if (vcpu->arch.last_retry_eip == kvm_rip_read(vcpu) && + vcpu->arch.last_retry_addr == cr2_or_gpa) + return RET_PF_EMULATE; + + /* + * Reset the unprotect+retry values that guard against infinite loops. + * The values will be refreshed if KVM explicitly unprotects a gfn and + * retries, in all other cases it's safe to retry in the future even if + * the next page fault happens on the same RIP+address.
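The last_retry_eip/last_retry_addr pair above implements a one-shot guard: a given (RIP, address) pair gets exactly one unprotect+retry before KVM falls back to emulation. Below is a condensed model of that policy with the vCPU state reduced to a two-field struct; note the real code records the pair only when it actually unprotects, which this sketch simplifies away.

/* Condensed model of the unprotect+retry guard above. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct vcpu_state {
        uint64_t last_retry_eip;
        uint64_t last_retry_addr;
};

/* Returns true if the fault may be retried after unprotecting. */
static bool allow_unprotect_retry(struct vcpu_state *v, uint64_t rip,
                                  uint64_t addr)
{
        if (v->last_retry_eip == rip && v->last_retry_addr == addr)
                return false;           /* second strike: emulate instead */

        v->last_retry_eip = rip;
        v->last_retry_addr = addr;
        return true;
}

int main(void)
{
        struct vcpu_state v = { 0 };

        assert(allow_unprotect_retry(&v, 0xffff800000001000, 0x1000));
        assert(!allow_unprotect_retry(&v, 0xffff800000001000, 0x1000));
        assert(allow_unprotect_retry(&v, 0xffff800000001008, 0x1000));
        return 0;
}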
+ */ + vcpu->arch.last_retry_eip = 0; + vcpu->arch.last_retry_addr = 0; + + /* + * It should be impossible to reach this point with an MMIO cache hit, + * as RET_PF_WRITE_PROTECTED is returned if and only if there's a valid, + * writable memslot, and creating a memslot should invalidate the MMIO + * cache by way of changing the memslot generation. WARN and disallow + * retry if MMIO is detected, as retrying MMIO emulation is pointless + * and could put the vCPU into an infinite loop because the processor + * will keep faulting on the non-existent MMIO address. + */ + if (WARN_ON_ONCE(mmio_info_in_cache(vcpu, cr2_or_gpa, direct))) + return RET_PF_EMULATE; + + /* + * Before emulating the instruction, check to see if the access was due + * to a read-only violation while the CPU was walking non-nested NPT + * page tables, i.e. for a direct MMU, for _guest_ page tables in L1. + * If L1 is sharing (a subset of) its page tables with L2, e.g. by + * having nCR3 share lower level page tables with hCR3, then when KVM + * (L0) write-protects the nested NPTs, i.e. npt12 entries, KVM is also + * unknowingly write-protecting L1's guest page tables, which KVM isn't + * shadowing. + * + * Because the CPU (by default) walks NPT page tables using a write + * access (to ensure the CPU can do A/D updates), page walks in L1 can + * trigger write faults for the above case even when L1 isn't modifying + * PTEs. As a result, KVM will unnecessarily emulate (or at least, try + * to emulate) an excessive number of L1 instructions; because L1's MMU + * isn't shadowed by KVM, there is no need to write-protect L1's gPTEs + * and thus no need to emulate in order to guarantee forward progress. + * + * Try to unprotect the gfn, i.e. zap any shadow pages, so that L1 can + * proceed without triggering emulation. If one or more shadow pages + * were zapped, skip emulation and resume L1 to let it natively execute + * the instruction. If no shadow pages were zapped, then the write- + * fault is due to something else entirely, i.e. KVM needs to emulate, + * as resuming the guest will put it into an infinite loop. + * + * Note, this code also applies to Intel CPUs, even though it is *very* + * unlikely that an L1 will share its page tables (IA32/PAE/paging64 + * format) with L2's page tables (EPT format). + * + * For indirect MMUs, i.e. if KVM is shadowing the current MMU, try to + * unprotect the gfn and retry if an event is awaiting reinjection. If + * KVM emulates multiple instructions before completing event injection, + * the event could be delayed beyond what is architecturally allowed, + * e.g. KVM could inject an IRQ after the TPR has been raised. + */ + if (((direct && is_write_to_guest_page_table(error_code)) || + (!direct && kvm_event_needs_reinjection(vcpu))) && + kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa)) + return RET_PF_RETRY; + + /* + * The gfn is write-protected, but if KVM detects it's emulating an + * instruction that is unlikely to be used to modify page tables, or if + * emulation fails, KVM can try to unprotect the gfn and let the CPU + * re-execute the instruction that caused the page fault. Do not allow + * retrying an instruction from a nested guest as KVM is only explicitly + * shadowing L1's page tables, i.e. unprotecting something for L1 isn't + * going to magically fix whatever issue caused L2 to fail.
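The error-code test driving the direct-MMU branch above is the usual "all of these bits set" idiom. A standalone version for clarity: the P and W/R positions below match the architectural #PF error code, while the position chosen for KVM's synthetic PFERR_GUEST_PAGE flag is illustrative rather than KVM's actual definition.

/* "All bits set" idiom from is_write_to_guest_page_table() above.
 * Bit 63 for the synthetic flag is an assumption for the demo. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define PFERR_PRESENT_MASK      (1ull << 0)
#define PFERR_WRITE_MASK        (1ull << 1)
#define PFERR_GUEST_PAGE_MASK   (1ull << 63)

static bool is_write_to_guest_page_table(uint64_t error_code)
{
        const uint64_t mask = PFERR_GUEST_PAGE_MASK | PFERR_WRITE_MASK |
                              PFERR_PRESENT_MASK;

        return (error_code & mask) == mask;
}

int main(void)
{
        assert(is_write_to_guest_page_table(PFERR_GUEST_PAGE_MASK |
                                            PFERR_WRITE_MASK |
                                            PFERR_PRESENT_MASK));
        /* A read fault during the guest walk doesn't qualify. */
        assert(!is_write_to_guest_page_table(PFERR_GUEST_PAGE_MASK |
                                             PFERR_PRESENT_MASK));
        return 0;
}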
+ */ + if (!is_guest_mode(vcpu)) + *emulation_type |= EMULTYPE_ALLOW_RETRY_PF; + + return RET_PF_EMULATE; +} + int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len) { @@ -6008,6 +6145,10 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err if (r < 0) return r; + if (r == RET_PF_WRITE_PROTECTED) + r = kvm_mmu_write_protect_fault(vcpu, cr2_or_gpa, error_code, + &emulation_type); + if (r == RET_PF_FIXED) vcpu->stat.pf_fixed++; else if (r == RET_PF_EMULATE) @@ -6018,32 +6159,6 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err if (r != RET_PF_EMULATE) return 1; - /* - * Before emulating the instruction, check if the error code - * was due to a RO violation while translating the guest page. - * This can occur when using nested virtualization with nested - * paging in both guests. If true, we simply unprotect the page - * and resume the guest. - */ - if (vcpu->arch.mmu->root_role.direct && - (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); - return 1; - } - - /* - * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still - * optimistically try to just unprotect the page and let the processor - * re-execute the instruction that caused the page fault. Do not allow - * retrying MMIO emulation, as it's not only pointless but could also - * cause us to enter an infinite loop because the processor will keep - * faulting on the non-existent MMIO address. Retrying an instruction - * from a nested guest is also pointless and dangerous as we are only - * explicitly shadowing L1's page tables, i.e. unprotecting something - * for L1 isn't going to magically fix whatever issue cause L2 to fail. - */ - if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu)) - emulation_type |= EMULTYPE_ALLOW_RETRY_PF; emulate: return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, insn_len); @@ -6202,59 +6317,6 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, } EXPORT_SYMBOL_GPL(kvm_configure_mmu); -/* The return value indicates if tlb flush on all vcpus is needed. 
*/ -typedef bool (*slot_rmaps_handler) (struct kvm *kvm, - struct kvm_rmap_head *rmap_head, - const struct kvm_memory_slot *slot); - -static __always_inline bool __walk_slot_rmaps(struct kvm *kvm, - const struct kvm_memory_slot *slot, - slot_rmaps_handler fn, - int start_level, int end_level, - gfn_t start_gfn, gfn_t end_gfn, - bool flush_on_yield, bool flush) -{ - struct slot_rmap_walk_iterator iterator; - - lockdep_assert_held_write(&kvm->mmu_lock); - - for_each_slot_rmap_range(slot, start_level, end_level, start_gfn, - end_gfn, &iterator) { - if (iterator.rmap) - flush |= fn(kvm, iterator.rmap, slot); - - if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { - if (flush && flush_on_yield) { - kvm_flush_remote_tlbs_range(kvm, start_gfn, - iterator.gfn - start_gfn + 1); - flush = false; - } - cond_resched_rwlock_write(&kvm->mmu_lock); - } - } - - return flush; -} - -static __always_inline bool walk_slot_rmaps(struct kvm *kvm, - const struct kvm_memory_slot *slot, - slot_rmaps_handler fn, - int start_level, int end_level, - bool flush_on_yield) -{ - return __walk_slot_rmaps(kvm, slot, fn, start_level, end_level, - slot->base_gfn, slot->base_gfn + slot->npages - 1, - flush_on_yield, false); -} - -static __always_inline bool walk_slot_rmaps_4k(struct kvm *kvm, - const struct kvm_memory_slot *slot, - slot_rmaps_handler fn, - bool flush_on_yield) -{ - return walk_slot_rmaps(kvm, slot, fn, PG_LEVEL_4K, PG_LEVEL_4K, flush_on_yield); -} - static void free_mmu_pages(struct kvm_mmu *mmu) { if (!tdp_enabled && mmu->pae_root) @@ -6528,9 +6590,8 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e if (WARN_ON_ONCE(start >= end)) continue; - flush = __walk_slot_rmaps(kvm, memslot, __kvm_zap_rmap, - PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, - start, end - 1, true, flush); + flush = __kvm_rmap_zap_gfn_range(kvm, memslot, start, + end, true, flush); } } @@ -6818,7 +6879,7 @@ static void kvm_shadow_mmu_try_split_huge_pages(struct kvm *kvm, */ for (level = KVM_MAX_HUGEPAGE_LEVEL; level > target_level; level--) __walk_slot_rmaps(kvm, slot, shadow_mmu_try_split_huge_pages, - level, level, start, end - 1, true, false); + level, level, start, end - 1, true, true, false); } /* Must be called with the mmu_lock held in write-mode. */ @@ -6997,10 +7058,70 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) kvm_mmu_zap_all(kvm); } +static void kvm_mmu_zap_memslot_pages_and_flush(struct kvm *kvm, + struct kvm_memory_slot *slot, + bool flush) +{ + LIST_HEAD(invalid_list); + unsigned long i; + + if (list_empty(&kvm->arch.active_mmu_pages)) + goto out_flush; + + /* + * Since accounting information is stored in struct kvm_arch_memory_slot, + * all MMU pages that are shadowing guest PTEs must be zapped before the + * memslot is deleted, as freeing such pages after the memslot is freed + * will result in use-after-free, e.g. in unaccount_shadowed(). 
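The zap loop in the next hunk follows the classic batch-then-yield shape: queue work under mmu_lock, and commit the batch at every potential reschedule point so nothing is left half-done while the lock is dropped. The distilled model below replaces need_resched()/cond_resched_rwlock_write() with a fixed batch size and drops the locking entirely.

/* Batch-and-yield pattern from the memslot zap loop below. */
#include <stdio.h>

#define BATCH   3       /* stand-in for need_resched()/rwlock_needbreak() */

static void flush(int *pending)
{
        if (*pending)
                printf("flush %d zapped pages\n", *pending);
        *pending = 0;
}

static void zap_all(int npages)
{
        int pending = 0;

        for (int i = 0; i < npages; i++) {
                pending++;                      /* kvm_mmu_prepare_zap_page() */
                if (pending >= BATCH)
                        flush(&pending);        /* commit before yielding */
                /* cond_resched_rwlock_write() would go here */
        }
        flush(&pending);                        /* final commit */
}

int main(void)
{
        zap_all(7);     /* flushes 3, 3, then 1 */
        return 0;
}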
+ */ + for (i = 0; i < slot->npages; i++) { + struct kvm_mmu_page *sp; + gfn_t gfn = slot->base_gfn + i; + + for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) + kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); + + if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { + kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); + flush = false; + cond_resched_rwlock_write(&kvm->mmu_lock); + } + } + +out_flush: + kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); +} + +static void kvm_mmu_zap_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + struct kvm_gfn_range range = { + .slot = slot, + .start = slot->base_gfn, + .end = slot->base_gfn + slot->npages, + .may_block = true, + }; + bool flush; + + write_lock(&kvm->mmu_lock); + flush = kvm_unmap_gfn_range(kvm, &range); + kvm_mmu_zap_memslot_pages_and_flush(kvm, slot, flush); + write_unlock(&kvm->mmu_lock); +} + +static inline bool kvm_memslot_flush_zap_all(struct kvm *kvm) +{ + return kvm->arch.vm_type == KVM_X86_DEFAULT_VM && + kvm_check_has_quirk(kvm, KVM_X86_QUIRK_SLOT_ZAP_ALL); +} + void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { - kvm_mmu_zap_all_fast(kvm); + if (kvm_memslot_flush_zap_all(kvm)) + kvm_mmu_zap_all_fast(kvm); + else + kvm_mmu_zap_memslot(kvm, slot); } void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 1721d97743e9..c98827840e07 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -258,6 +258,8 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); * RET_PF_CONTINUE: So far, so good, keep handling the page fault. * RET_PF_RETRY: let CPU fault again on the address. * RET_PF_EMULATE: mmio page fault, emulate the instruction directly. + * RET_PF_WRITE_PROTECTED: the gfn is write-protected, either unprotect the + * gfn and retry, or emulate the instruction directly. * RET_PF_INVALID: the spte is invalid, let the real page fault path update it. * RET_PF_FIXED: The faulting entry has been fixed. * RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU.
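Note that the mmutrace.h hunk below adds a matching TRACE_DEFINE_ENUM() for the new value; the enum and its tracing definitions have to move in lockstep, which the kernel keeps aligned by convention and review. The invariant itself can be expressed with a static assertion, as in this standalone illustration (the name table is an invention of the sketch, not the kernel's mechanism):

/* Keeping an enum and a derived name table in lockstep. */
#include <stdio.h>

enum ret_pf {
        RET_PF_CONTINUE = 0,
        RET_PF_RETRY,
        RET_PF_EMULATE,
        RET_PF_WRITE_PROTECTED,
        RET_PF_INVALID,
        RET_PF_FIXED,
        RET_PF_SPURIOUS,
        NR_RET_PF,
};

static const char * const ret_pf_names[] = {
        "CONTINUE", "RETRY", "EMULATE", "WRITE_PROTECTED",
        "INVALID", "FIXED", "SPURIOUS",
};

/* Fails to compile if a value is added without a matching name. */
_Static_assert(sizeof(ret_pf_names) / sizeof(ret_pf_names[0]) == NR_RET_PF,
               "ret_pf_names out of sync with enum ret_pf");

int main(void)
{
        printf("%d -> %s\n", RET_PF_WRITE_PROTECTED,
               ret_pf_names[RET_PF_WRITE_PROTECTED]);
        return 0;
}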
@@ -274,6 +276,7 @@ enum { RET_PF_CONTINUE = 0, RET_PF_RETRY, RET_PF_EMULATE, + RET_PF_WRITE_PROTECTED, RET_PF_INVALID, RET_PF_FIXED, RET_PF_SPURIOUS, @@ -349,8 +352,6 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm, void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level); -void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); - void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h index 195d98bc8de8..f35a830ce469 100644 --- a/arch/x86/kvm/mmu/mmutrace.h +++ b/arch/x86/kvm/mmu/mmutrace.h @@ -57,6 +57,7 @@ TRACE_DEFINE_ENUM(RET_PF_CONTINUE); TRACE_DEFINE_ENUM(RET_PF_RETRY); TRACE_DEFINE_ENUM(RET_PF_EMULATE); +TRACE_DEFINE_ENUM(RET_PF_WRITE_PROTECTED); TRACE_DEFINE_ENUM(RET_PF_INVALID); TRACE_DEFINE_ENUM(RET_PF_FIXED); TRACE_DEFINE_ENUM(RET_PF_SPURIOUS); diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 69941cebb3a8..ae7d39ff2d07 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -646,10 +646,10 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, * really care if it changes underneath us after this point). */ if (FNAME(gpte_changed)(vcpu, gw, top_level)) - goto out_gpte_changed; + return RET_PF_RETRY; if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) - goto out_gpte_changed; + return RET_PF_RETRY; /* * Load a new root and retry the faulting instruction in the extremely @@ -659,7 +659,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, */ if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) { kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu); - goto out_gpte_changed; + return RET_PF_RETRY; } for_each_shadow_entry(vcpu, fault->addr, it) { @@ -674,34 +674,38 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, sp = kvm_mmu_get_child_sp(vcpu, it.sptep, table_gfn, false, access); - if (sp != ERR_PTR(-EEXIST)) { - /* - * We must synchronize the pagetable before linking it - * because the guest doesn't need to flush tlb when - * the gpte is changed from non-present to present. - * Otherwise, the guest may use the wrong mapping. - * - * For PG_LEVEL_4K, kvm_mmu_get_page() has already - * synchronized it transiently via kvm_sync_page(). - * - * For higher level pagetable, we synchronize it via - * the slower mmu_sync_children(). If it needs to - * break, some progress has been made; return - * RET_PF_RETRY and retry on the next #PF. - * KVM_REQ_MMU_SYNC is not necessary but it - * expedites the process. - */ - if (sp->unsync_children && - mmu_sync_children(vcpu, sp, false)) - return RET_PF_RETRY; - } + /* + * Synchronize the new page before linking it, as the CPU (KVM) + * is architecturally disallowed from inserting non-present + * entries into the TLB, i.e. the guest isn't required to flush + * the TLB when changing the gPTE from non-present to present. + * + * For PG_LEVEL_4K, kvm_mmu_find_shadow_page() has already + * synchronized the page via kvm_sync_page(). + * + * For higher level pages, which cannot be unsync themselves + * but can have unsync children, synchronize via the slower + * mmu_sync_children(). 
If KVM needs to drop mmu_lock due to + * contention or to reschedule, instruct the caller to retry + * the #PF (mmu_sync_children() ensures forward progress will + * be made). + */ + if (sp != ERR_PTR(-EEXIST) && sp->unsync_children && + mmu_sync_children(vcpu, sp, false)) + return RET_PF_RETRY; /* - * Verify that the gpte in the page we've just write - * protected is still there. + * Verify that the gpte in the page, which is now either + * write-protected or unsync, wasn't modified between the fault + * and acquiring mmu_lock. This needs to be done even when + * reusing an existing shadow page to ensure the information + * gathered by the walker matches the information stored in the + * shadow page (which could have been modified by a different + * vCPU even if the page was already linked). Holding mmu_lock + * prevents the shadow page from changing after this point. */ if (FNAME(gpte_changed)(vcpu, gw, it.level - 1)) - goto out_gpte_changed; + return RET_PF_RETRY; if (sp != ERR_PTR(-EEXIST)) link_shadow_page(vcpu, it.sptep, sp); @@ -755,9 +759,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, FNAME(pte_prefetch)(vcpu, gw, it.sptep); return ret; - -out_gpte_changed: - return RET_PF_RETRY; } /* @@ -805,7 +806,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (page_fault_handle_page_track(vcpu, fault)) { shadow_page_table_clear_flood(vcpu, fault->addr); - return RET_PF_EMULATE; + return RET_PF_WRITE_PROTECTED; } r = mmu_topup_memory_caches(vcpu, true); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 3c55955bcaf8..3b996c1fdaab 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1046,10 +1046,8 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, * protected, emulation is needed. If the emulation was skipped, * the vCPU would have the same fault again. */ - if (wrprot) { - if (fault->write) - ret = RET_PF_EMULATE; - } + if (wrprot && fault->write) + ret = RET_PF_WRITE_PROTECTED; /* If a MMIO SPTE is installed, the MMIO will need to be emulated. */ if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) { diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 2f4e155080ba..0d17d6b70639 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -17,6 +17,7 @@ enum kvm_only_cpuid_leafs { CPUID_8000_0007_EDX, CPUID_8000_0022_EAX, CPUID_7_2_EDX, + CPUID_24_0_EBX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, @@ -46,6 +47,7 @@ enum kvm_only_cpuid_leafs { #define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5) #define X86_FEATURE_AMX_COMPLEX KVM_X86_FEATURE(CPUID_7_1_EDX, 8) #define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14) +#define X86_FEATURE_AVX10 KVM_X86_FEATURE(CPUID_7_1_EDX, 19) /* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */ #define X86_FEATURE_INTEL_PSFD KVM_X86_FEATURE(CPUID_7_2_EDX, 0) @@ -55,6 +57,11 @@ enum kvm_only_cpuid_leafs { #define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) #define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) +/* Intel-defined sub-features, CPUID level 0x00000024:0 (EBX) */ +#define X86_FEATURE_AVX10_128 KVM_X86_FEATURE(CPUID_24_0_EBX, 16) +#define X86_FEATURE_AVX10_256 KVM_X86_FEATURE(CPUID_24_0_EBX, 17) +#define X86_FEATURE_AVX10_512 KVM_X86_FEATURE(CPUID_24_0_EBX, 18) + /* CPUID level 0x80000007 (EDX). 
*/ #define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8) @@ -90,6 +97,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX}, [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, + [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX}, }; /* diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index 00e3c27d2a87..85241c0c7f56 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -624,17 +624,31 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) #endif /* - * Give leave_smm() a chance to make ISA-specific changes to the vCPU - * state (e.g. enter guest mode) before loading state from the SMM - * state-save area. + * FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest + * mode should happen _after_ loading state from SMRAM. However, KVM + * piggybacks the nested VM-Enter flows (which is wrong for many other + * reasons), and so nSVM/nVMX would clobber state that is loaded from + * SMRAM and from the VMCS/VMCB. */ if (kvm_x86_call(leave_smm)(vcpu, &smram)) return X86EMUL_UNHANDLEABLE; #ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) - return rsm_load_state_64(ctxt, &smram.smram64); + ret = rsm_load_state_64(ctxt, &smram.smram64); else #endif - return rsm_load_state_32(ctxt, &smram.smram32); + ret = rsm_load_state_32(ctxt, &smram.smram32); + + /* + * If RSM fails and triggers shutdown, architecturally the shutdown + * occurs *before* the transition to guest mode. But due to KVM's + * flawed handling of RSM to L2 (see above), the vCPU may already be + * in_guest_mode(). Force the vCPU out of guest mode before delivering + * the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit + * that architecturally shouldn't be possible. + */ + if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu)) + kvm_leave_nested(vcpu); + return ret; } diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 6f704c1037e5..cf84103ce38b 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -63,8 +63,12 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) u64 pdpte; int ret; + /* + * Note, nCR3 is "assumed" to be 32-byte aligned, i.e. the CPU ignores + * nCR3[4:0] when loading PDPTEs from memory. + */ ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte, - offset_in_page(cr3) + index * 8, 8); + (cr3 & GENMASK(11, 5)) + index * 8, 8); if (ret) return 0; return pdpte; @@ -1693,8 +1697,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, return -EINVAL; ret = -ENOMEM; - ctl = kzalloc(sizeof(*ctl), GFP_KERNEL_ACCOUNT); - save = kzalloc(sizeof(*save), GFP_KERNEL_ACCOUNT); + ctl = kzalloc(sizeof(*ctl), GFP_KERNEL); + save = kzalloc(sizeof(*save), GFP_KERNEL); if (!ctl || !save) goto out_free; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0b851ef937f2..fb854cf20ac3 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -450,8 +450,11 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, goto e_free; /* This needs to happen after SEV/SNP firmware initialization. 
*/ - if (vm_type == KVM_X86_SNP_VM && snp_guest_req_init(kvm)) - goto e_free; + if (vm_type == KVM_X86_SNP_VM) { + ret = snp_guest_req_init(kvm); + if (ret) + goto e_free; + } INIT_LIST_HEAD(&sev->regions_list); INIT_LIST_HEAD(&sev->mirror_vms); @@ -2212,10 +2215,6 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) if (sev->snp_context) return -EINVAL; - sev->snp_context = snp_context_create(kvm, argp); - if (!sev->snp_context) - return -ENOTTY; - if (params.flags) return -EINVAL; @@ -2230,6 +2229,10 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) if (params.policy & SNP_POLICY_MASK_SINGLE_SOCKET) return -EINVAL; + sev->snp_context = snp_context_create(kvm, argp); + if (!sev->snp_context) + return -ENOTTY; + start.gctx_paddr = __psp_pa(sev->snp_context); start.policy = params.policy; memcpy(start.gosvw, params.gosvw, sizeof(params.gosvw)); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 5ab2c92c7331..9df3e1e5ae81 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -573,7 +573,7 @@ static void __svm_write_tsc_multiplier(u64 multiplier) static __always_inline struct sev_es_save_area *sev_es_host_save_area(struct svm_cpu_data *sd) { - return page_address(sd->save_area) + 0x400; + return &sd->save_area->host_sev_es_save; } static inline void kvm_cpu_svm_disable(void) @@ -592,14 +592,14 @@ static inline void kvm_cpu_svm_disable(void) } } -static void svm_emergency_disable(void) +static void svm_emergency_disable_virtualization_cpu(void) { kvm_rebooting = true; kvm_cpu_svm_disable(); } -static void svm_hardware_disable(void) +static void svm_disable_virtualization_cpu(void) { /* Make sure we clean up behind us */ if (tsc_scaling) @@ -610,7 +610,7 @@ static void svm_hardware_disable(void) amd_pmu_disable_virt(); } -static int svm_hardware_enable(void) +static int svm_enable_virtualization_cpu(void) { struct svm_cpu_data *sd; @@ -696,7 +696,7 @@ static void svm_cpu_uninit(int cpu) return; kfree(sd->sev_vmcbs); - __free_page(sd->save_area); + __free_page(__sme_pa_to_page(sd->save_area_pa)); sd->save_area_pa = 0; sd->save_area = NULL; } @@ -704,23 +704,24 @@ static void svm_cpu_uninit(int cpu) static int svm_cpu_init(int cpu) { struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); + struct page *save_area_page; int ret = -ENOMEM; memset(sd, 0, sizeof(struct svm_cpu_data)); - sd->save_area = snp_safe_alloc_page_node(cpu_to_node(cpu), GFP_KERNEL); - if (!sd->save_area) + save_area_page = snp_safe_alloc_page_node(cpu_to_node(cpu), GFP_KERNEL); + if (!save_area_page) return ret; ret = sev_cpu_init(sd); if (ret) goto free_save_area; - sd->save_area_pa = __sme_page_pa(sd->save_area); + sd->save_area = page_address(save_area_page); + sd->save_area_pa = __sme_page_pa(save_area_page); return 0; free_save_area: - __free_page(sd->save_area); - sd->save_area = NULL; + __free_page(save_area_page); return ret; } @@ -1124,8 +1125,7 @@ static void svm_hardware_unsetup(void) for_each_possible_cpu(cpu) svm_cpu_uninit(cpu); - __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), - get_order(IOPM_SIZE)); + __free_pages(__sme_pa_to_page(iopm_base), get_order(IOPM_SIZE)); iopm_base = 0; } @@ -1301,7 +1301,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu) if (!kvm_hlt_in_guest(vcpu->kvm)) svm_set_intercept(svm, INTERCEPT_HLT); - control->iopm_base_pa = __sme_set(iopm_base); + control->iopm_base_pa = iopm_base; control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); control->int_ctl = V_INTR_MASKING_MASK; @@ -1503,7 +1503,7 @@ static void 
svm_vcpu_free(struct kvm_vcpu *vcpu) sev_free_vcpu(vcpu); - __free_page(pfn_to_page(__sme_clr(svm->vmcb01.pa) >> PAGE_SHIFT)); + __free_page(__sme_pa_to_page(svm->vmcb01.pa)); __free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE)); } @@ -1533,7 +1533,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) * TSC_AUX is always virtualized for SEV-ES guests when the feature is * available. The user return MSR support is not required in this case * because TSC_AUX is restored on #VMEXIT from the host save area - * (which has been initialized in svm_hardware_enable()). + * (which has been initialized in svm_enable_virtualization_cpu()). */ if (likely(tsc_aux_uret_slot >= 0) && (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm))) @@ -2825,17 +2825,17 @@ static int efer_trap(struct kvm_vcpu *vcpu) return kvm_complete_insn_gp(vcpu, ret); } -static int svm_get_msr_feature(struct kvm_msr_entry *msr) +static int svm_get_feature_msr(u32 msr, u64 *data) { - msr->data = 0; + *data = 0; - switch (msr->index) { + switch (msr) { case MSR_AMD64_DE_CFG: if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) - msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; + *data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; break; default: - return KVM_MSR_RET_INVALID; + return KVM_MSR_RET_UNSUPPORTED; } return 0; @@ -3144,7 +3144,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) * feature is available. The user return MSR support is not * required in this case because TSC_AUX is restored on #VMEXIT * from the host save area (which has been initialized in - * svm_hardware_enable()). + * svm_enable_virtualization_cpu()). */ if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && sev_es_guest(vcpu->kvm)) break; @@ -3191,18 +3191,21 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) kvm_pr_unimpl_wrmsr(vcpu, ecx, data); break; case MSR_AMD64_DE_CFG: { - struct kvm_msr_entry msr_entry; + u64 supported_de_cfg; - msr_entry.index = msr->index; - if (svm_get_msr_feature(&msr_entry)) + if (svm_get_feature_msr(ecx, &supported_de_cfg)) return 1; - /* Check the supported bits */ - if (data & ~msr_entry.data) + if (data & ~supported_de_cfg) return 1; - /* Don't allow the guest to change a bit, #GP */ - if (!msr->host_initiated && (data ^ msr_entry.data)) + /* + * Don't let the guest change the host-programmed value. The + * MSR is very model specific, i.e. contains multiple bits that + * are completely unknown to KVM, and the one bit known to KVM + * is simply a reflection of hardware capabilities. 
+ */ + if (!msr->host_initiated && data != svm->msr_decfg) return 1; svm->msr_decfg = data; @@ -4156,12 +4159,21 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu) static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + if (is_guest_mode(vcpu)) return EXIT_FASTPATH_NONE; - if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && - to_svm(vcpu)->vmcb->control.exit_info_1) + switch (svm->vmcb->control.exit_code) { + case SVM_EXIT_MSR: + if (!svm->vmcb->control.exit_info_1) + break; return handle_fastpath_set_msr_irqoff(vcpu); + case SVM_EXIT_HLT: + return handle_fastpath_hlt(vcpu); + default: + break; + } return EXIT_FASTPATH_NONE; } @@ -4992,8 +5004,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .check_processor_compatibility = svm_check_processor_compat, .hardware_unsetup = svm_hardware_unsetup, - .hardware_enable = svm_hardware_enable, - .hardware_disable = svm_hardware_disable, + .enable_virtualization_cpu = svm_enable_virtualization_cpu, + .disable_virtualization_cpu = svm_disable_virtualization_cpu, + .emergency_disable_virtualization_cpu = svm_emergency_disable_virtualization_cpu, .has_emulated_msr = svm_has_emulated_msr, .vcpu_create = svm_vcpu_create, @@ -5011,7 +5024,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .vcpu_unblocking = avic_vcpu_unblocking, .update_exception_bitmap = svm_update_exception_bitmap, - .get_msr_feature = svm_get_msr_feature, + .get_feature_msr = svm_get_feature_msr, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, @@ -5062,6 +5075,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .enable_nmi_window = svm_enable_nmi_window, .enable_irq_window = svm_enable_irq_window, .update_cr8_intercept = svm_update_cr8_intercept, + + .x2apic_icr_is_split = true, .set_virtual_apic_mode = avic_refresh_virtual_apic_mode, .refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl, .apicv_post_state_restore = avic_apicv_post_state_restore, @@ -5266,7 +5281,7 @@ static __init int svm_hardware_setup(void) iopm_va = page_address(iopm_pages); memset(iopm_va, 0xff, PAGE_SIZE * (1 << order)); - iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; + iopm_base = __sme_page_pa(iopm_pages); init_msrpm_offsets(); @@ -5425,8 +5440,6 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = { static void __svm_exit(void) { kvm_x86_vendor_exit(); - - cpu_emergency_unregister_virt_callback(svm_emergency_disable); } static int __init svm_init(void) @@ -5442,8 +5455,6 @@ static int __init svm_init(void) if (r) return r; - cpu_emergency_register_virt_callback(svm_emergency_disable); - /* * Common KVM initialization _must_ come last, after this, /dev/kvm is * exposed to userspace! diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 76107c7d0595..43fa6a16eb19 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -25,7 +25,21 @@ #include "cpuid.h" #include "kvm_cache_regs.h" -#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) +/* + * Helpers to convert to/from physical addresses for pages whose address is + * consumed directly by hardware. Even though it's a physical address, SVM + * often restricts the address to the natural width, hence 'unsigned long' + * instead of 'hpa_t'. 
+ */ +static inline unsigned long __sme_page_pa(struct page *page) +{ + return __sme_set(page_to_pfn(page) << PAGE_SHIFT); +} + +static inline struct page *__sme_pa_to_page(unsigned long pa) +{ + return pfn_to_page(__sme_clr(pa) >> PAGE_SHIFT); +} #define IOPM_SIZE PAGE_SIZE * 3 #define MSRPM_SIZE PAGE_SIZE * 2 @@ -321,7 +335,7 @@ struct svm_cpu_data { u32 next_asid; u32 min_asid; - struct page *save_area; + struct vmcb *save_area; unsigned long save_area_pa; struct vmcb *current_vmcb; diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index a0c8eb37d3e1..2ed80aea3bb1 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -209,10 +209,8 @@ SYM_FUNC_START(__svm_vcpu_run) 7: vmload %_ASM_AX 8: -#ifdef CONFIG_MITIGATION_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE -#endif + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT /* Clobbers RAX, RCX, RDX. */ RESTORE_HOST_SPEC_CTRL @@ -348,10 +346,8 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) 2: cli -#ifdef CONFIG_MITIGATION_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE -#endif + FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT /* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */ RESTORE_HOST_SPEC_CTRL diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 41a4533f9989..cb6588238f46 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -54,9 +54,7 @@ struct nested_vmx_msrs { }; struct vmcs_config { - int size; - u32 basic_cap; - u32 revision_id; + u64 basic; u32 pin_based_exec_ctrl; u32 cpu_based_exec_ctrl; u32 cpu_based_2nd_exec_ctrl; @@ -76,7 +74,7 @@ extern struct vmx_capability vmx_capability __ro_after_init; static inline bool cpu_has_vmx_basic_inout(void) { - return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT); + return vmcs_config.basic & VMX_BASIC_INOUT; } static inline bool cpu_has_virtual_nmis(void) @@ -225,7 +223,7 @@ static inline bool cpu_has_vmx_vmfunc(void) static inline bool cpu_has_vmx_shadow_vmcs(void) { /* check if the cpu supports writing r/o exit information fields */ - if (!(vmcs_config.misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) + if (!(vmcs_config.misc & VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) return false; return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -367,7 +365,7 @@ static inline bool cpu_has_vmx_invvpid_global(void) static inline bool cpu_has_vmx_intel_pt(void) { - return (vmcs_config.misc & MSR_IA32_VMX_MISC_INTEL_PT) && + return (vmcs_config.misc & VMX_MISC_INTEL_PT) && (vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA) && (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL); } diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 0bf35ebe8a1b..7668e2fb8043 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -23,8 +23,10 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .hardware_unsetup = vmx_hardware_unsetup, - .hardware_enable = vmx_hardware_enable, - .hardware_disable = vmx_hardware_disable, + .enable_virtualization_cpu = vmx_enable_virtualization_cpu, + .disable_virtualization_cpu = vmx_disable_virtualization_cpu, + .emergency_disable_virtualization_cpu = vmx_emergency_disable_virtualization_cpu, + .has_emulated_msr = vmx_has_emulated_msr, .vm_size = sizeof(struct kvm_vmx), @@ -41,7 +43,7 
@@ struct kvm_x86_ops vt_x86_ops __initdata = { .vcpu_put = vmx_vcpu_put, .update_exception_bitmap = vmx_update_exception_bitmap, - .get_msr_feature = vmx_get_msr_feature, + .get_feature_msr = vmx_get_feature_msr, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, @@ -89,6 +91,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .enable_nmi_window = vmx_enable_nmi_window, .enable_irq_window = vmx_enable_irq_window, .update_cr8_intercept = vmx_update_cr8_intercept, + + .x2apic_icr_is_split = false, .set_virtual_apic_mode = vmx_set_virtual_apic_mode, .set_apic_access_page_addr = vmx_set_apic_access_page_addr, .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2392a7ef254d..931a7361c30f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -981,7 +981,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) __func__, i, e.index, e.reserved); goto fail; } - if (kvm_set_msr(vcpu, e.index, e.value)) { + if (kvm_set_msr_with_filter(vcpu, e.index, e.value)) { pr_debug_ratelimited( "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", __func__, i, e.index, e.value); @@ -1017,7 +1017,7 @@ static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu, } } - if (kvm_get_msr(vcpu, msr_index, data)) { + if (kvm_get_msr_with_filter(vcpu, msr_index, data)) { pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__, msr_index); return false; @@ -1112,9 +1112,9 @@ static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu, /* * Emulated VMEntry does not fail here. Instead a less * accurate value will be returned by - * nested_vmx_get_vmexit_msr_value() using kvm_get_msr() - * instead of reading the value from the vmcs02 VMExit - * MSR-store area. + * nested_vmx_get_vmexit_msr_value() by reading KVM's + * internal MSR state instead of reading the value from + * the vmcs02 VMExit MSR-store area. */ pr_warn_ratelimited( "Not enough msr entries in msr_autostore. Can't add msr %x\n", @@ -1197,11 +1197,14 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu, kvm_hv_nested_transtion_tlb_flush(vcpu, enable_ept); /* - * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings - * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a - * full TLB flush from the guest's perspective. This is required even - * if VPID is disabled in the host as KVM may need to synchronize the - * MMU in response to the guest TLB flush. + * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the + * same VPID as the host, and so architecturally, linear and combined + * mappings for VPID=0 must be flushed at VM-Enter and VM-Exit. KVM + * emulates L2 sharing L1's VPID=0 by using vpid01 while running L2, + * and so KVM must also emulate TLB flush of VPID=0, i.e. vpid01. This + * is required if VPID is disabled in KVM, as a TLB flush (there are no + * VPIDs) still occurs from L1's perspective, and KVM may need to + * synchronize the MMU in response to the guest TLB flush. * * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use. 
* EPT is a special snowflake, as guest-physical mappings aren't @@ -1251,21 +1254,32 @@ static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) { - const u64 feature_and_reserved = - /* feature (except bit 48; see below) */ - BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | - /* reserved */ - BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); + const u64 feature_bits = VMX_BASIC_DUAL_MONITOR_TREATMENT | + VMX_BASIC_INOUT | + VMX_BASIC_TRUE_CTLS; + + const u64 reserved_bits = GENMASK_ULL(63, 56) | + GENMASK_ULL(47, 45) | + BIT_ULL(31); + u64 vmx_basic = vmcs_config.nested.basic; - if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) + BUILD_BUG_ON(feature_bits & reserved_bits); + + /* + * Except for 32BIT_PHYS_ADDR_ONLY, which is an anti-feature bit (has + * inverted polarity), the incoming value must not set feature bits or + * reserved bits that aren't allowed/supported by KVM. Fields, i.e. + * multi-bit values, are explicitly checked below. + */ + if (!is_bitwise_subset(vmx_basic, data, feature_bits | reserved_bits)) return -EINVAL; /* * KVM does not emulate a version of VMX that constrains physical * addresses of VMX structures (e.g. VMCS) to 32-bits. */ - if (data & BIT_ULL(48)) + if (data & VMX_BASIC_32BIT_PHYS_ADDR_ONLY) return -EINVAL; if (vmx_basic_vmcs_revision_id(vmx_basic) != @@ -1334,16 +1348,29 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) { - const u64 feature_and_reserved_bits = - /* feature */ - BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) | - BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) | - /* reserved */ - GENMASK_ULL(13, 9) | BIT_ULL(31); + const u64 feature_bits = VMX_MISC_SAVE_EFER_LMA | + VMX_MISC_ACTIVITY_HLT | + VMX_MISC_ACTIVITY_SHUTDOWN | + VMX_MISC_ACTIVITY_WAIT_SIPI | + VMX_MISC_INTEL_PT | + VMX_MISC_RDMSR_IN_SMM | + VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | + VMX_MISC_VMXOFF_BLOCK_SMI | + VMX_MISC_ZERO_LEN_INS; + + const u64 reserved_bits = BIT_ULL(31) | GENMASK_ULL(13, 9); + u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low, vmcs_config.nested.misc_high); - if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) + BUILD_BUG_ON(feature_bits & reserved_bits); + + /* + * The incoming value must not set feature bits or reserved bits that + * aren't allowed/supported by KVM. Fields, i.e. multi-bit values, are + * explicitly checked below. + */ + if (!is_bitwise_subset(vmx_misc, data, feature_bits | reserved_bits)) return -EINVAL; if ((vmx->nested.msrs.pinbased_ctls_high & @@ -2291,6 +2318,17 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx, vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA); + /* + * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the + * same VPID as the host. Emulate this behavior by using vpid01 for L2 + * if VPID is disabled in vmcs12. Note, if VPID is disabled, VM-Enter + * and VM-Exit are architecturally required to flush VPID=0, but *only* + * VPID=0. I.e. using vpid02 would be ok (so long as KVM emulates the + * required flushes), but doing so would cause KVM to over-flush. E.g. + * if L1 runs L2 X with VPID12=1, then runs L2 Y with VPID12 disabled, + * and then runs L2 X again, then KVM can and should retain TLB entries + * for VPID12=1. 
+ */ if (enable_vpid) { if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); @@ -2317,10 +2355,12 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 /* Posted interrupts setting is only taken from vmcs12. */ vmx->nested.pi_pending = false; - if (nested_cpu_has_posted_intr(vmcs12)) + if (nested_cpu_has_posted_intr(vmcs12)) { vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; - else + } else { + vmx->nested.posted_intr_nv = -1; exec_control &= ~PIN_BASED_POSTED_INTR; + } pin_controls_set(vmx, exec_control); /* @@ -2470,6 +2510,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) { + vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector); @@ -2507,7 +2548,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); - vmx->segment_cache.bitmask = 0; + vmx_segment_cache_clear(vmx); } if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & @@ -4284,11 +4325,52 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) } if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) { + int irq; + if (block_nested_events) return -EBUSY; if (!nested_exit_on_intr(vcpu)) goto no_vmexit; - nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); + + if (!nested_exit_intr_ack_set(vcpu)) { + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); + return 0; + } + + irq = kvm_cpu_get_extint(vcpu); + if (irq != -1) { + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, + INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0); + return 0; + } + + irq = kvm_apic_has_interrupt(vcpu); + if (WARN_ON_ONCE(irq < 0)) + goto no_vmexit; + + /* + * If the IRQ is L2's PI notification vector, process posted + * interrupts for L2 instead of injecting VM-Exit, as the + * detection/morphing architecturally occurs when the IRQ is + * delivered to the CPU. Note, only interrupts that are routed + * through the local APIC trigger posted interrupt processing, + * and enabling posted interrupts requires ACK-on-exit. + */ + if (irq == vmx->nested.posted_intr_nv) { + vmx->nested.pi_pending = true; + kvm_apic_clear_irr(vcpu, irq); + goto no_vmexit; + } + + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, + INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0); + + /* + * ACK the interrupt _after_ emulating VM-Exit, as the IRQ must + * be marked as in-service in vmcs01.GUEST_INTERRUPT_STATUS.SVI + * if APICv is active. 
+ */ + kvm_apic_ack_interrupt(vcpu, irq); return 0; } @@ -4806,7 +4888,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) goto vmabort; } - if (kvm_set_msr(vcpu, h.index, h.value)) { + if (kvm_set_msr_with_filter(vcpu, h.index, h.value)) { pr_debug_ratelimited( "%s WRMSR failed (%u, 0x%x, 0x%llx)\n", __func__, j, h.index, h.value); @@ -4969,14 +5051,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; if (likely(!vmx->fail)) { - if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && - nested_exit_intr_ack_set(vcpu)) { - int irq = kvm_cpu_get_interrupt(vcpu); - WARN_ON(irq < 0); - vmcs12->vm_exit_intr_info = irq | - INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR; - } - if (vm_exit_reason != -1) trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, vmcs12->exit_qualification, @@ -5890,6 +5964,12 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + /* + * Always flush the effective vpid02, i.e. never flush the current VPID + * and never explicitly flush vpid01. INVVPID targets a VPID, not a + * VMCS, and so whether or not the current vmcs12 has VPID enabled is + * irrelevant (and there may not be a loaded vmcs12). + */ vpid02 = nested_get_vpid02(vcpu); switch (type) { case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: @@ -7051,7 +7131,7 @@ static void nested_vmx_setup_misc_data(struct vmcs_config *vmcs_conf, { msrs->misc_low = (u32)vmcs_conf->misc & VMX_MISC_SAVE_EFER_LMA; msrs->misc_low |= - MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | + VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | VMX_MISC_ACTIVITY_HLT | VMX_MISC_ACTIVITY_WAIT_SIPI; @@ -7066,12 +7146,10 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs) * guest, and the VMCS structure we give it - not about the * VMX support of the underlying hardware. 
*/ - msrs->basic = - VMCS12_REVISION | - VMX_BASIC_TRUE_CTLS | - ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | - (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); + msrs->basic = vmx_basic_encode_vmcs_info(VMCS12_REVISION, VMCS12_SIZE, + X86_MEMTYPE_WB); + msrs->basic |= VMX_BASIC_TRUE_CTLS; if (cpu_has_vmx_basic_inout()) msrs->basic |= VMX_BASIC_INOUT; } diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index cce4e2aa30fb..2c296b6abb8c 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -39,11 +39,17 @@ bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) { + lockdep_assert_once(lockdep_is_held(&vcpu->mutex) || + !refcount_read(&vcpu->kvm->users_count)); + return to_vmx(vcpu)->nested.cached_vmcs12; } static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu) { + lockdep_assert_once(lockdep_is_held(&vcpu->mutex) || + !refcount_read(&vcpu->kvm->users_count)); + return to_vmx(vcpu)->nested.cached_shadow_vmcs12; } @@ -109,7 +115,7 @@ static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu) static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu) { return to_vmx(vcpu)->nested.msrs.misc_low & - MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS; + VMX_MISC_VMWRITE_SHADOW_RO_FIELDS; } static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index 6fef01e0536e..a3c3d2a51f47 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -274,7 +274,7 @@ static int handle_encls_ecreate(struct kvm_vcpu *vcpu) * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to * enforce restriction of access to the PROVISIONKEY. */ - contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT); + contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL); if (!contents) return -ENOMEM; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 733a0c45d1a6..d28618e9277e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -217,9 +217,11 @@ module_param(ple_window_shrink, uint, 0444); static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; module_param(ple_window_max, uint, 0444); -/* Default is SYSTEM mode, 1 for host-guest mode */ +/* Default is SYSTEM mode, 1 for host-guest mode (which is BROKEN) */ int __read_mostly pt_mode = PT_MODE_SYSTEM; +#ifdef CONFIG_BROKEN module_param(pt_mode, int, S_IRUGO); +#endif struct x86_pmu_lbr __ro_after_init vmx_lbr_caps; @@ -525,10 +527,6 @@ static const struct kvm_vmx_segment_field { VMX_SEGMENT_FIELD(LDTR), }; -static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) -{ - vmx->segment_cache.bitmask = 0; -} static unsigned long host_idt_base; @@ -755,7 +753,7 @@ fault: return -EIO; } -static void vmx_emergency_disable(void) +void vmx_emergency_disable_virtualization_cpu(void) { int cpu = raw_smp_processor_id(); struct loaded_vmcs *v; @@ -1998,15 +1996,15 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, return !(msr->data & ~valid_bits); } -int vmx_get_msr_feature(struct kvm_msr_entry *msr) +int vmx_get_feature_msr(u32 msr, u64 *data) { - switch (msr->index) { + switch (msr) { case KVM_FIRST_EMULATED_VMX_MSR ... 
KVM_LAST_EMULATED_VMX_MSR: if (!nested) return 1; - return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); + return vmx_get_vmx_msr(&vmcs_config.nested, msr, data); default: - return KVM_MSR_RET_INVALID; + return KVM_MSR_RET_UNSUPPORTED; } } @@ -2605,13 +2603,13 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr) static int setup_vmcs_config(struct vmcs_config *vmcs_conf, struct vmx_capability *vmx_cap) { - u32 vmx_msr_low, vmx_msr_high; u32 _pin_based_exec_control = 0; u32 _cpu_based_exec_control = 0; u32 _cpu_based_2nd_exec_control = 0; u64 _cpu_based_3rd_exec_control = 0; u32 _vmexit_control = 0; u32 _vmentry_control = 0; + u64 basic_msr; u64 misc_msr; int i; @@ -2734,29 +2732,29 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, _vmexit_control &= ~x_ctrl; } - rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); + rdmsrl(MSR_IA32_VMX_BASIC, basic_msr); /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ - if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) + if (vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE) return -EIO; #ifdef CONFIG_X86_64 - /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */ - if (vmx_msr_high & (1u<<16)) + /* + * KVM expects to be able to shove all legal physical addresses into + * VMCS fields for 64-bit kernels, and per the SDM, "This bit is always + * 0 for processors that support Intel 64 architecture". + */ + if (basic_msr & VMX_BASIC_32BIT_PHYS_ADDR_ONLY) return -EIO; #endif /* Require Write-Back (WB) memory type for VMCS accesses. */ - if (((vmx_msr_high >> 18) & 15) != 6) + if (vmx_basic_vmcs_mem_type(basic_msr) != X86_MEMTYPE_WB) return -EIO; rdmsrl(MSR_IA32_VMX_MISC, misc_msr); - vmcs_conf->size = vmx_msr_high & 0x1fff; - vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; - - vmcs_conf->revision_id = vmx_msr_low; - + vmcs_conf->basic = basic_msr; vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control; @@ -2844,7 +2842,7 @@ fault: return -EFAULT; } -int vmx_hardware_enable(void) +int vmx_enable_virtualization_cpu(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2881,7 +2879,7 @@ static void vmclear_local_loaded_vmcss(void) __loaded_vmcs_clear(v); } -void vmx_hardware_disable(void) +void vmx_disable_virtualization_cpu(void) { vmclear_local_loaded_vmcss(); @@ -2903,13 +2901,13 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) if (!pages) return NULL; vmcs = page_address(pages); - memset(vmcs, 0, vmcs_config.size); + memset(vmcs, 0, vmx_basic_vmcs_size(vmcs_config.basic)); /* KVM supports Enlightened VMCS v1 only */ if (kvm_is_using_evmcs()) vmcs->hdr.revision_id = KVM_EVMCS_VERSION; else - vmcs->hdr.revision_id = vmcs_config.revision_id; + vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic); if (shadow) vmcs->hdr.shadow_vmcs = 1; @@ -3002,7 +3000,7 @@ static __init int alloc_kvm_area(void) * physical CPU. 
 */
 		if (kvm_is_using_evmcs())
-			vmcs->hdr.revision_id = vmcs_config.revision_id;
+			vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic);
 
 		per_cpu(vmxarea, cpu) = vmcs;
 	}
@@ -3220,7 +3218,7 @@ void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
 
 static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu))
+	if (is_guest_mode(vcpu) && nested_cpu_has_vpid(get_vmcs12(vcpu)))
 		return nested_get_vpid02(vcpu);
 	return to_vmx(vcpu)->vpid;
 }
@@ -4219,6 +4217,13 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	/*
+	 * DO NOT query the vCPU's vmcs12, as vmcs12 is dynamically allocated
+	 * and freed, and must not be accessed outside of vcpu->mutex. The
+	 * vCPU's cached PI NV is valid if and only if posted interrupts are
+	 * enabled in its vmcs12, i.e. checking the vector also checks that
+	 * L1 has enabled posted interrupts for L2.
+	 */
 	if (is_guest_mode(vcpu) &&
 	    vector == vmx->nested.posted_intr_nv) {
 		/*
@@ -4885,9 +4890,6 @@ void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmx->hv_deadline_tsc = -1;
 	kvm_set_cr8(vcpu, 0);
 
-	vmx_segment_cache_clear(vmx);
-	kvm_register_mark_available(vcpu, VCPU_EXREG_SEGMENTS);
-
 	seg_setup(VCPU_SREG_CS);
 	vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
 	vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
@@ -4914,6 +4916,9 @@ void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmcs_writel(GUEST_IDTR_BASE, 0);
 	vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
 
+	vmx_segment_cache_clear(vmx);
+	kvm_register_mark_available(vcpu, VCPU_EXREG_SEGMENTS);
+
 	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
 	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
 	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
@@ -5804,8 +5809,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	error_code |= (exit_qualification & EPT_VIOLATION_RWX_MASK)
 		      ? PFERR_PRESENT_MASK : 0;
 
-	error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
-		      PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
+	if (exit_qualification & EPT_VIOLATION_GVA_IS_VALID)
+		error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ?
+			      PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
 
 	/*
 	 * Check that the GPA doesn't exceed physical memory limits, as that is
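The qualification-to-error-code translation above is easy to get wrong, so here is a small standalone sketch of the same logic with illustrative bit positions (the kernel's real masks live in arch/x86/include/asm/vmx.h and kvm_host.h, and are assumptions here). It shows that the final-vs-page distinction is encoded only when the hardware reported a valid GVA:

#include <stdint.h>
#include <stdio.h>

/* Illustrative bit positions, not the kernel's definitions. */
#define QUAL_GVA_IS_VALID   (1u << 7)
#define QUAL_GVA_TRANSLATED (1u << 8)
#define PFERR_GUEST_FINAL   (1ull << 32)
#define PFERR_GUEST_PAGE    (1ull << 33)

static uint64_t qual_to_error_code(uint32_t qual)
{
	uint64_t ec = 0;

	/* Classify final-vs-page only when the reported GVA is valid. */
	if (qual & QUAL_GVA_IS_VALID)
		ec |= (qual & QUAL_GVA_TRANSLATED) ? PFERR_GUEST_FINAL
						   : PFERR_GUEST_PAGE;
	return ec;
}

int main(void)
{
	printf("invalid GVA   -> %#llx\n",
	       (unsigned long long)qual_to_error_code(0));
	printf("valid + final -> %#llx\n",
	       (unsigned long long)qual_to_error_code(QUAL_GVA_IS_VALID |
						      QUAL_GVA_TRANSLATED));
	printf("valid + walk  -> %#llx\n",
	       (unsigned long long)qual_to_error_code(QUAL_GVA_IS_VALID));
	return 0;
}

"Translated" means the access faulted on the final guest-physical access, hence PFERR_GUEST_FINAL; a fault taken during the guest page walk maps to PFERR_GUEST_PAGE; and when no valid GVA was reported, neither bit is set.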
@@ -7265,6 +7271,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu,
 		return handle_fastpath_set_msr_irqoff(vcpu);
 	case EXIT_REASON_PREEMPTION_TIMER:
 		return handle_fastpath_preemption_timer(vcpu, force_immediate_exit);
+	case EXIT_REASON_HLT:
+		return handle_fastpath_hlt(vcpu);
 	default:
 		return EXIT_FASTPATH_NONE;
 	}
@@ -7965,6 +7973,7 @@ static __init void vmx_set_cpu_caps(void)
 		kvm_cpu_cap_clear(X86_FEATURE_SGX_LC);
 		kvm_cpu_cap_clear(X86_FEATURE_SGX1);
 		kvm_cpu_cap_clear(X86_FEATURE_SGX2);
+		kvm_cpu_cap_clear(X86_FEATURE_SGX_EDECCSSA);
 	}
 
 	if (vmx_umip_emulated())
@@ -8515,7 +8524,7 @@ __init int vmx_hardware_setup(void)
 		u64 use_timer_freq = 5000ULL * 1000 * 1000;
 
 		cpu_preemption_timer_multi =
-			vmcs_config.misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+			vmx_misc_preemption_timer_rate(vmcs_config.misc);
 
 		if (tsc_khz)
 			use_timer_freq = (u64)tsc_khz * 1000;
@@ -8582,8 +8591,6 @@ static void __vmx_exit(void)
 {
 	allow_smaller_maxphyaddr = false;
 
-	cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
-
 	vmx_cleanup_l1d_flush();
 }
@@ -8630,8 +8637,6 @@ static int __init vmx_init(void)
 		pi_init_cpu(cpu);
 	}
 
-	cpu_emergency_register_virt_callback(vmx_emergency_disable);
-
 	vmx_check_vmcs12_offsets();
 
 	/*
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 42498fa63abb..2325f773a20b 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -17,10 +17,6 @@
 #include "run_flags.h"
 #include "../mmu.h"
 
-#define MSR_TYPE_R	1
-#define MSR_TYPE_W	2
-#define MSR_TYPE_RW	3
-
 #define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
 
 #ifdef CONFIG_X86_64
@@ -756,4 +752,9 @@ static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu)
 	return lapic_in_kernel(vcpu) && enable_ipiv;
 }
 
+static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
+{
+	vmx->segment_cache.bitmask = 0;
+}
+
 #endif /* __KVM_X86_VMX_H */
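The vmx.h hunk above turns the open-coded bitmask reset into a shared vmx_segment_cache_clear() helper, used by both vmx.c and nested.c in this series. For readers unfamiliar with the pattern, here is a small, self-contained model of a validity-bitmask cache; the struct layout and fake_vmread() are illustrative, not KVM's actual types:

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative model of the segment-cache pattern: a cached field is
 * only trusted when its bit is set in 'bitmask', so zeroing the bitmask
 * invalidates every cached field at once.
 */
struct seg_cache {
	uint32_t bitmask;	/* one valid-bit per cached field */
	uint16_t selector[6];	/* cached values, stale unless bit set */
};

static inline void seg_cache_clear(struct seg_cache *c)
{
	c->bitmask = 0;		/* mirrors vmx->segment_cache.bitmask = 0 */
}

static uint16_t seg_cache_read(struct seg_cache *c, int seg,
			       uint16_t (*vmread)(int))
{
	if (!(c->bitmask & (1u << seg))) {	/* miss: refill from "VMCS" */
		c->selector[seg] = vmread(seg);
		c->bitmask |= 1u << seg;
	}
	return c->selector[seg];
}

static uint16_t fake_vmread(int seg) { return 0x10 + seg; }

int main(void)
{
	struct seg_cache c = {0};

	printf("%#x\n", seg_cache_read(&c, 1, fake_vmread));
	seg_cache_clear(&c);	/* e.g. after the VMCS writes during RESET */
	printf("%#x\n", seg_cache_read(&c, 1, fake_vmread));
	return 0;
}

This model also explains the vmx_vcpu_reset() reordering earlier in the diff: clearing the cache after the segment VMCS writes ensures any subsequent refill observes the final register state rather than an intermediate one.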
diff --git a/arch/x86/kvm/vmx/vmx_onhyperv.h b/arch/x86/kvm/vmx/vmx_onhyperv.h
index eb48153bfd73..bba24ed99ee6 100644
--- a/arch/x86/kvm/vmx/vmx_onhyperv.h
+++ b/arch/x86/kvm/vmx/vmx_onhyperv.h
@@ -104,6 +104,14 @@ static inline void evmcs_load(u64 phys_addr)
 	struct hv_vp_assist_page *vp_ap =
 		hv_get_vp_assist_page(smp_processor_id());
 
+	/*
+	 * When enabling eVMCS, KVM verifies that every CPU has a valid hv_vp_assist_page()
+	 * and aborts enabling the feature otherwise. CPU onlining path is also checked in
+	 * vmx_enable_virtualization_cpu().
+	 */
+	if (KVM_BUG_ON(!vp_ap, kvm_get_running_vcpu()->kvm))
+		return;
+
 	if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall)
 		vp_ap->nested_control.features.directhypercall = 1;
 	vp_ap->current_nested_vmcs = phys_addr;
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 8060e5fc6dbd..93e020dc88f6 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -47,7 +47,7 @@ static __always_inline void vmcs_check16(unsigned long field)
 	BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
 			 "16-bit accessor invalid for 64-bit high field");
 	BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
-			 "16-bit accessor invalid for 32-bit high field");
+			 "16-bit accessor invalid for 32-bit field");
 	BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
 			 "16-bit accessor invalid for natural width field");
 }
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index ce3221cd1d01..a55981c5216e 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -13,8 +13,9 @@ extern struct kvm_x86_init_ops vt_init_ops __initdata;
 
 void vmx_hardware_unsetup(void);
 int vmx_check_processor_compat(void);
-int vmx_hardware_enable(void);
-void vmx_hardware_disable(void);
+int vmx_enable_virtualization_cpu(void);
+void vmx_disable_virtualization_cpu(void);
+void vmx_emergency_disable_virtualization_cpu(void);
 int vmx_vm_init(struct kvm *kvm);
 void vmx_vm_destroy(struct kvm *kvm);
 int vmx_vcpu_precreate(struct kvm *kvm);
@@ -56,7 +57,7 @@ bool vmx_has_emulated_msr(struct kvm *kvm, u32 index);
 void vmx_msr_filter_changed(struct kvm_vcpu *vcpu);
 void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
 void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
-int vmx_get_msr_feature(struct kvm_msr_entry *msr);
+int vmx_get_feature_msr(u32 msr, u64 *data);
 int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg);
 void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c983c8e434b8..83fe0a78146f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -305,24 +305,237 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
 static struct kmem_cache *x86_emulator_cache;
 
 /*
- * When called, it means the previous get/set msr reached an invalid msr.
- * Return true if we want to ignore/silent this failed msr access.
+ * The three MSR lists (msrs_to_save, emulated_msrs, msr_based_features) track
+ * the set of MSRs that KVM exposes to userspace through KVM_GET_MSRS,
+ * KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. msrs_to_save holds MSRs that
+ * require host support, i.e. should be probed via RDMSR. emulated_msrs holds
+ * MSRs that KVM emulates without strictly requiring host support.
+ * msr_based_features holds MSRs that enumerate features, i.e. are effectively
+ * CPUID leafs. Note, msr_based_features isn't mutually exclusive with
+ * msrs_to_save and emulated_msrs.
*/ -static bool kvm_msr_ignored_check(u32 msr, u64 data, bool write) + +static const u32 msrs_to_save_base[] = { + MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, + MSR_STAR, +#ifdef CONFIG_X86_64 + MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, +#endif + MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, + MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_SPEC_CTRL, MSR_IA32_TSX_CTRL, + MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, + MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, + MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B, + MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B, + MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, + MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, + MSR_IA32_UMWAIT_CONTROL, + + MSR_IA32_XFD, MSR_IA32_XFD_ERR, +}; + +static const u32 msrs_to_save_pmu[] = { + MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, + MSR_ARCH_PERFMON_FIXED_CTR0 + 2, + MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, + MSR_CORE_PERF_GLOBAL_CTRL, + MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG, + + /* This part of MSRs should match KVM_MAX_NR_INTEL_GP_COUNTERS. */ + MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, + MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3, + MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5, + MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7, + MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, + MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, + MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, + MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7, + + MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, + MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3, + + /* This part of MSRs should match KVM_MAX_NR_AMD_GP_COUNTERS. 
*/ + MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2, + MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5, + MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2, + MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5, + + MSR_AMD64_PERF_CNTR_GLOBAL_CTL, + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, +}; + +static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) + + ARRAY_SIZE(msrs_to_save_pmu)]; +static unsigned num_msrs_to_save; + +static const u32 emulated_msrs_all[] = { + MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, + MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, + +#ifdef CONFIG_KVM_HYPERV + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, + HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, + HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY, + HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, + HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, + HV_X64_MSR_RESET, + HV_X64_MSR_VP_INDEX, + HV_X64_MSR_VP_RUNTIME, + HV_X64_MSR_SCONTROL, + HV_X64_MSR_STIMER0_CONFIG, + HV_X64_MSR_VP_ASSIST_PAGE, + HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL, + HV_X64_MSR_TSC_EMULATION_STATUS, HV_X64_MSR_TSC_INVARIANT_CONTROL, + HV_X64_MSR_SYNDBG_OPTIONS, + HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS, + HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER, + HV_X64_MSR_SYNDBG_PENDING_BUFFER, +#endif + + MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, + MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK, + + MSR_IA32_TSC_ADJUST, + MSR_IA32_TSC_DEADLINE, + MSR_IA32_ARCH_CAPABILITIES, + MSR_IA32_PERF_CAPABILITIES, + MSR_IA32_MISC_ENABLE, + MSR_IA32_MCG_STATUS, + MSR_IA32_MCG_CTL, + MSR_IA32_MCG_EXT_CTL, + MSR_IA32_SMBASE, + MSR_SMI_COUNT, + MSR_PLATFORM_INFO, + MSR_MISC_FEATURES_ENABLES, + MSR_AMD64_VIRT_SPEC_CTRL, + MSR_AMD64_TSC_RATIO, + MSR_IA32_POWER_CTL, + MSR_IA32_UCODE_REV, + + /* + * KVM always supports the "true" VMX control MSRs, even if the host + * does not. The VMX MSRs as a whole are considered "emulated" as KVM + * doesn't strictly require them to exist in the host (ignoring that + * KVM would refuse to load in the first place if the core set of MSRs + * aren't supported). + */ + MSR_IA32_VMX_BASIC, + MSR_IA32_VMX_TRUE_PINBASED_CTLS, + MSR_IA32_VMX_TRUE_PROCBASED_CTLS, + MSR_IA32_VMX_TRUE_EXIT_CTLS, + MSR_IA32_VMX_TRUE_ENTRY_CTLS, + MSR_IA32_VMX_MISC, + MSR_IA32_VMX_CR0_FIXED0, + MSR_IA32_VMX_CR4_FIXED0, + MSR_IA32_VMX_VMCS_ENUM, + MSR_IA32_VMX_PROCBASED_CTLS2, + MSR_IA32_VMX_EPT_VPID_CAP, + MSR_IA32_VMX_VMFUNC, + + MSR_K7_HWCR, + MSR_KVM_POLL_CONTROL, +}; + +static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)]; +static unsigned num_emulated_msrs; + +/* + * List of MSRs that control the existence of MSR-based features, i.e. MSRs + * that are effectively CPUID leafs. VMX MSRs are also included in the set of + * feature MSRs, but are handled separately to allow expedited lookups. + */ +static const u32 msr_based_features_all_except_vmx[] = { + MSR_AMD64_DE_CFG, + MSR_IA32_UCODE_REV, + MSR_IA32_ARCH_CAPABILITIES, + MSR_IA32_PERF_CAPABILITIES, +}; + +static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) + + (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)]; +static unsigned int num_msr_based_features; + +/* + * All feature MSRs except uCode revID, which tracks the currently loaded uCode + * patch, are immutable once the vCPU model is defined. + */ +static bool kvm_is_immutable_feature_msr(u32 msr) { - const char *op = write ? 
"wrmsr" : "rdmsr"; + int i; - if (ignore_msrs) { - if (report_ignored_msrs) - kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", - op, msr, data); - /* Mask the error */ + if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR) return true; - } else { + + for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) { + if (msr == msr_based_features_all_except_vmx[i]) + return msr != MSR_IA32_UCODE_REV; + } + + return false; +} + +static bool kvm_is_advertised_msr(u32 msr_index) +{ + unsigned int i; + + for (i = 0; i < num_msrs_to_save; i++) { + if (msrs_to_save[i] == msr_index) + return true; + } + + for (i = 0; i < num_emulated_msrs; i++) { + if (emulated_msrs[i] == msr_index) + return true; + } + + return false; +} + +typedef int (*msr_access_t)(struct kvm_vcpu *vcpu, u32 index, u64 *data, + bool host_initiated); + +static __always_inline int kvm_do_msr_access(struct kvm_vcpu *vcpu, u32 msr, + u64 *data, bool host_initiated, + enum kvm_msr_access rw, + msr_access_t msr_access_fn) +{ + const char *op = rw == MSR_TYPE_W ? "wrmsr" : "rdmsr"; + int ret; + + BUILD_BUG_ON(rw != MSR_TYPE_R && rw != MSR_TYPE_W); + + /* + * Zero the data on read failures to avoid leaking stack data to the + * guest and/or userspace, e.g. if the failure is ignored below. + */ + ret = msr_access_fn(vcpu, msr, data, host_initiated); + if (ret && rw == MSR_TYPE_R) + *data = 0; + + if (ret != KVM_MSR_RET_UNSUPPORTED) + return ret; + + /* + * Userspace is allowed to read MSRs, and write '0' to MSRs, that KVM + * advertises to userspace, even if an MSR isn't fully supported. + * Simply check that @data is '0', which covers both the write '0' case + * and all reads (in which case @data is zeroed on failure; see above). + */ + if (host_initiated && !*data && kvm_is_advertised_msr(msr)) + return 0; + + if (!ignore_msrs) { kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n", - op, msr, data); - return false; + op, msr, *data); + return ret; } + + if (report_ignored_msrs) + kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", op, msr, *data); + + return 0; } static struct kmem_cache *kvm_alloc_emulator_cache(void) @@ -355,7 +568,7 @@ static void kvm_on_user_return(struct user_return_notifier *urn) /* * Disabling irqs at this point since the following code could be - * interrupted and executed through kvm_arch_hardware_disable() + * interrupted and executed through kvm_arch_disable_virtualization_cpu() */ local_irq_save(flags); if (msrs->registered) { @@ -413,8 +626,7 @@ EXPORT_SYMBOL_GPL(kvm_find_user_return_msr); static void kvm_user_return_msr_cpu_online(void) { - unsigned int cpu = smp_processor_id(); - struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu); + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); u64 value; int i; @@ -621,12 +833,6 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto ex->payload = payload; } -/* Forcibly leave the nested mode in cases like a vCPU reset */ -static void kvm_leave_nested(struct kvm_vcpu *vcpu) -{ - kvm_x86_ops.nested_ops->leave_nested(vcpu); -} - static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error, u32 error_code, bool has_payload, unsigned long payload, bool reinject) @@ -1412,178 +1618,6 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc); /* - * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features) track - * the set of MSRs that KVM exposes to userspace through KVM_GET_MSRS, - * KVM_SET_MSRS, and 
KVM_GET_MSR_INDEX_LIST. msrs_to_save holds MSRs that - * require host support, i.e. should be probed via RDMSR. emulated_msrs holds - * MSRs that KVM emulates without strictly requiring host support. - * msr_based_features holds MSRs that enumerate features, i.e. are effectively - * CPUID leafs. Note, msr_based_features isn't mutually exclusive with - * msrs_to_save and emulated_msrs. - */ - -static const u32 msrs_to_save_base[] = { - MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, - MSR_STAR, -#ifdef CONFIG_X86_64 - MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, -#endif - MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, - MSR_IA32_SPEC_CTRL, MSR_IA32_TSX_CTRL, - MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, - MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, - MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B, - MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B, - MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, - MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, - MSR_IA32_UMWAIT_CONTROL, - - MSR_IA32_XFD, MSR_IA32_XFD_ERR, -}; - -static const u32 msrs_to_save_pmu[] = { - MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, - MSR_ARCH_PERFMON_FIXED_CTR0 + 2, - MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, - MSR_CORE_PERF_GLOBAL_CTRL, - MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG, - - /* This part of MSRs should match KVM_MAX_NR_INTEL_GP_COUNTERS. */ - MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, - MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3, - MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5, - MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7, - MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, - MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, - MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, - MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7, - - MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, - MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3, - - /* This part of MSRs should match KVM_MAX_NR_AMD_GP_COUNTERS. 
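[The unified kvm_do_msr_access() helper introduced a few hunks up replaces the separate *_ignored_check paths. A minimal standalone sketch of that pattern follows; every name is an illustrative mock rather than a kernel symbol, and the advertised-MSR write-'0' exemption is elided for brevity.]

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MOCK_MSR_RET_UNSUPPORTED 2

enum mock_rw { MOCK_READ, MOCK_WRITE };

typedef int (*mock_msr_fn)(uint32_t index, uint64_t *data);

static bool ignore_msrs_mock = true;

static int mock_do_msr_access(uint32_t msr, uint64_t *data,
			      enum mock_rw rw, mock_msr_fn fn)
{
	int ret = fn(msr, data);

	/* Never leak stale data to the caller on a failed read. */
	if (ret && rw == MOCK_READ)
		*data = 0;

	if (ret != MOCK_MSR_RET_UNSUPPORTED)
		return ret;

	if (!ignore_msrs_mock) {
		fprintf(stderr, "unhandled %s: 0x%x\n",
			rw == MOCK_WRITE ? "wrmsr" : "rdmsr", (unsigned)msr);
		return ret;
	}

	return 0;	/* ignore_msrs: pretend the access succeeded */
}

static int mock_get(uint32_t index, uint64_t *data)
{
	(void)index; (void)data;
	return MOCK_MSR_RET_UNSUPPORTED;	/* nothing implemented */
}

int main(void)
{
	uint64_t v = 0xdeadbeefULL;
	int ret = mock_do_msr_access(0x10, &v, MOCK_READ, mock_get);

	printf("ret=%d data=%llu\n", ret, (unsigned long long)v);
	return 0;
}

[The behavioral point mirrored from the diff: a failed read must report data=0 rather than whatever happened to be on the stack.]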
*/ - MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2, - MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5, - MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2, - MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5, - - MSR_AMD64_PERF_CNTR_GLOBAL_CTL, - MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, - MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, -}; - -static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) + - ARRAY_SIZE(msrs_to_save_pmu)]; -static unsigned num_msrs_to_save; - -static const u32 emulated_msrs_all[] = { - MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, - MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, - -#ifdef CONFIG_KVM_HYPERV - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, - HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, - HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY, - HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, - HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, - HV_X64_MSR_RESET, - HV_X64_MSR_VP_INDEX, - HV_X64_MSR_VP_RUNTIME, - HV_X64_MSR_SCONTROL, - HV_X64_MSR_STIMER0_CONFIG, - HV_X64_MSR_VP_ASSIST_PAGE, - HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL, - HV_X64_MSR_TSC_EMULATION_STATUS, HV_X64_MSR_TSC_INVARIANT_CONTROL, - HV_X64_MSR_SYNDBG_OPTIONS, - HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS, - HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER, - HV_X64_MSR_SYNDBG_PENDING_BUFFER, -#endif - - MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, - MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK, - - MSR_IA32_TSC_ADJUST, - MSR_IA32_TSC_DEADLINE, - MSR_IA32_ARCH_CAPABILITIES, - MSR_IA32_PERF_CAPABILITIES, - MSR_IA32_MISC_ENABLE, - MSR_IA32_MCG_STATUS, - MSR_IA32_MCG_CTL, - MSR_IA32_MCG_EXT_CTL, - MSR_IA32_SMBASE, - MSR_SMI_COUNT, - MSR_PLATFORM_INFO, - MSR_MISC_FEATURES_ENABLES, - MSR_AMD64_VIRT_SPEC_CTRL, - MSR_AMD64_TSC_RATIO, - MSR_IA32_POWER_CTL, - MSR_IA32_UCODE_REV, - - /* - * KVM always supports the "true" VMX control MSRs, even if the host - * does not. The VMX MSRs as a whole are considered "emulated" as KVM - * doesn't strictly require them to exist in the host (ignoring that - * KVM would refuse to load in the first place if the core set of MSRs - * aren't supported). - */ - MSR_IA32_VMX_BASIC, - MSR_IA32_VMX_TRUE_PINBASED_CTLS, - MSR_IA32_VMX_TRUE_PROCBASED_CTLS, - MSR_IA32_VMX_TRUE_EXIT_CTLS, - MSR_IA32_VMX_TRUE_ENTRY_CTLS, - MSR_IA32_VMX_MISC, - MSR_IA32_VMX_CR0_FIXED0, - MSR_IA32_VMX_CR4_FIXED0, - MSR_IA32_VMX_VMCS_ENUM, - MSR_IA32_VMX_PROCBASED_CTLS2, - MSR_IA32_VMX_EPT_VPID_CAP, - MSR_IA32_VMX_VMFUNC, - - MSR_K7_HWCR, - MSR_KVM_POLL_CONTROL, -}; - -static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)]; -static unsigned num_emulated_msrs; - -/* - * List of MSRs that control the existence of MSR-based features, i.e. MSRs - * that are effectively CPUID leafs. VMX MSRs are also included in the set of - * feature MSRs, but are handled separately to allow expedited lookups. - */ -static const u32 msr_based_features_all_except_vmx[] = { - MSR_AMD64_DE_CFG, - MSR_IA32_UCODE_REV, - MSR_IA32_ARCH_CAPABILITIES, - MSR_IA32_PERF_CAPABILITIES, -}; - -static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) + - (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)]; -static unsigned int num_msr_based_features; - -/* - * All feature MSRs except uCode revID, which tracks the currently loaded uCode - * patch, are immutable once the vCPU model is defined. 
- */ -static bool kvm_is_immutable_feature_msr(u32 msr) -{ - int i; - - if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR) - return true; - - for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) { - if (msr == msr_based_features_all_except_vmx[i]) - return msr != MSR_IA32_UCODE_REV; - } - - return false; -} - -/* * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM * does not yet virtualize. These include: * 10 - MISC_PACKAGE_CTRLS @@ -1660,40 +1694,31 @@ static u64 kvm_get_arch_capabilities(void) return data; } -static int kvm_get_msr_feature(struct kvm_msr_entry *msr) +static int kvm_get_feature_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, + bool host_initiated) { - switch (msr->index) { + WARN_ON_ONCE(!host_initiated); + + switch (index) { case MSR_IA32_ARCH_CAPABILITIES: - msr->data = kvm_get_arch_capabilities(); + *data = kvm_get_arch_capabilities(); break; case MSR_IA32_PERF_CAPABILITIES: - msr->data = kvm_caps.supported_perf_cap; + *data = kvm_caps.supported_perf_cap; break; case MSR_IA32_UCODE_REV: - rdmsrl_safe(msr->index, &msr->data); + rdmsrl_safe(index, data); break; default: - return kvm_x86_call(get_msr_feature)(msr); + return kvm_x86_call(get_feature_msr)(index, data); } return 0; } -static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) +static int do_get_feature_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { - struct kvm_msr_entry msr; - int r; - - /* Unconditionally clear the output for simplicity */ - msr.data = 0; - msr.index = index; - r = kvm_get_msr_feature(&msr); - - if (r == KVM_MSR_RET_INVALID && kvm_msr_ignored_check(index, 0, false)) - r = 0; - - *data = msr.data; - - return r; + return kvm_do_msr_access(vcpu, index, data, true, MSR_TYPE_R, + kvm_get_feature_msr); } static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) @@ -1880,16 +1905,17 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, return kvm_x86_call(set_msr)(vcpu, &msr); } +static int _kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, + bool host_initiated) +{ + return __kvm_set_msr(vcpu, index, *data, host_initiated); +} + static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu, u32 index, u64 data, bool host_initiated) { - int ret = __kvm_set_msr(vcpu, index, data, host_initiated); - - if (ret == KVM_MSR_RET_INVALID) - if (kvm_msr_ignored_check(index, data, true)) - ret = 0; - - return ret; + return kvm_do_msr_access(vcpu, index, &data, host_initiated, MSR_TYPE_W, + _kvm_set_msr); } /* @@ -1928,31 +1954,25 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated) { - int ret = __kvm_get_msr(vcpu, index, data, host_initiated); - - if (ret == KVM_MSR_RET_INVALID) { - /* Unconditionally clear *data for simplicity */ - *data = 0; - if (kvm_msr_ignored_check(index, 0, false)) - ret = 0; - } - - return ret; + return kvm_do_msr_access(vcpu, index, data, host_initiated, MSR_TYPE_R, + __kvm_get_msr); } -static int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) +int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) { if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) return KVM_MSR_RET_FILTERED; return kvm_get_msr_ignored_check(vcpu, index, data, false); } +EXPORT_SYMBOL_GPL(kvm_get_msr_with_filter); -static int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data) +int kvm_set_msr_with_filter(struct kvm_vcpu 
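[One detail worth calling out from the hunk above: __kvm_set_msr() takes its value by value, while kvm_do_msr_access() wants the read-style (index, u64 *data) callback, hence the tiny _kvm_set_msr() shim. The shape of that adapter, with illustrative names only:]

#include <stdint.h>

/* Common callback signature shared by reads and writes (illustrative). */
typedef int (*mock_access_fn)(uint32_t index, uint64_t *data);

static int mock_set_by_value(uint32_t index, uint64_t data)
{
	(void)index; (void)data;	/* ...program the MSR here... */
	return 0;
}

/* The adapter: dereference once so writes fit the common signature. */
static int mock_set_shim(uint32_t index, uint64_t *data)
{
	return mock_set_by_value(index, *data);
}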
*vcpu, u32 index, u64 data) { if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) return KVM_MSR_RET_FILTERED; return kvm_set_msr_ignored_check(vcpu, index, data, false); } +EXPORT_SYMBOL_GPL(kvm_set_msr_with_filter); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) { @@ -1999,7 +2019,7 @@ static int complete_fast_rdmsr(struct kvm_vcpu *vcpu) static u64 kvm_msr_reason(int r) { switch (r) { - case KVM_MSR_RET_INVALID: + case KVM_MSR_RET_UNSUPPORTED: return KVM_MSR_EXIT_REASON_UNKNOWN; case KVM_MSR_RET_FILTERED: return KVM_MSR_EXIT_REASON_FILTER; @@ -2162,31 +2182,34 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) { u32 msr = kvm_rcx_read(vcpu); u64 data; - fastpath_t ret = EXIT_FASTPATH_NONE; + fastpath_t ret; + bool handled; kvm_vcpu_srcu_read_lock(vcpu); switch (msr) { case APIC_BASE_MSR + (APIC_ICR >> 4): data = kvm_read_edx_eax(vcpu); - if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) { - kvm_skip_emulated_instruction(vcpu); - ret = EXIT_FASTPATH_EXIT_HANDLED; - } + handled = !handle_fastpath_set_x2apic_icr_irqoff(vcpu, data); break; case MSR_IA32_TSC_DEADLINE: data = kvm_read_edx_eax(vcpu); - if (!handle_fastpath_set_tscdeadline(vcpu, data)) { - kvm_skip_emulated_instruction(vcpu); - ret = EXIT_FASTPATH_REENTER_GUEST; - } + handled = !handle_fastpath_set_tscdeadline(vcpu, data); break; default: + handled = false; break; } - if (ret != EXIT_FASTPATH_NONE) + if (handled) { + if (!kvm_skip_emulated_instruction(vcpu)) + ret = EXIT_FASTPATH_EXIT_USERSPACE; + else + ret = EXIT_FASTPATH_REENTER_GUEST; trace_kvm_msr_write(msr, data); + } else { + ret = EXIT_FASTPATH_NONE; + } kvm_vcpu_srcu_read_unlock(vcpu); @@ -3746,18 +3769,6 @@ static void record_steal_time(struct kvm_vcpu *vcpu) mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); } -static bool kvm_is_msr_to_save(u32 msr_index) -{ - unsigned int i; - - for (i = 0; i < num_msrs_to_save; i++) { - if (msrs_to_save[i] == msr_index) - return true; - } - - return false; -} - int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { u32 msr = msr_info->index; @@ -4139,15 +4150,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (kvm_pmu_is_valid_msr(vcpu, msr)) return kvm_pmu_set_msr(vcpu, msr_info); - /* - * Userspace is allowed to write '0' to MSRs that KVM reports - * as to-be-saved, even if an MSRs isn't fully supported. - */ - if (msr_info->host_initiated && !data && - kvm_is_msr_to_save(msr)) - break; - - return KVM_MSR_RET_INVALID; + return KVM_MSR_RET_UNSUPPORTED; } return 0; } @@ -4498,17 +4501,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) return kvm_pmu_get_msr(vcpu, msr_info); - /* - * Userspace is allowed to read MSRs that KVM reports as - * to-be-saved, even if an MSR isn't fully supported. 
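[The handle_fastpath_set_msr_irqoff() rework above separates "was the MSR handled" from "which fastpath exit to take", so a failed kvm_skip_emulated_instruction() (e.g. under single-stepping) can surface as an exit to userspace. The decision tail, reduced to a sketch; the enum mirrors the diff's EXIT_FASTPATH_* values in spirit, but the names here are illustrative:]

#include <stdbool.h>

enum mock_fastpath {
	FP_NONE,		/* not handled, take the slow path */
	FP_REENTER_GUEST,	/* handled, go straight back into the guest */
	FP_EXIT_USERSPACE,	/* handled, but the skip demands a userspace exit */
};

static enum mock_fastpath msr_write_fastpath(bool handled, bool skip_ok)
{
	if (!handled)
		return FP_NONE;

	return skip_ok ? FP_REENTER_GUEST : FP_EXIT_USERSPACE;
}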
- */ - if (msr_info->host_initiated && - kvm_is_msr_to_save(msr_info->index)) { - msr_info->data = 0; - break; - } - - return KVM_MSR_RET_INVALID; + return KVM_MSR_RET_UNSUPPORTED; } return 0; } @@ -4946,7 +4939,7 @@ long kvm_arch_dev_ioctl(struct file *filp, break; } case KVM_GET_MSRS: - r = msr_io(NULL, argp, do_get_msr_feature, 1); + r = msr_io(NULL, argp, do_get_feature_msr, 1); break; #ifdef CONFIG_KVM_HYPERV case KVM_GET_SUPPORTED_HV_CPUID: @@ -7383,11 +7376,9 @@ out: static void kvm_probe_feature_msr(u32 msr_index) { - struct kvm_msr_entry msr = { - .index = msr_index, - }; + u64 data; - if (kvm_get_msr_feature(&msr)) + if (kvm_get_feature_msr(NULL, msr_index, &data, true)) return; msr_based_features[num_msr_based_features++] = msr_index; @@ -8865,60 +8856,13 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) return 1; } -static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, - int emulation_type) +static bool kvm_unprotect_and_retry_on_failure(struct kvm_vcpu *vcpu, + gpa_t cr2_or_gpa, + int emulation_type) { - gpa_t gpa = cr2_or_gpa; - kvm_pfn_t pfn; - if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) return false; - if (WARN_ON_ONCE(is_guest_mode(vcpu)) || - WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF))) - return false; - - if (!vcpu->arch.mmu->root_role.direct) { - /* - * Write permission should be allowed since only - * write access need to be emulated. - */ - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); - - /* - * If the mapping is invalid in guest, let cpu retry - * it to generate fault. - */ - if (gpa == INVALID_GPA) - return true; - } - - /* - * Do not retry the unhandleable instruction if it faults on the - * readonly host memory, otherwise it will goto a infinite loop: - * retry instruction -> write #PF -> emulation fail -> retry - * instruction -> ... - */ - pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); - - /* - * If the instruction failed on the error pfn, it can not be fixed, - * report the error to userspace. - */ - if (is_error_noslot_pfn(pfn)) - return false; - - kvm_release_pfn_clean(pfn); - - /* - * If emulation may have been triggered by a write to a shadowed page - * table, unprotect the gfn (zap any relevant SPTEs) and re-enter the - * guest to let the CPU re-execute the instruction in the hope that the - * CPU can cleanly execute the instruction that KVM failed to emulate. - */ - if (vcpu->kvm->arch.indirect_shadow_pages) - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); - /* * If the failed instruction faulted on an access to page tables that * are used to translate any part of the instruction, KVM can't resolve @@ -8929,54 +8873,24 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, * then zap the SPTE to unprotect the gfn, and then do it all over * again. Report the error to userspace. */ - return !(emulation_type & EMULTYPE_WRITE_PF_TO_SP); -} - -static bool retry_instruction(struct x86_emulate_ctxt *ctxt, - gpa_t cr2_or_gpa, int emulation_type) -{ - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa; - - last_retry_eip = vcpu->arch.last_retry_eip; - last_retry_addr = vcpu->arch.last_retry_addr; + if (emulation_type & EMULTYPE_WRITE_PF_TO_SP) + return false; /* - * If the emulation is caused by #PF and it is non-page_table - * writing instruction, it means the VM-EXIT is caused by shadow - * page protected, we can zap the shadow page and retry this - * instruction directly. 
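[kvm_probe_feature_msr() above shows the probing idiom used to build the advertised lists: attempt the read, and append the index only on success, so the list never exceeds what the host can back. A generalized sketch; the probe helper and array names are hypothetical:]

#include <stdint.h>

#define MOCK_MAX_FEATURES 64

static uint32_t mock_features[MOCK_MAX_FEATURES];
static unsigned int mock_num_features;

/* Hypothetical probe standing in for kvm_get_feature_msr(). */
static int mock_read_feature(uint32_t index, uint64_t *data)
{
	*data = 0;
	return index & 1;	/* pretend odd indices are unsupported */
}

static void mock_probe_feature(uint32_t index)
{
	uint64_t data;

	if (mock_read_feature(index, &data))
		return;		/* unsupported on this host: don't advertise */

	mock_features[mock_num_features++] = index;
}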
- * - * Note: if the guest uses a non-page-table modifying instruction - * on the PDE that points to the instruction, then we will unmap - * the instruction and go to an infinite loop. So, we cache the - * last retried eip and the last fault address, if we meet the eip - * and the address again, we can break out of the potential infinite - * loop. + * If emulation may have been triggered by a write to a shadowed page + * table, unprotect the gfn (zap any relevant SPTEs) and re-enter the + * guest to let the CPU re-execute the instruction in the hope that the + * CPU can cleanly execute the instruction that KVM failed to emulate. */ - vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0; - - if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) - return false; - - if (WARN_ON_ONCE(is_guest_mode(vcpu)) || - WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF))) - return false; - - if (x86_page_table_writing_insn(ctxt)) - return false; - - if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa) - return false; - - vcpu->arch.last_retry_eip = ctxt->eip; - vcpu->arch.last_retry_addr = cr2_or_gpa; - - if (!vcpu->arch.mmu->root_role.direct) - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); - - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, true); + /* + * Retry even if _this_ vCPU didn't unprotect the gfn, as it's possible + * all SPTEs were already zapped by a different task. The alternative + * is to report the error to userspace and likely terminate the guest, + * and the last_retry_{eip,addr} checks will prevent retrying the page + * fault indefinitely, i.e. there's nothing to lose by retrying. + */ return true; } @@ -9176,6 +9090,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; bool writeback = true; + if ((emulation_type & EMULTYPE_ALLOW_RETRY_PF) && + (WARN_ON_ONCE(is_guest_mode(vcpu)) || + WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))) + emulation_type &= ~EMULTYPE_ALLOW_RETRY_PF; + r = kvm_check_emulate_insn(vcpu, emulation_type, insn, insn_len); if (r != X86EMUL_CONTINUE) { if (r == X86EMUL_RETRY_INSTR || r == X86EMUL_PROPAGATE_FAULT) @@ -9206,8 +9125,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, kvm_queue_exception(vcpu, UD_VECTOR); return 1; } - if (reexecute_instruction(vcpu, cr2_or_gpa, - emulation_type)) + if (kvm_unprotect_and_retry_on_failure(vcpu, cr2_or_gpa, + emulation_type)) return 1; if (ctxt->have_exception && @@ -9254,7 +9173,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, return 1; } - if (retry_instruction(ctxt, cr2_or_gpa, emulation_type)) + /* + * If emulation was caused by a write-protection #PF on a non-page_table + * writing instruction, try to unprotect the gfn, i.e. zap shadow pages, + * and retry the instruction, as the vCPU is likely no longer using the + * gfn as a page table. 
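[The new comment above leans on the last_retry_{eip,addr} checks to bound retries. A sketch of that loop breaker: retry a faulting instruction once, but refuse a second retry of the same (RIP, fault address) pair, which is what would otherwise livelock when an instruction writes the page table mapping itself. Mock types only:]

#include <stdbool.h>
#include <stdint.h>

struct mock_retry_state {
	uint64_t last_eip;
	uint64_t last_addr;
};

static bool mock_allow_retry(struct mock_retry_state *s,
			     uint64_t eip, uint64_t addr)
{
	if (s->last_eip == eip && s->last_addr == addr)
		return false;	/* same fault twice: give up */

	s->last_eip = eip;
	s->last_addr = addr;
	return true;
}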
+ */ + if ((emulation_type & EMULTYPE_ALLOW_RETRY_PF) && + !x86_page_table_writing_insn(ctxt) && + kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa)) return 1; /* this is needed for vmware backdoor interface to work since it @@ -9285,7 +9212,8 @@ restart: return 1; if (r == EMULATION_FAILED) { - if (reexecute_instruction(vcpu, cr2_or_gpa, emulation_type)) + if (kvm_unprotect_and_retry_on_failure(vcpu, cr2_or_gpa, + emulation_type)) return 1; return handle_emulation_failure(vcpu, emulation_type); @@ -9753,7 +9681,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) guard(mutex)(&vendor_module_lock); - if (kvm_x86_ops.hardware_enable) { + if (kvm_x86_ops.enable_virtualization_cpu) { pr_err("already loaded vendor module '%s'\n", kvm_x86_ops.name); return -EEXIST; } @@ -9880,7 +9808,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) return 0; out_unwind_ops: - kvm_x86_ops.hardware_enable = NULL; + kvm_x86_ops.enable_virtualization_cpu = NULL; kvm_x86_call(hardware_unsetup)(); out_mmu_exit: kvm_mmu_vendor_module_exit(); @@ -9921,56 +9849,11 @@ void kvm_x86_vendor_exit(void) WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key)); #endif mutex_lock(&vendor_module_lock); - kvm_x86_ops.hardware_enable = NULL; + kvm_x86_ops.enable_virtualization_cpu = NULL; mutex_unlock(&vendor_module_lock); } EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit); -static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason) -{ - /* - * The vCPU has halted, e.g. executed HLT. Update the run state if the - * local APIC is in-kernel, the run loop will detect the non-runnable - * state and halt the vCPU. Exit to userspace if the local APIC is - * managed by userspace, in which case userspace is responsible for - * handling wake events. - */ - ++vcpu->stat.halt_exits; - if (lapic_in_kernel(vcpu)) { - vcpu->arch.mp_state = state; - return 1; - } else { - vcpu->run->exit_reason = reason; - return 0; - } -} - -int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu) -{ - return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT); -} -EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip); - -int kvm_emulate_halt(struct kvm_vcpu *vcpu) -{ - int ret = kvm_skip_emulated_instruction(vcpu); - /* - * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered - * KVM_EXIT_DEBUG here. 
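[Reduced to its decision table, kvm_unprotect_and_retry_on_failure() above retries only when #PF retries are allowed at all and the write did not fault on the page tables translating the instruction itself. A sketch with hypothetical flag bits standing in for the EMULTYPE_* values:]

#include <stdbool.h>

#define MOCK_ALLOW_RETRY_PF	(1u << 0)	/* stands in for EMULTYPE_ALLOW_RETRY_PF */
#define MOCK_WRITE_PF_TO_SP	(1u << 1)	/* stands in for EMULTYPE_WRITE_PF_TO_SP */

static bool mock_unprotect_and_retry(unsigned int emulation_type)
{
	if (!(emulation_type & MOCK_ALLOW_RETRY_PF))
		return false;

	if (emulation_type & MOCK_WRITE_PF_TO_SP)
		return false;	/* faulted on its own translation: report it */

	return true;		/* zap SPTEs, let the CPU re-execute */
}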
- */ - return kvm_emulate_halt_noskip(vcpu) && ret; -} -EXPORT_SYMBOL_GPL(kvm_emulate_halt); - -int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) -{ - int ret = kvm_skip_emulated_instruction(vcpu); - - return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, - KVM_EXIT_AP_RESET_HOLD) && ret; -} -EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold); - #ifdef CONFIG_X86_64 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, unsigned long clock_type) @@ -11207,6 +11090,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (vcpu->arch.apic_attention) kvm_lapic_sync_from_vapic(vcpu); + if (unlikely(exit_fastpath == EXIT_FASTPATH_EXIT_USERSPACE)) + return 0; + r = kvm_x86_call(handle_exit)(vcpu, exit_fastpath); return r; @@ -11220,6 +11106,67 @@ out: return r; } +static bool kvm_vcpu_running(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && + !vcpu->arch.apf.halted); +} + +static bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) +{ + if (!list_empty_careful(&vcpu->async_pf.done)) + return true; + + if (kvm_apic_has_pending_init_or_sipi(vcpu) && + kvm_apic_init_sipi_allowed(vcpu)) + return true; + + if (vcpu->arch.pv.pv_unhalted) + return true; + + if (kvm_is_exception_pending(vcpu)) + return true; + + if (kvm_test_request(KVM_REQ_NMI, vcpu) || + (vcpu->arch.nmi_pending && + kvm_x86_call(nmi_allowed)(vcpu, false))) + return true; + +#ifdef CONFIG_KVM_SMM + if (kvm_test_request(KVM_REQ_SMI, vcpu) || + (vcpu->arch.smi_pending && + kvm_x86_call(smi_allowed)(vcpu, false))) + return true; +#endif + + if (kvm_test_request(KVM_REQ_PMI, vcpu)) + return true; + + if (kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu)) + return true; + + if (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)) + return true; + + if (kvm_hv_has_stimer_pending(vcpu)) + return true; + + if (is_guest_mode(vcpu) && + kvm_x86_ops.nested_ops->has_events && + kvm_x86_ops.nested_ops->has_events(vcpu, false)) + return true; + + if (kvm_xen_has_pending_events(vcpu)) + return true; + + return false; +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); +} + /* Called within kvm->srcu read side. */ static inline int vcpu_block(struct kvm_vcpu *vcpu) { @@ -11291,12 +11238,6 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu) return 1; } -static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && - !vcpu->arch.apf.halted); -} - /* Called within kvm->srcu read side. */ static int vcpu_run(struct kvm_vcpu *vcpu) { @@ -11348,6 +11289,98 @@ static int vcpu_run(struct kvm_vcpu *vcpu) return r; } +static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason) +{ + /* + * The vCPU has halted, e.g. executed HLT. Update the run state if the + * local APIC is in-kernel, the run loop will detect the non-runnable + * state and halt the vCPU. Exit to userspace if the local APIC is + * managed by userspace, in which case userspace is responsible for + * handling wake events. 
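[The reworked __kvm_emulate_halt(), whose body completes just below, adds one check before parking the vCPU: if a wake event is already pending, consume the pv_unhalted kick and stay runnable instead of halting. The gist, with mock types; the events helper here is a deliberately simplified stand-in:]

#include <stdbool.h>

struct mock_vcpu {
	int mp_state;
	bool pv_unhalted;
};

/* Stand-in for kvm_vcpu_has_events(); pv_unhalted counts as an event. */
static bool mock_vcpu_has_events(const struct mock_vcpu *v)
{
	return v->pv_unhalted;
}

static int mock_emulate_halt(struct mock_vcpu *v, int halted_state)
{
	if (mock_vcpu_has_events(v))
		v->pv_unhalted = false;		/* stay runnable */
	else
		v->mp_state = halted_state;	/* actually halt */

	return 1;				/* handled in kernel */
}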
+ */ + ++vcpu->stat.halt_exits; + if (lapic_in_kernel(vcpu)) { + if (kvm_vcpu_has_events(vcpu)) + vcpu->arch.pv.pv_unhalted = false; + else + vcpu->arch.mp_state = state; + return 1; + } else { + vcpu->run->exit_reason = reason; + return 0; + } +} + +int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu) +{ + return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT); +} +EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip); + +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ + int ret = kvm_skip_emulated_instruction(vcpu); + /* + * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered + * KVM_EXIT_DEBUG here. + */ + return kvm_emulate_halt_noskip(vcpu) && ret; +} +EXPORT_SYMBOL_GPL(kvm_emulate_halt); + +fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu) +{ + int ret; + + kvm_vcpu_srcu_read_lock(vcpu); + ret = kvm_emulate_halt(vcpu); + kvm_vcpu_srcu_read_unlock(vcpu); + + if (!ret) + return EXIT_FASTPATH_EXIT_USERSPACE; + + if (kvm_vcpu_running(vcpu)) + return EXIT_FASTPATH_REENTER_GUEST; + + return EXIT_FASTPATH_EXIT_HANDLED; +} +EXPORT_SYMBOL_GPL(handle_fastpath_hlt); + +int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) +{ + int ret = kvm_skip_emulated_instruction(vcpu); + + return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, + KVM_EXIT_AP_RESET_HOLD) && ret; +} +EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold); + +bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_apicv_active(vcpu) && + kvm_x86_call(dy_apicv_has_pending_interrupt)(vcpu); +} + +bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.preempted_in_kernel; +} + +bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) +{ + if (READ_ONCE(vcpu->arch.pv.pv_unhalted)) + return true; + + if (kvm_test_request(KVM_REQ_NMI, vcpu) || +#ifdef CONFIG_KVM_SMM + kvm_test_request(KVM_REQ_SMI, vcpu) || +#endif + kvm_test_request(KVM_REQ_EVENT, vcpu)) + return true; + + return kvm_arch_dy_has_pending_interrupt(vcpu); +} + static inline int complete_emulated_io(struct kvm_vcpu *vcpu) { return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); @@ -12264,8 +12297,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); - vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; - kvm_async_pf_hash_reset(vcpu); vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap; @@ -12431,6 +12462,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (!init_event) { vcpu->arch.smbase = 0x30000; + vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; + vcpu->arch.msr_misc_features_enables = 0; vcpu->arch.ia32_misc_enable_msr = MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; @@ -12516,7 +12549,17 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) } EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector); -int kvm_arch_hardware_enable(void) +void kvm_arch_enable_virtualization(void) +{ + cpu_emergency_register_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu); +} + +void kvm_arch_disable_virtualization(void) +{ + cpu_emergency_unregister_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu); +} + +int kvm_arch_enable_virtualization_cpu(void) { struct kvm *kvm; struct kvm_vcpu *vcpu; @@ -12532,7 +12575,7 @@ int kvm_arch_hardware_enable(void) if (ret) return ret; - ret = kvm_x86_call(hardware_enable)(); + ret = kvm_x86_call(enable_virtualization_cpu)(); if (ret != 0) return ret; @@ -12612,9 +12655,9 @@ int kvm_arch_hardware_enable(void) 
return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { - kvm_x86_call(hardware_disable)(); + kvm_x86_call(disable_virtualization_cpu)(); drop_user_return_notifiers(); } @@ -13162,87 +13205,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvm_arch_free_memslot(kvm, old); } -static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) -{ - if (!list_empty_careful(&vcpu->async_pf.done)) - return true; - - if (kvm_apic_has_pending_init_or_sipi(vcpu) && - kvm_apic_init_sipi_allowed(vcpu)) - return true; - - if (vcpu->arch.pv.pv_unhalted) - return true; - - if (kvm_is_exception_pending(vcpu)) - return true; - - if (kvm_test_request(KVM_REQ_NMI, vcpu) || - (vcpu->arch.nmi_pending && - kvm_x86_call(nmi_allowed)(vcpu, false))) - return true; - -#ifdef CONFIG_KVM_SMM - if (kvm_test_request(KVM_REQ_SMI, vcpu) || - (vcpu->arch.smi_pending && - kvm_x86_call(smi_allowed)(vcpu, false))) - return true; -#endif - - if (kvm_test_request(KVM_REQ_PMI, vcpu)) - return true; - - if (kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu)) - return true; - - if (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)) - return true; - - if (kvm_hv_has_stimer_pending(vcpu)) - return true; - - if (is_guest_mode(vcpu) && - kvm_x86_ops.nested_ops->has_events && - kvm_x86_ops.nested_ops->has_events(vcpu, false)) - return true; - - if (kvm_xen_has_pending_events(vcpu)) - return true; - - return false; -} - -int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) -{ - return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); -} - -bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu) -{ - return kvm_vcpu_apicv_active(vcpu) && - kvm_x86_call(dy_apicv_has_pending_interrupt)(vcpu); -} - -bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.preempted_in_kernel; -} - -bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) -{ - if (READ_ONCE(vcpu->arch.pv.pv_unhalted)) - return true; - - if (kvm_test_request(KVM_REQ_NMI, vcpu) || -#ifdef CONFIG_KVM_SMM - kvm_test_request(KVM_REQ_SMI, vcpu) || -#endif - kvm_test_request(KVM_REQ_EVENT, vcpu)) - return true; - - return kvm_arch_dy_has_pending_interrupt(vcpu); -} - bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { if (vcpu->arch.guest_state_protected) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 50596f6f8320..a84c48ef5278 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -103,11 +103,18 @@ static inline unsigned int __shrink_ple_window(unsigned int val, return max(val, min); } -#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL +#define MSR_IA32_CR_PAT_DEFAULT \ + PAT_VALUE(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC) void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu); int kvm_check_nested_events(struct kvm_vcpu *vcpu); +/* Forcibly leave the nested mode in cases like a vCPU reset */ +static inline void kvm_leave_nested(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops.nested_ops->leave_nested(vcpu); +} + static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu) { return vcpu->arch.last_vmentry_cpu != -1; @@ -334,6 +341,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len); fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); +fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu); extern struct kvm_caps kvm_caps; extern struct kvm_host_values kvm_host; @@ -504,13 +512,26 @@ int 
kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva); bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type); +enum kvm_msr_access { + MSR_TYPE_R = BIT(0), + MSR_TYPE_W = BIT(1), + MSR_TYPE_RW = MSR_TYPE_R | MSR_TYPE_W, +}; + /* * Internal error codes that are used to indicate that MSR emulation encountered - * an error that should result in #GP in the guest, unless userspace - * handles it. + * an error that should result in #GP in the guest, unless userspace handles it. + * Note, '1', '0', and negative numbers are off limits, as they are used by KVM + * as part of KVM's lightly documented internal KVM_RUN return codes. + * + * UNSUPPORTED - The MSR isn't supported, either because it is completely + * unknown to KVM, or because the MSR should not exist according + * to the vCPU model. + * + * FILTERED - Access to the MSR is denied by a userspace MSR filter. */ -#define KVM_MSR_RET_INVALID 2 /* in-kernel MSR emulation #GP condition */ -#define KVM_MSR_RET_FILTERED 3 /* #GP due to userspace MSR filter */ +#define KVM_MSR_RET_UNSUPPORTED 2 +#define KVM_MSR_RET_FILTERED 3 #define __cr4_reserved_bits(__cpu_has, __c) \ ({ \ diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 90afb488b396..b2eff07d65e4 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -16,6 +16,11 @@ cmpxchg8b (\reg) .endm +.macro read64_nonatomic reg + movl (\reg), %eax + movl 4(\reg), %edx +.endm + SYM_FUNC_START(atomic64_read_cx8) read64 %ecx RET @@ -51,7 +56,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8) movl %edx, %edi movl %ecx, %ebp - read64 %ecx + read64_nonatomic %ecx 1: movl %eax, %ebx movl %edx, %ecx @@ -79,7 +84,7 @@ addsub_return sub sub sbb SYM_FUNC_START(atomic64_\func\()_return_cx8) pushl %ebx - read64 %esi + read64_nonatomic %esi 1: movl %eax, %ebx movl %edx, %ecx diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index d066aecf8aeb..4357ec2a0bfc 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -39,8 +39,13 @@ .macro check_range size:req .if IS_ENABLED(CONFIG_X86_64) - mov %rax, %rdx - sar $63, %rdx + movq $0x0123456789abcdef,%rdx + 1: + .pushsection runtime_ptr_USER_PTR_MAX,"a" + .long 1b - 8 - . 
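[The atomic64_cx8_32.S change above is safe because the initial read inside a cmpxchg8b retry loop need not be atomic: a torn snapshot only makes the first cmpxchg8b fail and refresh the comparand. A C11 model of the add path, mirroring the intent of read64_nonatomic (a C11 atomic load is not literally torn, so treat this as a sketch of the reasoning, not of the machine behavior):]

#include <stdatomic.h>
#include <stdint.h>

static int64_t mock_add_return(_Atomic int64_t *v, int64_t delta)
{
	/* Cheap snapshot, standing in for the possibly-torn read64_nonatomic... */
	int64_t old = atomic_load_explicit(v, memory_order_relaxed);
	int64_t new;

	do {
		new = old + delta;
		/* ...the CAS fails and refreshes 'old' if the snapshot was stale. */
	} while (!atomic_compare_exchange_weak(v, &old, new));

	return new;
}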
+ .popsection + cmp %rax, %rdx + sbb %rdx, %rdx or %rdx, %rax .else cmp $TASK_SIZE_MAX-\size+1, %eax diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 5952ab41c60f..6ffb931b9fb1 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -13,7 +13,7 @@ #endif #include <asm/inat.h> /*__ignore_sync_check__ */ #include <asm/insn.h> /* __ignore_sync_check__ */ -#include <asm/unaligned.h> /* __ignore_sync_check__ */ +#include <linux/unaligned.h> /* __ignore_sync_check__ */ #include <linux/errno.h> #include <linux/kconfig.h> diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index f73b5ce270b3..feb8cc6a12bf 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -176,15 +176,6 @@ static inline void set_page_memtype(struct page *pg, } #endif -enum { - PAT_UC = 0, /* uncached */ - PAT_WC = 1, /* Write combining */ - PAT_WT = 4, /* Write Through */ - PAT_WP = 5, /* Write Protected */ - PAT_WB = 6, /* Write Back (default) */ - PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */ -}; - #define CM(c) (_PAGE_CACHE_MODE_ ## c) static enum page_cache_mode __init pat_get_cache_mode(unsigned int pat_val, @@ -194,13 +185,13 @@ static enum page_cache_mode __init pat_get_cache_mode(unsigned int pat_val, char *cache_mode; switch (pat_val) { - case PAT_UC: cache = CM(UC); cache_mode = "UC "; break; - case PAT_WC: cache = CM(WC); cache_mode = "WC "; break; - case PAT_WT: cache = CM(WT); cache_mode = "WT "; break; - case PAT_WP: cache = CM(WP); cache_mode = "WP "; break; - case PAT_WB: cache = CM(WB); cache_mode = "WB "; break; - case PAT_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break; - default: cache = CM(WB); cache_mode = "WB "; break; + case X86_MEMTYPE_UC: cache = CM(UC); cache_mode = "UC "; break; + case X86_MEMTYPE_WC: cache = CM(WC); cache_mode = "WC "; break; + case X86_MEMTYPE_WT: cache = CM(WT); cache_mode = "WT "; break; + case X86_MEMTYPE_WP: cache = CM(WP); cache_mode = "WP "; break; + case X86_MEMTYPE_WB: cache = CM(WB); cache_mode = "WB "; break; + case X86_MEMTYPE_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break; + default: cache = CM(WB); cache_mode = "WB "; break; } memcpy(msg, cache_mode, 4); @@ -257,12 +248,6 @@ void pat_cpu_init(void) void __init pat_bp_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; -#define PAT(p0, p1, p2, p3, p4, p5, p6, p7) \ - (((u64)PAT_ ## p0) | ((u64)PAT_ ## p1 << 8) | \ - ((u64)PAT_ ## p2 << 16) | ((u64)PAT_ ## p3 << 24) | \ - ((u64)PAT_ ## p4 << 32) | ((u64)PAT_ ## p5 << 40) | \ - ((u64)PAT_ ## p6 << 48) | ((u64)PAT_ ## p7 << 56)) - if (!IS_ENABLED(CONFIG_X86_PAT)) pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n"); @@ -293,7 +278,7 @@ void __init pat_bp_init(void) * NOTE: When WC or WP is used, it is redirected to UC- per * the default setup in __cachemode2pte_tbl[]. */ - pat_msr_val = PAT(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC); + pat_msr_val = PAT_VALUE(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC); } /* @@ -328,7 +313,7 @@ void __init pat_bp_init(void) * NOTE: When WT or WP is used, it is redirected to UC- per * the default setup in __cachemode2pte_tbl[]. */ - pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WC, UC_MINUS, UC); + pat_msr_val = PAT_VALUE(WB, WC, UC_MINUS, UC, WB, WC, UC_MINUS, UC); } else { /* * Full PAT support. We put WT in slot 7 to improve @@ -356,13 +341,12 @@ void __init pat_bp_init(void) * The reserved slots are unused, but mapped to their * corresponding types in the presence of PAT errata. 
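[The getuser.S sequence above (a runtime-patched USER_PTR_MAX constant, then cmp/sbb/or) is a branchless clamp: an address above the limit is turned into all-ones so the subsequent access faults cleanly. The same logic as plain C:]

#include <stdint.h>

static uint64_t mock_clamp_user_ptr(uint64_t addr, uint64_t user_ptr_max)
{
	/* all-ones iff addr > user_ptr_max; models "cmp %rax,%rdx; sbb %rdx,%rdx" */
	uint64_t mask = 0 - (uint64_t)(addr > user_ptr_max);

	return addr | mask;	/* models the final "or %rdx, %rax" */
}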
*/ - pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WP, UC_MINUS, WT); + pat_msr_val = PAT_VALUE(WB, WC, UC_MINUS, UC, WB, WP, UC_MINUS, WT); } memory_caching_control |= CACHE_PAT; init_cache_modes(pat_msr_val); -#undef PAT } static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index f7235ef87bc3..64fca49cd88f 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -7,6 +7,7 @@ .code32 .text #define _pa(x) ((x) - __START_KERNEL_map) +#define rva(x) ((x) - pvh_start_xen) #include <linux/elfnote.h> #include <linux/init.h> @@ -15,6 +16,7 @@ #include <asm/segment.h> #include <asm/asm.h> #include <asm/boot.h> +#include <asm/pgtable.h> #include <asm/processor-flags.h> #include <asm/msr.h> #include <asm/nospec-branch.h> @@ -54,7 +56,25 @@ SYM_CODE_START_LOCAL(pvh_start_xen) UNWIND_HINT_END_OF_STACK cld - lgdt (_pa(gdt)) + /* + * See the comment for startup_32 for more details. We need to + * execute a call to get the execution address to be position + * independent, but we don't have a stack. Save and restore the + * magic field of start_info in ebx, and use that as the stack. + */ + mov (%ebx), %eax + leal 4(%ebx), %esp + ANNOTATE_INTRA_FUNCTION_CALL + call 1f +1: popl %ebp + mov %eax, (%ebx) + subl $rva(1b), %ebp + movl $0, %esp + + leal rva(gdt)(%ebp), %eax + leal rva(gdt_start)(%ebp), %ecx + movl %ecx, 2(%eax) + lgdt (%eax) mov $PVH_DS_SEL,%eax mov %eax,%ds @@ -62,14 +82,14 @@ SYM_CODE_START_LOCAL(pvh_start_xen) mov %eax,%ss /* Stash hvm_start_info. */ - mov $_pa(pvh_start_info), %edi + leal rva(pvh_start_info)(%ebp), %edi mov %ebx, %esi - mov _pa(pvh_start_info_sz), %ecx + movl rva(pvh_start_info_sz)(%ebp), %ecx shr $2,%ecx rep movsl - mov $_pa(early_stack_end), %esp + leal rva(early_stack_end)(%ebp), %esp /* Enable PAE mode. */ mov %cr4, %eax @@ -83,31 +103,86 @@ SYM_CODE_START_LOCAL(pvh_start_xen) btsl $_EFER_LME, %eax wrmsr + mov %ebp, %ebx + subl $_pa(pvh_start_xen), %ebx /* offset */ + jz .Lpagetable_done + + /* Fixup page-tables for relocation. */ + leal rva(pvh_init_top_pgt)(%ebp), %edi + movl $PTRS_PER_PGD, %ecx +2: + testl $_PAGE_PRESENT, 0x00(%edi) + jz 1f + addl %ebx, 0x00(%edi) +1: + addl $8, %edi + decl %ecx + jnz 2b + + /* L3 ident has a single entry. */ + leal rva(pvh_level3_ident_pgt)(%ebp), %edi + addl %ebx, 0x00(%edi) + + leal rva(pvh_level3_kernel_pgt)(%ebp), %edi + addl %ebx, (PAGE_SIZE - 16)(%edi) + addl %ebx, (PAGE_SIZE - 8)(%edi) + + /* pvh_level2_ident_pgt is fine - large pages */ + + /* pvh_level2_kernel_pgt needs adjustment - large pages */ + leal rva(pvh_level2_kernel_pgt)(%ebp), %edi + movl $PTRS_PER_PMD, %ecx +2: + testl $_PAGE_PRESENT, 0x00(%edi) + jz 1f + addl %ebx, 0x00(%edi) +1: + addl $8, %edi + decl %ecx + jnz 2b + +.Lpagetable_done: /* Enable pre-constructed page tables. */ - mov $_pa(init_top_pgt), %eax + leal rva(pvh_init_top_pgt)(%ebp), %eax mov %eax, %cr3 mov $(X86_CR0_PG | X86_CR0_PE), %eax mov %eax, %cr0 /* Jump to 64-bit mode. */ - ljmp $PVH_CS_SEL, $_pa(1f) + pushl $PVH_CS_SEL + leal rva(1f)(%ebp), %eax + pushl %eax + lretl /* 64-bit entry point. */ .code64 1: + UNWIND_HINT_END_OF_STACK + /* Set base address in stack canary descriptor. */ mov $MSR_GS_BASE,%ecx - mov $_pa(canary), %eax + leal canary(%rip), %eax xor %edx, %edx wrmsr + /* + * Calculate load offset and store in phys_base. __pa() needs + * phys_base set to calculate the hypercall page in xen_pvh_init(). 
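[With the local PAT() macro gone, memtype.c and KVM's MSR_IA32_CR_PAT_DEFAULT now share one PAT_VALUE() definition. The encoding packs eight memory types, one per byte, into the 64-bit IA32_PAT value (SDM encodings: UC=0, WC=1, WT=4, WP=5, WB=6, UC-=7). A self-contained re-creation of that packing, illustrative macro names only:]

#include <stdint.h>

#define MT_UC		0ULL
#define MT_WC		1ULL
#define MT_WT		4ULL
#define MT_WP		5ULL
#define MT_WB		6ULL
#define MT_UC_MINUS	7ULL

/* Illustrative PAT_VALUE()-style packing macro. */
#define MOCK_PAT_PACK(p0, p1, p2, p3, p4, p5, p6, p7)			\
	((p0) | ((p1) << 8) | ((p2) << 16) | ((p3) << 24) |		\
	 ((p4) << 32) | ((p5) << 40) | ((p6) << 48) | ((p7) << 56))

/* Matches the literal 0x0007040600070406ULL default replaced in x86.h. */
static const uint64_t mock_pat_default =
	MOCK_PAT_PACK(MT_WB, MT_WT, MT_UC_MINUS, MT_UC,
		      MT_WB, MT_WT, MT_UC_MINUS, MT_UC);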
+ */ + movq %rbp, %rbx + subq $_pa(pvh_start_xen), %rbx + movq %rbx, phys_base(%rip) call xen_prepare_pvh + /* + * Clear phys_base. __startup_64 will *add* to its value, + * so reset to 0. + */ + xor %rbx, %rbx + movq %rbx, phys_base(%rip) /* startup_64 expects boot_params in %rsi. */ - mov $_pa(pvh_bootparams), %rsi - mov $_pa(startup_64), %rax - ANNOTATE_RETPOLINE_SAFE - jmp *%rax + lea pvh_bootparams(%rip), %rsi + jmp startup_64 #else /* CONFIG_X86_64 */ @@ -143,7 +218,7 @@ SYM_CODE_END(pvh_start_xen) .balign 8 SYM_DATA_START_LOCAL(gdt) .word gdt_end - gdt_start - .long _pa(gdt_start) + .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */ .word 0 SYM_DATA_END(gdt) SYM_DATA_START_LOCAL(gdt_start) @@ -163,5 +238,67 @@ SYM_DATA_START_LOCAL(early_stack) .fill BOOT_STACK_SIZE, 1, 0 SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end) +#ifdef CONFIG_X86_64 +/* + * Xen PVH needs a set of identity mapped and kernel high mapping + * page tables. pvh_start_xen starts running on the identity mapped + * page tables, but xen_prepare_pvh calls into the high mapping. + * These page tables need to be relocatable and are only used until + * startup_64 transitions to init_top_pgt. + */ +SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt) + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0 + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .org pvh_init_top_pgt + L4_START_KERNEL * 8, 0 + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ + .quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC +SYM_DATA_END(pvh_init_top_pgt) + +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt) + .quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .fill 511, 8, 0 +SYM_DATA_END(pvh_level3_ident_pgt) +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt) + /* + * Since I easily can, map the first 1G. + * Don't set NX because code runs from these pages. + * + * Note: This sets _PAGE_GLOBAL despite whether + * the CPU supports it or it is enabled. But, + * the CPU should ignore the bit. + */ + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) +SYM_DATA_END(pvh_level2_ident_pgt) +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt) + .fill L3_START_KERNEL, 8, 0 + /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ + .quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .quad 0 /* no fixmap */ +SYM_DATA_END(pvh_level3_kernel_pgt) + +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt) + /* + * Kernel high mapping. + * + * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in + * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled, + * 512 MiB otherwise. + * + * (NOTE: after that starts the module area, see MODULES_VADDR.) + * + * This table is eventually used by the kernel during normal runtime. + * Care must be taken to clear out undesired bits later, like _PAGE_RW + * or _PAGE_GLOBAL in some cases. 
+ */ + PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE) +SYM_DATA_END(pvh_level2_kernel_pgt) + + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC, + .long CONFIG_PHYSICAL_ALIGN; + .long LOAD_PHYSICAL_ADDR; + .long KERNEL_IMAGE_SIZE - 1) +#endif + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, _ASM_PTR (pvh_start_xen - __START_KERNEL_map)) diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c index f2383484840d..a1ee415c008d 100644 --- a/arch/x86/um/sysrq_32.c +++ b/arch/x86/um/sysrq_32.c @@ -9,7 +9,6 @@ #include <linux/sched/debug.h> #include <linux/kallsyms.h> #include <asm/ptrace.h> -#include <asm/sysrq.h> /* This is declared by <linux/sched.h> */ void show_regs(struct pt_regs *regs) diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c index 0bf6de40abff..340d8a243c8a 100644 --- a/arch/x86/um/sysrq_64.c +++ b/arch/x86/um/sysrq_64.c @@ -12,7 +12,6 @@ #include <linux/utsname.h> #include <asm/current.h> #include <asm/ptrace.h> -#include <asm/sysrq.h> void show_regs(struct pt_regs *regs) { diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index 0ce17766c0e5..9a6a943d8e41 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -173,6 +173,8 @@ static void __init __snp_fixup_e820_tables(u64 pa) e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); + if (!memblock_is_region_reserved(pa, PMD_SIZE)) + memblock_reserve(pa, PMD_SIZE); } } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 2c12ae42dc8b..d6818c6cafda 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1032,6 +1032,10 @@ static u64 xen_do_read_msr(unsigned int msr, int *err) switch (msr) { case MSR_IA32_APICBASE: val &= ~X2APIC_ENABLE; + if (smp_processor_id() == 0) + val |= MSR_IA32_APICBASE_BSP; + else + val &= ~MSR_IA32_APICBASE_BSP; break; } return val; diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 728a4366ca85..bf68c329fc01 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -4,6 +4,7 @@ #include <linux/mm.h> #include <xen/hvc-console.h> +#include <xen/acpi.h> #include <asm/bootparam.h> #include <asm/io_apic.h> @@ -28,6 +29,28 @@ bool __ro_after_init xen_pvh; EXPORT_SYMBOL_GPL(xen_pvh); +#ifdef CONFIG_XEN_DOM0 +int xen_pvh_setup_gsi(int gsi, int trigger, int polarity) +{ + int ret; + struct physdev_setup_gsi setup_gsi; + + setup_gsi.gsi = gsi; + setup_gsi.triggering = (trigger == ACPI_EDGE_SENSITIVE ? 0 : 1); + setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + + ret = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); + if (ret == -EEXIST) { + xen_raw_printk("Already setup the GSI :%d\n", gsi); + ret = 0; + } else if (ret) + xen_raw_printk("Fail to setup GSI (%d)!\n", gsi); + + return ret; +} +EXPORT_SYMBOL_GPL(xen_pvh_setup_gsi); +#endif + /* * Reserve e820 UNUSABLE regions to inflate the memory balloon. * diff --git a/arch/xtensa/include/asm/flat.h b/arch/xtensa/include/asm/flat.h index ed5870c779f9..4854419dcd86 100644 --- a/arch/xtensa/include/asm/flat.h +++ b/arch/xtensa/include/asm/flat.h @@ -2,7 +2,7 @@ #ifndef __ASM_XTENSA_FLAT_H #define __ASM_XTENSA_FLAT_H -#include <asm/unaligned.h> +#include <linux/unaligned.h> static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, u32 *addr) |