diff options
author | Jiri Kosina <jkosina@suse.cz> | 2022-12-13 14:27:16 +0100 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2022-12-13 14:27:16 +0100 |
commit | cfd1f6c16f7deadfe5269a76c1516405c4466481 (patch) | |
tree | 44c0635e57e9627d9838c0d0d5801381b93ccccd /arch | |
parent | a9d9e46c755a189ccb44d91b8cf737742a975de8 (diff) | |
parent | fd7b68b763c4dfa65e3c145c624427d5fd11202f (diff) |
Merge branch 'for-6.2/apple' into for-linus
- new quirks for select Apple keyboards (Kerem Karabay, Aditya Garg)
Diffstat (limited to 'arch')
837 files changed, 19083 insertions, 8421 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 266862428a84..8f138e580d1a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1416,6 +1416,14 @@ config DYNAMIC_SIGFRAME config HAVE_ARCH_NODE_DEV_GROUP bool +config ARCH_HAS_NONLEAF_PMD_YOUNG + bool + help + Architectures that select this option are capable of setting the + accessed bit in non-leaf PMD entries when using them as part of linear + address translations. Page table walkers that clear the accessed bit + may use this capability to reduce their search space. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile index 881cb913e23a..45158024085e 100644 --- a/arch/alpha/Makefile +++ b/arch/alpha/Makefile @@ -36,8 +36,6 @@ cflags-y += $(cpuflags-y) # BWX is most important, but we don't really want any emulation ever. KBUILD_CFLAGS += $(cflags-y) -Wa,-mev6 -head-y := arch/alpha/kernel/head.o - libs-y += arch/alpha/lib/ # export what is needed by arch/alpha/boot/Makefile diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig index 7e9336930880..6a39fe8ce9e5 100644 --- a/arch/alpha/configs/defconfig +++ b/arch/alpha/configs/defconfig @@ -65,7 +65,7 @@ CONFIG_NFSD=m CONFIG_NLS_CODEPAGE_437=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_ALPHA_LEGACY_START_ADDRESS=y CONFIG_MATHEMU=y CONFIG_CRYPTO_HMAC=y diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 43e234c518b1..714abe494e5f 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -36,8 +36,6 @@ extern void start_thread(struct pt_regs *, unsigned long, unsigned long); /* Free all resources held by a thread. */ struct task_struct; -extern void release_thread(struct task_struct *); - unsigned long __get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc) diff --git a/arch/alpha/include/asm/termios.h b/arch/alpha/include/asm/termios.h deleted file mode 100644 index b7c77bb1bfd2..000000000000 --- a/arch/alpha/include/asm/termios.h +++ /dev/null @@ -1,87 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ALPHA_TERMIOS_H -#define _ALPHA_TERMIOS_H - -#include <uapi/asm/termios.h> - -/* eof=^D eol=\0 eol2=\0 erase=del - werase=^W kill=^U reprint=^R sxtc=\0 - intr=^C quit=^\ susp=^Z <OSF/1 VDSUSP> - start=^Q stop=^S lnext=^V discard=^U - vmin=\1 vtime=\0 -*/ -#define INIT_C_CC "\004\000\000\177\027\025\022\000\003\034\032\000\021\023\026\025\001\000" - -/* - * Translate a "termio" structure into a "termios". Ugh. - */ - -#define user_termio_to_kernel_termios(a_termios, u_termio) \ -({ \ - struct ktermios *k_termios = (a_termios); \ - struct termio k_termio; \ - int canon, ret; \ - \ - ret = copy_from_user(&k_termio, u_termio, sizeof(k_termio)); \ - if (!ret) { \ - /* Overwrite only the low bits. */ \ - *(unsigned short *)&k_termios->c_iflag = k_termio.c_iflag; \ - *(unsigned short *)&k_termios->c_oflag = k_termio.c_oflag; \ - *(unsigned short *)&k_termios->c_cflag = k_termio.c_cflag; \ - *(unsigned short *)&k_termios->c_lflag = k_termio.c_lflag; \ - canon = k_termio.c_lflag & ICANON; \ - \ - k_termios->c_cc[VINTR] = k_termio.c_cc[_VINTR]; \ - k_termios->c_cc[VQUIT] = k_termio.c_cc[_VQUIT]; \ - k_termios->c_cc[VERASE] = k_termio.c_cc[_VERASE]; \ - k_termios->c_cc[VKILL] = k_termio.c_cc[_VKILL]; \ - k_termios->c_cc[VEOL2] = k_termio.c_cc[_VEOL2]; \ - k_termios->c_cc[VSWTC] = k_termio.c_cc[_VSWTC]; \ - k_termios->c_cc[canon ? VEOF : VMIN] = k_termio.c_cc[_VEOF]; \ - k_termios->c_cc[canon ? VEOL : VTIME] = k_termio.c_cc[_VEOL]; \ - } \ - ret; \ -}) - -/* - * Translate a "termios" structure into a "termio". Ugh. - * - * Note the "fun" _VMIN overloading. - */ -#define kernel_termios_to_user_termio(u_termio, a_termios) \ -({ \ - struct ktermios *k_termios = (a_termios); \ - struct termio k_termio; \ - int canon; \ - \ - k_termio.c_iflag = k_termios->c_iflag; \ - k_termio.c_oflag = k_termios->c_oflag; \ - k_termio.c_cflag = k_termios->c_cflag; \ - canon = (k_termio.c_lflag = k_termios->c_lflag) & ICANON; \ - \ - k_termio.c_line = k_termios->c_line; \ - k_termio.c_cc[_VINTR] = k_termios->c_cc[VINTR]; \ - k_termio.c_cc[_VQUIT] = k_termios->c_cc[VQUIT]; \ - k_termio.c_cc[_VERASE] = k_termios->c_cc[VERASE]; \ - k_termio.c_cc[_VKILL] = k_termios->c_cc[VKILL]; \ - k_termio.c_cc[_VEOF] = k_termios->c_cc[canon ? VEOF : VMIN]; \ - k_termio.c_cc[_VEOL] = k_termios->c_cc[canon ? VEOL : VTIME]; \ - k_termio.c_cc[_VEOL2] = k_termios->c_cc[VEOL2]; \ - k_termio.c_cc[_VSWTC] = k_termios->c_cc[VSWTC]; \ - \ - copy_to_user(u_termio, &k_termio, sizeof(k_termio)); \ -}) - -#define user_termios_to_kernel_termios(k, u) \ - copy_from_user(k, u, sizeof(struct termios2)) - -#define kernel_termios_to_user_termios(u, k) \ - copy_to_user(u, k, sizeof(struct termios2)) - -#define user_termios_to_kernel_termios_1(k, u) \ - copy_from_user(k, u, sizeof(struct termios)) - -#define kernel_termios_to_user_termios_1(u, k) \ - copy_to_user(u, k, sizeof(struct termios)) - -#endif /* _ALPHA_TERMIOS_H */ diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h index 4aa996423b0d..763929e814e9 100644 --- a/arch/alpha/include/uapi/asm/mman.h +++ b/arch/alpha/include/uapi/asm/mman.h @@ -76,6 +76,8 @@ #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ +#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index 6a274c0d53a2..fb4efec7cbc7 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -3,13 +3,13 @@ # Makefile for the linux kernel. # -extra-y := head.o vmlinux.lds +extra-y := vmlinux.lds asflags-y := $(KBUILD_CFLAGS) ccflags-y := -Wno-sign-compare -obj-y := entry.o traps.o process.o osf_sys.o irq.o \ +obj-y := head.o entry.o traps.o process.o osf_sys.o irq.o \ irq_alpha.o signal.o setup.o ptrace.o time.o \ - systbls.o err_common.o io.o bugs.o + systbls.o err_common.o io.o bugs.o termios.o obj-$(CONFIG_VGA_HOSE) += console.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 6d0b3baf97ff..e9348aec4649 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -803,7 +803,7 @@ void __iomem *marvel_ioportmap (unsigned long addr) return (void __iomem *)addr; } -unsigned u8 +u8 marvel_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 68ec314d3fac..c54469b369cb 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1278,18 +1278,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, return addr; } -SYSCALL_DEFINE3(osf_readv, unsigned long, fd, - const struct iovec __user *, vector, unsigned long, count) -{ - return sys_readv(fd, vector, count); -} - -SYSCALL_DEFINE3(osf_writev, unsigned long, fd, - const struct iovec __user *, vector, unsigned long, count) -{ - return sys_writev(fd, vector, count); -} - SYSCALL_DEFINE2(osf_getpriority, int, which, int, who) { int prio = sys_getpriority(which, who); diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index e2e25f8b5e76..dbf1bc5e2ad2 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -225,11 +225,6 @@ flush_thread(void) current_thread_info()->pcb.unique = 0; } -void -release_thread(struct task_struct *dead_task) -{ -} - /* * Copy architecture-specific thread state */ diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index b4fbbba30aa2..33bf3a627002 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -491,9 +491,9 @@ setup_arch(char **cmdline_p) boot flags depending on the boot mode, we need some shorthand. This should do for installation. */ if (strcmp(COMMAND_LINE, "INSTALL") == 0) { - strlcpy(command_line, "root=/dev/fd0 load_ramdisk=1", sizeof command_line); + strscpy(command_line, "root=/dev/fd0 load_ramdisk=1", sizeof(command_line)); } else { - strlcpy(command_line, COMMAND_LINE, sizeof command_line); + strscpy(command_line, COMMAND_LINE, sizeof(command_line)); } strcpy(boot_command_line, command_line); *cmdline_p = command_line; diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 3515bc4f16a4..8ebacf37a8cf 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -125,8 +125,8 @@ 116 common osf_gettimeofday sys_osf_gettimeofday 117 common osf_getrusage sys_osf_getrusage 118 common getsockopt sys_getsockopt -120 common readv sys_osf_readv -121 common writev sys_osf_writev +120 common readv sys_readv +121 common writev sys_writev 122 common osf_settimeofday sys_osf_settimeofday 123 common fchown sys_fchown 124 common fchmod sys_fchmod diff --git a/arch/alpha/kernel/termios.c b/arch/alpha/kernel/termios.c new file mode 100644 index 000000000000..a4c29a22edf7 --- /dev/null +++ b/arch/alpha/kernel/termios.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/termios_internal.h> + +int user_termio_to_kernel_termios(struct ktermios *termios, + struct termio __user *termio) +{ + struct termio v; + bool canon; + + if (copy_from_user(&v, termio, sizeof(struct termio))) + return -EFAULT; + + termios->c_iflag = (0xffff0000 & termios->c_iflag) | v.c_iflag; + termios->c_oflag = (0xffff0000 & termios->c_oflag) | v.c_oflag; + termios->c_cflag = (0xffff0000 & termios->c_cflag) | v.c_cflag; + termios->c_lflag = (0xffff0000 & termios->c_lflag) | v.c_lflag; + termios->c_line = (0xffff0000 & termios->c_lflag) | v.c_line; + + canon = v.c_lflag & ICANON; + termios->c_cc[VINTR] = v.c_cc[_VINTR]; + termios->c_cc[VQUIT] = v.c_cc[_VQUIT]; + termios->c_cc[VERASE] = v.c_cc[_VERASE]; + termios->c_cc[VKILL] = v.c_cc[_VKILL]; + termios->c_cc[VEOL2] = v.c_cc[_VEOL2]; + termios->c_cc[VSWTC] = v.c_cc[_VSWTC]; + termios->c_cc[canon ? VEOF : VMIN] = v.c_cc[_VEOF]; + termios->c_cc[canon ? VEOL : VTIME] = v.c_cc[_VEOL]; + + return 0; +} + +int kernel_termios_to_user_termio(struct termio __user *termio, + struct ktermios *termios) +{ + struct termio v; + bool canon; + + memset(&v, 0, sizeof(struct termio)); + v.c_iflag = termios->c_iflag; + v.c_oflag = termios->c_oflag; + v.c_cflag = termios->c_cflag; + v.c_lflag = termios->c_lflag; + v.c_line = termios->c_line; + + canon = v.c_lflag & ICANON; + v.c_cc[_VINTR] = termios->c_cc[VINTR]; + v.c_cc[_VQUIT] = termios->c_cc[VQUIT]; + v.c_cc[_VERASE] = termios->c_cc[VERASE]; + v.c_cc[_VKILL] = termios->c_cc[VKILL]; + v.c_cc[_VEOF] = termios->c_cc[canon ? VEOF : VMIN]; + v.c_cc[_VEOL] = termios->c_cc[canon ? VEOL : VTIME]; + v.c_cc[_VEOL2] = termios->c_cc[VEOL2]; + v.c_cc[_VSWTC] = termios->c_cc[VSWTC]; + + return copy_to_user(termio, &v, sizeof(struct termio)); +} diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 9e3653253ef2..d9a13ccf89a3 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -554,7 +554,7 @@ config ARC_BUILTIN_DTB_NAME endmenu # "ARC Architecture Configuration" -config FORCE_MAX_ZONEORDER +config ARCH_FORCE_MAX_ORDER int "Maximum zone order" default "12" if ARC_HUGEPAGE_16M default "11" diff --git a/arch/arc/Makefile b/arch/arc/Makefile index efc54f3e35e0..329400a1c355 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -82,8 +82,6 @@ KBUILD_CFLAGS += $(cflags-y) KBUILD_AFLAGS += $(KBUILD_CFLAGS) KBUILD_LDFLAGS += $(ldflags-y) -head-y := arch/arc/kernel/head.o - # w/o this dtb won't embed into kernel binary core-y += arch/arc/boot/dts/ diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index d93b65008d4a..4a94d1684ed6 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -90,7 +90,7 @@ CONFIG_TMPFS=y CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y CONFIG_HEADERS_INSTALL=y diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 54db9d7bb562..fb844fce1ab6 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -43,9 +43,6 @@ struct task_struct; #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_SIZE + (void *)task_stack_page(p)) - 1) -/* Free all resources held by a thread */ -#define release_thread(thread) do { } while (0) - /* * A lot of busy-wait loops in SMP are based off of non-volatile data otherwise * get optimised away by gcc diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index 8c4fc4b54c14..0723d888ac44 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -3,7 +3,7 @@ # Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) # -obj-y := arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o +obj-y := head.o arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o @@ -31,4 +31,4 @@ else obj-y += ctx_sw_asm.o endif -extra-y := vmlinux.lds head.o +extra-y := vmlinux.lds diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 22dc1d6936bc..a08c9d092a33 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1362,7 +1362,7 @@ config ARM_MODULE_PLTS Disabling this is usually safe for small single-platform configurations. If unsure, say y. -config FORCE_MAX_ZONEORDER +config ARCH_FORCE_MAX_ORDER int "Maximum zone order" default "12" if SOC_AM33XX default "9" if SA1111 @@ -1850,8 +1850,4 @@ config ARCH_HIBERNATION_POSSIBLE endmenu -if CRYPTO -source "arch/arm/crypto/Kconfig" -endif - source "arch/arm/Kconfig.assembler" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 0e04bc6b2ad3..c846119c448f 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -137,9 +137,6 @@ KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) $(arch-y) $(tune-y) -include asm/uni CHECKFLAGS += -D__arm__ -#Default value -head-y := arch/arm/kernel/head$(MMUEXT).o - # Text offset. This list is sorted numerically by address in order to # provide a means to avoid/resolve conflicts in multi-arch kernels. # Note: the 32kB below this value is reserved for use by the kernel diff --git a/arch/arm/boot/dts/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed-g5.dtsi index c89092c3905b..04f98d1dbb97 100644 --- a/arch/arm/boot/dts/aspeed-g5.dtsi +++ b/arch/arm/boot/dts/aspeed-g5.dtsi @@ -262,6 +262,14 @@ quality = <100>; }; + hace: crypto@1e6e3000 { + compatible = "aspeed,ast2500-hace"; + reg = <0x1e6e3000 0x100>; + interrupts = <4>; + clocks = <&syscon ASPEED_CLK_GATE_YCLK>; + resets = <&syscon ASPEED_RESET_HACE>; + }; + gfx: display@1e6e6000 { compatible = "aspeed,ast2500-gfx", "syscon"; reg = <0x1e6e6000 0x1000>; diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi index 1387a763a6a5..ebbcfe445d9c 100644 --- a/arch/arm/boot/dts/aspeed-g6.dtsi +++ b/arch/arm/boot/dts/aspeed-g6.dtsi @@ -323,6 +323,14 @@ #size-cells = <1>; ranges; + hace: crypto@1e6d0000 { + compatible = "aspeed,ast2600-hace"; + reg = <0x1e6d0000 0x200>; + interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&syscon ASPEED_CLK_GATE_YCLK>; + resets = <&syscon ASPEED_RESET_HACE>; + }; + syscon: syscon@1e6e2000 { compatible = "aspeed,ast2600-scu", "syscon", "simple-mfd"; reg = <0x1e6e2000 0x1000>; diff --git a/arch/arm/boot/dts/tegra30-apalis-v1.1.dtsi b/arch/arm/boot/dts/tegra30-apalis-v1.1.dtsi index 380f22a35821..a1bcd67fa505 100644 --- a/arch/arm/boot/dts/tegra30-apalis-v1.1.dtsi +++ b/arch/arm/boot/dts/tegra30-apalis-v1.1.dtsi @@ -993,7 +993,7 @@ touchscreen@41 { compatible = "st,stmpe811"; reg = <0x41>; - irq-gpio = <&gpio TEGRA_GPIO(V, 0) IRQ_TYPE_LEVEL_LOW>; + irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>; interrupt-controller; id = <0>; blocks = <0x5>; diff --git a/arch/arm/boot/dts/tegra30-apalis.dtsi b/arch/arm/boot/dts/tegra30-apalis.dtsi index 9bdc4cb71449..99d7dad72d29 100644 --- a/arch/arm/boot/dts/tegra30-apalis.dtsi +++ b/arch/arm/boot/dts/tegra30-apalis.dtsi @@ -976,7 +976,7 @@ touchscreen@41 { compatible = "st,stmpe811"; reg = <0x41>; - irq-gpio = <&gpio TEGRA_GPIO(V, 0) IRQ_TYPE_LEVEL_LOW>; + irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>; interrupt-controller; id = <0>; blocks = <0x5>; diff --git a/arch/arm/boot/dts/tegra30-colibri.dtsi b/arch/arm/boot/dts/tegra30-colibri.dtsi index 310dff05910d..2867a138e011 100644 --- a/arch/arm/boot/dts/tegra30-colibri.dtsi +++ b/arch/arm/boot/dts/tegra30-colibri.dtsi @@ -849,7 +849,7 @@ touchscreen@41 { compatible = "st,stmpe811"; reg = <0x41>; - irq-gpio = <&gpio TEGRA_GPIO(V, 0) IRQ_TYPE_LEVEL_LOW>; + irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>; interrupt-controller; id = <0>; blocks = <0x5>; diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index deb24a4bd011..31e8e0c0ee1b 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -32,7 +32,6 @@ CONFIG_KERNEL_MODE_NEON=y CONFIG_PM_DEBUG=y CONFIG_PM_ADVANCED_DEBUG=y CONFIG_ENERGY_MODEL=y -CONFIG_ARM_CRYPTO=y CONFIG_CRYPTO_SHA1_ARM_NEON=m CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 6429c4106ab5..078d61b758a9 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -31,7 +31,7 @@ CONFIG_SOC_VF610=y CONFIG_SMP=y CONFIG_ARM_PSCI=y CONFIG_HIGHMEM=y -CONFIG_FORCE_MAX_ZONEORDER=14 +CONFIG_ARCH_FORCE_MAX_ORDER=14 CONFIG_CMDLINE="noinitrd console=ttymxc0,115200" CONFIG_KEXEC=y CONFIG_CPU_FREQ=y diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig index 6f6b5d0918f7..a2e25bf843cc 100644 --- a/arch/arm/configs/milbeaut_m10v_defconfig +++ b/arch/arm/configs/milbeaut_m10v_defconfig @@ -26,7 +26,7 @@ CONFIG_THUMB2_KERNEL=y # CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11 is not set # CONFIG_ARM_PATCH_IDIV is not set CONFIG_HIGHMEM=y -CONFIG_FORCE_MAX_ZONEORDER=12 +CONFIG_ARCH_FORCE_MAX_ORDER=12 CONFIG_SECCOMP=y CONFIG_KEXEC=y CONFIG_EFI=y @@ -44,7 +44,6 @@ CONFIG_ARM_CPUIDLE=y CONFIG_VFP=y CONFIG_NEON=y CONFIG_KERNEL_MODE_NEON=y -CONFIG_ARM_CRYPTO=y CONFIG_CRYPTO_SHA1_ARM_NEON=m CONFIG_CRYPTO_SHA1_ARM_CE=m CONFIG_CRYPTO_SHA2_ARM_CE=m diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 23003a43ffe9..b61b2e3d116b 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -132,7 +132,6 @@ CONFIG_ARM_EXYNOS_CPUIDLE=y CONFIG_ARM_TEGRA_CPUIDLE=y CONFIG_ARM_QCOM_SPM_CPUIDLE=y CONFIG_KERNEL_MODE_NEON=y -CONFIG_ARM_CRYPTO=y CONFIG_CRYPTO_SHA1_ARM_NEON=m CONFIG_CRYPTO_SHA1_ARM_CE=m CONFIG_CRYPTO_SHA2_ARM_CE=m @@ -1189,11 +1188,11 @@ CONFIG_TI_PIPE3=y CONFIG_TWL4030_USB=m CONFIG_RAS=y CONFIG_NVMEM_IMX_OCOTP=y -CONFIG_QCOM_QFPROM=y -CONFIG_ROCKCHIP_EFUSE=m +CONFIG_NVMEM_QCOM_QFPROM=y +CONFIG_NVMEM_ROCKCHIP_EFUSE=m CONFIG_NVMEM_SUNXI_SID=y CONFIG_NVMEM_VF610_OCOTP=y -CONFIG_MESON_MX_EFUSE=m +CONFIG_NVMEM_MESON_MX_EFUSE=m CONFIG_NVMEM_RMEM=m CONFIG_FSI=m CONFIG_FSI_MASTER_GPIO=m diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 965853c1c530..2a66850d3288 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -53,7 +53,6 @@ CONFIG_CPU_IDLE=y CONFIG_ARM_CPUIDLE=y CONFIG_KERNEL_MODE_NEON=y CONFIG_PM_DEBUG=y -CONFIG_ARM_CRYPTO=y CONFIG_CRYPTO_SHA1_ARM_NEON=m CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m diff --git a/arch/arm/configs/oxnas_v6_defconfig b/arch/arm/configs/oxnas_v6_defconfig index d206c4f04490..70a67b3fc91b 100644 --- a/arch/arm/configs/oxnas_v6_defconfig +++ b/arch/arm/configs/oxnas_v6_defconfig @@ -12,7 +12,7 @@ CONFIG_ARCH_OXNAS=y CONFIG_MACH_OX820=y CONFIG_SMP=y CONFIG_NR_CPUS=16 -CONFIG_FORCE_MAX_ZONEORDER=12 +CONFIG_ARCH_FORCE_MAX_ORDER=12 CONFIG_SECCOMP=y CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_ATAG_DTB_COMPAT=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index ca6d0049362b..d60cc9cc4c21 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -21,7 +21,7 @@ CONFIG_MACH_AKITA=y CONFIG_MACH_BORZOI=y CONFIG_PXA_SYSTEMS_CPLDS=y CONFIG_AEABI=y -CONFIG_FORCE_MAX_ZONEORDER=9 +CONFIG_ARCH_FORCE_MAX_ORDER=9 CONFIG_CMDLINE="root=/dev/ram0 ro" CONFIG_KEXEC=y CONFIG_CPU_FREQ=y @@ -34,7 +34,6 @@ CONFIG_CPUFREQ_DT=m CONFIG_ARM_PXA2xx_CPUFREQ=m CONFIG_CPU_IDLE=y CONFIG_ARM_CPUIDLE=y -CONFIG_ARM_CRYPTO=y CONFIG_CRYPTO_SHA1_ARM=m CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig index 0afa62b92de7..7d8b6884fd00 100644 --- a/arch/arm/configs/qcom_defconfig +++ b/arch/arm/configs/qcom_defconfig @@ -283,7 +283,7 @@ CONFIG_PHY_QCOM_QMP=y CONFIG_PHY_QCOM_USB_HS=y CONFIG_PHY_QCOM_USB_SNPS_FEMTO_V2=y CONFIG_PHY_QCOM_USB_HSIC=y -CONFIG_QCOM_QFPROM=y +CONFIG_NVMEM_QCOM_QFPROM=y CONFIG_INTERCONNECT=y CONFIG_INTERCONNECT_QCOM=y CONFIG_INTERCONNECT_QCOM_MSM8974=m diff --git a/arch/arm/configs/sama7_defconfig b/arch/arm/configs/sama7_defconfig index 72af50d9e48a..8f28c9d443f0 100644 --- a/arch/arm/configs/sama7_defconfig +++ b/arch/arm/configs/sama7_defconfig @@ -19,7 +19,7 @@ CONFIG_ATMEL_CLOCKSOURCE_TCB=y # CONFIG_CACHE_L2X0 is not set # CONFIG_ARM_PATCH_IDIV is not set # CONFIG_CPU_SW_DOMAIN_PAN is not set -CONFIG_FORCE_MAX_ZONEORDER=15 +CONFIG_ARCH_FORCE_MAX_ORDER=15 CONFIG_UACCESS_WITH_MEMCPY=y # CONFIG_ATAGS is not set CONFIG_CMDLINE="console=ttyS0,115200 earlyprintk ignore_loglevel" diff --git a/arch/arm/configs/sp7021_defconfig b/arch/arm/configs/sp7021_defconfig index aa7dfd670db5..5bca2eb59b86 100644 --- a/arch/arm/configs/sp7021_defconfig +++ b/arch/arm/configs/sp7021_defconfig @@ -17,7 +17,7 @@ CONFIG_ARCH_SUNPLUS=y # CONFIG_VDSO is not set CONFIG_SMP=y CONFIG_THUMB2_KERNEL=y -CONFIG_FORCE_MAX_ZONEORDER=12 +CONFIG_ARCH_FORCE_MAX_ORDER=12 CONFIG_VFP=y CONFIG_NEON=y CONFIG_MODULES=y diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 149a5bd6b88c..3858c4d4cb98 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -1,92 +1,156 @@ # SPDX-License-Identifier: GPL-2.0 -menuconfig ARM_CRYPTO - bool "ARM Accelerated Cryptographic Algorithms" - depends on ARM +menu "Accelerated Cryptographic Algorithms for CPU (arm)" + +config CRYPTO_CURVE25519_NEON + tristate "Public key crypto: Curve25519 (NEON)" + depends on KERNEL_MODE_NEON + select CRYPTO_LIB_CURVE25519_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CURVE25519 + help + Curve25519 algorithm + + Architecture: arm with + - NEON (Advanced SIMD) extensions + +config CRYPTO_GHASH_ARM_CE + tristate "Hash functions: GHASH (PMULL/NEON/ARMv8 Crypto Extensions)" + depends on KERNEL_MODE_NEON + select CRYPTO_HASH + select CRYPTO_CRYPTD + select CRYPTO_GF128MUL help - Say Y here to choose from a selection of cryptographic algorithms - implemented using ARM specific CPU features or instructions. + GCM GHASH function (NIST SP800-38D) -if ARM_CRYPTO + Architecture: arm using + - PMULL (Polynomial Multiply Long) instructions + - NEON (Advanced SIMD) extensions + - ARMv8 Crypto Extensions + + Use an implementation of GHASH (used by the GCM AEAD chaining mode) + that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64) + that is part of the ARMv8 Crypto Extensions, or a slower variant that + uses the vmull.p8 instruction that is part of the basic NEON ISA. + +config CRYPTO_NHPOLY1305_NEON + tristate "Hash functions: NHPoly1305 (NEON)" + depends on KERNEL_MODE_NEON + select CRYPTO_NHPOLY1305 + help + NHPoly1305 hash function (Adiantum) + + Architecture: arm using: + - NEON (Advanced SIMD) extensions + +config CRYPTO_POLY1305_ARM + tristate "Hash functions: Poly1305 (NEON)" + select CRYPTO_HASH + select CRYPTO_ARCH_HAVE_LIB_POLY1305 + help + Poly1305 authenticator algorithm (RFC7539) + + Architecture: arm optionally using + - NEON (Advanced SIMD) extensions + +config CRYPTO_BLAKE2S_ARM + bool "Hash functions: BLAKE2s" + select CRYPTO_ARCH_HAVE_LIB_BLAKE2S + help + BLAKE2s cryptographic hash function (RFC 7693) + + Architecture: arm + + This is faster than the generic implementations of BLAKE2s and + BLAKE2b, but slower than the NEON implementation of BLAKE2b. + There is no NEON implementation of BLAKE2s, since NEON doesn't + really help with it. + +config CRYPTO_BLAKE2B_NEON + tristate "Hash functions: BLAKE2b (NEON)" + depends on KERNEL_MODE_NEON + select CRYPTO_BLAKE2B + help + BLAKE2b cryptographic hash function (RFC 7693) + + Architecture: arm using + - NEON (Advanced SIMD) extensions + + BLAKE2b digest algorithm optimized with ARM NEON instructions. + On ARM processors that have NEON support but not the ARMv8 + Crypto Extensions, typically this BLAKE2b implementation is + much faster than the SHA-2 family and slightly faster than + SHA-1. config CRYPTO_SHA1_ARM - tristate "SHA1 digest algorithm (ARM-asm)" + tristate "Hash functions: SHA-1" select CRYPTO_SHA1 select CRYPTO_HASH help - SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented - using optimized ARM assembler. + SHA-1 secure hash algorithm (FIPS 180) + + Architecture: arm config CRYPTO_SHA1_ARM_NEON - tristate "SHA1 digest algorithm (ARM NEON)" + tristate "Hash functions: SHA-1 (NEON)" depends on KERNEL_MODE_NEON select CRYPTO_SHA1_ARM select CRYPTO_SHA1 select CRYPTO_HASH help - SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented - using optimized ARM NEON assembly, when NEON instructions are - available. + SHA-1 secure hash algorithm (FIPS 180) + + Architecture: arm using + - NEON (Advanced SIMD) extensions config CRYPTO_SHA1_ARM_CE - tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)" + tristate "Hash functions: SHA-1 (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_SHA1_ARM select CRYPTO_HASH help - SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented - using special ARMv8 Crypto Extensions. + SHA-1 secure hash algorithm (FIPS 180) + + Architecture: arm using ARMv8 Crypto Extensions config CRYPTO_SHA2_ARM_CE - tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)" + tristate "Hash functions: SHA-224 and SHA-256 (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_SHA256_ARM select CRYPTO_HASH help - SHA-256 secure hash standard (DFIPS 180-2) implemented - using special ARMv8 Crypto Extensions. + SHA-224 and SHA-256 secure hash algorithms (FIPS 180) + + Architecture: arm using + - ARMv8 Crypto Extensions config CRYPTO_SHA256_ARM - tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)" + tristate "Hash functions: SHA-224 and SHA-256 (NEON)" select CRYPTO_HASH depends on !CPU_V7M help - SHA-256 secure hash standard (DFIPS 180-2) implemented - using optimized ARM assembler and NEON, when available. + SHA-224 and SHA-256 secure hash algorithms (FIPS 180) + + Architecture: arm using + - NEON (Advanced SIMD) extensions config CRYPTO_SHA512_ARM - tristate "SHA-384/512 digest algorithm (ARM-asm and NEON)" + tristate "Hash functions: SHA-384 and SHA-512 (NEON)" select CRYPTO_HASH depends on !CPU_V7M help - SHA-512 secure hash standard (DFIPS 180-2) implemented - using optimized ARM assembler and NEON, when available. - -config CRYPTO_BLAKE2S_ARM - bool "BLAKE2s digest algorithm (ARM)" - select CRYPTO_ARCH_HAVE_LIB_BLAKE2S - help - BLAKE2s digest algorithm optimized with ARM scalar instructions. This - is faster than the generic implementations of BLAKE2s and BLAKE2b, but - slower than the NEON implementation of BLAKE2b. (There is no NEON - implementation of BLAKE2s, since NEON doesn't really help with it.) + SHA-384 and SHA-512 secure hash algorithms (FIPS 180) -config CRYPTO_BLAKE2B_NEON - tristate "BLAKE2b digest algorithm (ARM NEON)" - depends on KERNEL_MODE_NEON - select CRYPTO_BLAKE2B - help - BLAKE2b digest algorithm optimized with ARM NEON instructions. - On ARM processors that have NEON support but not the ARMv8 - Crypto Extensions, typically this BLAKE2b implementation is - much faster than SHA-2 and slightly faster than SHA-1. + Architecture: arm using + - NEON (Advanced SIMD) extensions config CRYPTO_AES_ARM - tristate "Scalar AES cipher for ARM" + tristate "Ciphers: AES" select CRYPTO_ALGAPI select CRYPTO_AES help - Use optimized AES assembler routines for ARM platforms. + Block ciphers: AES cipher algorithms (FIPS-197) + + Architecture: arm On ARM processors without the Crypto Extensions, this is the fastest AES implementation for single blocks. For multiple @@ -98,7 +162,7 @@ config CRYPTO_AES_ARM such attacks very difficult. config CRYPTO_AES_ARM_BS - tristate "Bit sliced AES using NEON instructions" + tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (bit-sliced NEON)" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_LIB_AES @@ -106,8 +170,13 @@ config CRYPTO_AES_ARM_BS select CRYPTO_CBC select CRYPTO_SIMD help - Use a faster and more secure NEON based implementation of AES in CBC, - CTR and XTS modes + Length-preserving ciphers: AES cipher algorithms (FIPS-197) + with block cipher modes: + - ECB (Electronic Codebook) mode (NIST SP800-38A) + - CBC (Cipher Block Chaining) mode (NIST SP800-38A) + - CTR (Counter) mode (NIST SP800-38A) + - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E + and IEEE 1619) Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode and for XTS mode encryption, CBC and XTS mode decryption speedup is @@ -116,58 +185,59 @@ config CRYPTO_AES_ARM_BS believed to be invulnerable to cache timing attacks. config CRYPTO_AES_ARM_CE - tristate "Accelerated AES using ARMv8 Crypto Extensions" + tristate "Ciphers: AES, modes: ECB/CBC/CTS/CTR/XTS (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_LIB_AES select CRYPTO_SIMD help - Use an implementation of AES in CBC, CTR and XTS modes that uses - ARMv8 Crypto Extensions + Length-preserving ciphers: AES cipher algorithms (FIPS-197) + with block cipher modes: + - ECB (Electronic Codebook) mode (NIST SP800-38A) + - CBC (Cipher Block Chaining) mode (NIST SP800-38A) + - CTR (Counter) mode (NIST SP800-38A) + - CTS (Cipher Text Stealing) mode (NIST SP800-38A) + - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E + and IEEE 1619) + + Architecture: arm using: + - ARMv8 Crypto Extensions -config CRYPTO_GHASH_ARM_CE - tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON - select CRYPTO_HASH - select CRYPTO_CRYPTD - select CRYPTO_GF128MUL +config CRYPTO_CHACHA20_NEON + tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (NEON)" + select CRYPTO_SKCIPHER + select CRYPTO_ARCH_HAVE_LIB_CHACHA help - Use an implementation of GHASH (used by the GCM AEAD chaining mode) - that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64) - that is part of the ARMv8 Crypto Extensions, or a slower variant that - uses the vmull.p8 instruction that is part of the basic NEON ISA. + Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12 + stream cipher algorithms -config CRYPTO_CRCT10DIF_ARM_CE - tristate "CRCT10DIF digest algorithm using PMULL instructions" - depends on KERNEL_MODE_NEON - depends on CRC_T10DIF - select CRYPTO_HASH + Architecture: arm using: + - NEON (Advanced SIMD) extensions config CRYPTO_CRC32_ARM_CE - tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions" + tristate "CRC32C and CRC32" depends on KERNEL_MODE_NEON depends on CRC32 select CRYPTO_HASH + help + CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) + and CRC32 CRC algorithm (IEEE 802.3) -config CRYPTO_CHACHA20_NEON - tristate "NEON and scalar accelerated ChaCha stream cipher algorithms" - select CRYPTO_SKCIPHER - select CRYPTO_ARCH_HAVE_LIB_CHACHA + Architecture: arm using: + - CRC and/or PMULL instructions -config CRYPTO_POLY1305_ARM - tristate "Accelerated scalar and SIMD Poly1305 hash implementations" - select CRYPTO_HASH - select CRYPTO_ARCH_HAVE_LIB_POLY1305 + Drivers: crc32-arm-ce and crc32c-arm-ce -config CRYPTO_NHPOLY1305_NEON - tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" +config CRYPTO_CRCT10DIF_ARM_CE + tristate "CRCT10DIF" depends on KERNEL_MODE_NEON - select CRYPTO_NHPOLY1305 + depends on CRC_T10DIF + select CRYPTO_HASH + help + CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF) -config CRYPTO_CURVE25519_NEON - tristate "NEON accelerated Curve25519 scalar multiplication library" - depends on KERNEL_MODE_NEON - select CRYPTO_LIB_CURVE25519_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CURVE25519 + Architecture: arm using: + - PMULL (Polynomial Multiply Long) instructions + +endmenu -endif diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index 8754c0f5fc90..c6beb1708c64 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h @@ -9,7 +9,6 @@ struct dev_archdata { #ifdef CONFIG_ARM_DMA_USE_IOMMU struct dma_iommu_mapping *mapping; #endif - unsigned int dma_coherent:1; unsigned int dma_ops_setup:1; }; diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 3088ef72704e..4bdd930167c0 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -17,6 +17,7 @@ #ifdef CONFIG_EFI void efi_init(void); +void arm_efi_init(void); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); @@ -37,7 +38,7 @@ void efi_virtmap_load(void); void efi_virtmap_unload(void); #else -#define efi_init() +#define arm_efi_init() #endif /* CONFIG_EFI */ /* arch specific definitions used by the stub code */ diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index bdc35c0e8dfb..326864f79d18 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -81,9 +81,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, /* Forward declaration, a strange C thing */ struct task_struct; -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - unsigned long __get_wchan(struct task_struct *p); #define task_pt_regs(p) \ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 95034d32213c..48737ec800eb 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -88,7 +88,7 @@ obj-$(CONFIG_VDSO) += vdso.o obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_PARAVIRT) += paravirt.o -head-y := head$(MMUEXT).o +obj-y += head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_PATCH_PHYS_VIRT) += phys2virt.o @@ -108,4 +108,4 @@ obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += spectre.o -extra-y := $(head-y) vmlinux.lds +extra-y := vmlinux.lds diff --git a/arch/arm/kernel/efi.c b/arch/arm/kernel/efi.c index e57dbcc89123..e50ad7eefc02 100644 --- a/arch/arm/kernel/efi.c +++ b/arch/arm/kernel/efi.c @@ -4,6 +4,7 @@ */ #include <linux/efi.h> +#include <linux/memblock.h> #include <asm/efi.h> #include <asm/mach/map.h> #include <asm/mmu_context.h> @@ -73,3 +74,81 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) return efi_set_mapping_permissions(mm, md); return 0; } + +static unsigned long __initdata screen_info_table = EFI_INVALID_TABLE_ADDR; +static unsigned long __initdata cpu_state_table = EFI_INVALID_TABLE_ADDR; + +const efi_config_table_type_t efi_arch_tables[] __initconst = { + {LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, &screen_info_table}, + {LINUX_EFI_ARM_CPU_STATE_TABLE_GUID, &cpu_state_table}, + {} +}; + +static void __init load_screen_info_table(void) +{ + struct screen_info *si; + + if (screen_info_table != EFI_INVALID_TABLE_ADDR) { + si = early_memremap_ro(screen_info_table, sizeof(*si)); + if (!si) { + pr_err("Could not map screen_info config table\n"); + return; + } + screen_info = *si; + early_memunmap(si, sizeof(*si)); + + /* dummycon on ARM needs non-zero values for columns/lines */ + screen_info.orig_video_cols = 80; + screen_info.orig_video_lines = 25; + + if (memblock_is_map_memory(screen_info.lfb_base)) + memblock_mark_nomap(screen_info.lfb_base, + screen_info.lfb_size); + } +} + +static void __init load_cpu_state_table(void) +{ + if (cpu_state_table != EFI_INVALID_TABLE_ADDR) { + struct efi_arm_entry_state *state; + bool dump_state = true; + + state = early_memremap_ro(cpu_state_table, + sizeof(struct efi_arm_entry_state)); + if (state == NULL) { + pr_warn("Unable to map CPU entry state table.\n"); + return; + } + + if ((state->sctlr_before_ebs & 1) == 0) + pr_warn(FW_BUG "EFI stub was entered with MMU and Dcache disabled, please fix your firmware!\n"); + else if ((state->sctlr_after_ebs & 1) == 0) + pr_warn(FW_BUG "ExitBootServices() returned with MMU and Dcache disabled, please fix your firmware!\n"); + else + dump_state = false; + + if (dump_state || efi_enabled(EFI_DBG)) { + pr_info("CPSR at EFI stub entry : 0x%08x\n", + state->cpsr_before_ebs); + pr_info("SCTLR at EFI stub entry : 0x%08x\n", + state->sctlr_before_ebs); + pr_info("CPSR after ExitBootServices() : 0x%08x\n", + state->cpsr_after_ebs); + pr_info("SCTLR after ExitBootServices(): 0x%08x\n", + state->sctlr_after_ebs); + } + early_memunmap(state, sizeof(struct efi_arm_entry_state)); + } +} + +void __init arm_efi_init(void) +{ + efi_init(); + + load_screen_info_table(); + + /* ARM does not permit early mappings to persist across paging_init() */ + efi_memmap_unmap(); + + load_cpu_state_table(); +} diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 96f3fbd51764..a2b31d91a1b6 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -232,10 +232,6 @@ void flush_thread(void) thread_notify(THREAD_NOTIFY_FLUSH, thread); } -void release_thread(struct task_struct *dead_task) -{ -} - asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) @@ -375,7 +371,7 @@ static unsigned long sigpage_addr(const struct mm_struct *mm, slots = ((last - first) >> PAGE_SHIFT) + 1; - offset = get_random_int() % slots; + offset = prandom_u32_max(slots); addr = first + (offset << PAGE_SHIFT); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1e8a50a97edf..cb88c6e69377 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1141,7 +1141,7 @@ void __init setup_arch(char **cmdline_p) #endif setup_dma_zone(mdesc); xen_early_init(); - efi_init(); + arm_efi_init(); /* * Make sure the calculation for lowmem/highmem is set appropriately * before reserving/allocating any memory diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index ea128e32e8ca..e07f359254c3 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -655,7 +655,7 @@ struct page *get_signal_page(void) PAGE_SIZE / sizeof(u32)); /* Give the signal return code some randomness */ - offset = 0x200 + (get_random_int() & 0x7fc); + offset = 0x200 + (get_random_u16() & 0x7fc); signal_return_offset = offset; /* Copy signal return handlers into the page */ diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index f4501dea98b0..60dc56d8acfb 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -19,8 +19,6 @@ #include <linux/clk/at91_pmc.h> #include <linux/platform_data/atmel.h> -#include <soc/at91/pm.h> - #include <asm/cacheflush.h> #include <asm/fncpy.h> #include <asm/system_misc.h> @@ -656,16 +654,6 @@ static int at91_pm_enter(suspend_state_t state) if (ret) return ret; -#ifdef CONFIG_PINCTRL_AT91 - /* - * FIXME: this is needed to communicate between the pinctrl driver and - * the PM implementation in the machine. Possibly part of the PM - * implementation should be moved down into the pinctrl driver and get - * called as part of the generic suspend/resume path. - */ - at91_pinctrl_gpio_suspend(); -#endif - switch (state) { case PM_SUSPEND_MEM: case PM_SUSPEND_STANDBY: @@ -690,9 +678,6 @@ static int at91_pm_enter(suspend_state_t state) } error: -#ifdef CONFIG_PINCTRL_AT91 - at91_pinctrl_gpio_resume(); -#endif at91_pm_config_quirks(false); return 0; } diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 2d58e273c96d..95e731676cea 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -22,6 +22,7 @@ #include <linux/io.h> #include <linux/gpio.h> #include <linux/leds.h> +#include <linux/uaccess.h> #include <linux/termios.h> #include <linux/amba/bus.h> #include <linux/amba/serial.h> diff --git a/arch/arm/mach-mmp/devices.c b/arch/arm/mach-mmp/devices.c index 79f4a2aa5475..9968239d8041 100644 --- a/arch/arm/mach-mmp/devices.c +++ b/arch/arm/mach-mmp/devices.c @@ -238,7 +238,7 @@ void pxa_usb_phy_deinit(void __iomem *phy_reg) static u64 __maybe_unused usb_dma_mask = ~(u32)0; #if IS_ENABLED(CONFIG_PHY_PXA_USB) -struct resource pxa168_usb_phy_resources[] = { +static struct resource pxa168_usb_phy_resources[] = { [0] = { .start = PXA168_U2O_PHYBASE, .end = PXA168_U2O_PHYBASE + USB_PHY_RANGE, @@ -259,7 +259,7 @@ struct platform_device pxa168_device_usb_phy = { #endif /* CONFIG_PHY_PXA_USB */ #if IS_ENABLED(CONFIG_USB_MV_UDC) -struct resource pxa168_u2o_resources[] = { +static struct resource pxa168_u2o_resources[] = { /* regbase */ [0] = { .start = PXA168_U2O_REGBASE + U2x_CAPREGS_OFFSET, @@ -294,7 +294,7 @@ struct platform_device pxa168_device_u2o = { #endif /* CONFIG_USB_MV_UDC */ #if IS_ENABLED(CONFIG_USB_EHCI_MV_U2O) -struct resource pxa168_u2oehci_resources[] = { +static struct resource pxa168_u2oehci_resources[] = { [0] = { .start = PXA168_U2O_REGBASE, .end = PXA168_U2O_REGBASE + USB_REG_RANGE, @@ -321,7 +321,7 @@ struct platform_device pxa168_device_u2oehci = { #endif #if IS_ENABLED(CONFIG_USB_MV_OTG) -struct resource pxa168_u2ootg_resources[] = { +static struct resource pxa168_u2ootg_resources[] = { /* regbase */ [0] = { .start = PXA168_U2O_REGBASE + U2x_CAPREGS_OFFSET, diff --git a/arch/arm/mach-omap1/board-htcherald.c b/arch/arm/mach-omap1/board-htcherald.c index ec049cee49c6..291d294b5824 100644 --- a/arch/arm/mach-omap1/board-htcherald.c +++ b/arch/arm/mach-omap1/board-htcherald.c @@ -141,13 +141,6 @@ #define HTCPLD_GPIO_DOWN_DPAD HTCPLD_BASE(7, 4) #define HTCPLD_GPIO_ENTER_DPAD HTCPLD_BASE(7, 3) -/* - * The htcpld chip requires a gpio write to a specific line - * to re-enable interrupts after one has occurred. - */ -#define HTCPLD_GPIO_INT_RESET_HI HTCPLD_BASE(2, 7) -#define HTCPLD_GPIO_INT_RESET_LO HTCPLD_BASE(2, 0) - /* Chip 5 */ #define HTCPLD_IRQ_RIGHT_KBD HTCPLD_IRQ(0, 7) #define HTCPLD_IRQ_UP_KBD HTCPLD_IRQ(0, 6) @@ -348,8 +341,6 @@ static struct htcpld_chip_platform_data htcpld_chips[] = { }; static struct htcpld_core_platform_data htcpld_pfdata = { - .int_reset_gpio_hi = HTCPLD_GPIO_INT_RESET_HI, - .int_reset_gpio_lo = HTCPLD_GPIO_INT_RESET_LO, .i2c_adapter_id = 1, .chip = htcpld_chips, diff --git a/arch/arm/mach-s3c/mach-gta02.c b/arch/arm/mach-s3c/mach-gta02.c index abfdce765525..d50a81d85ae1 100644 --- a/arch/arm/mach-s3c/mach-gta02.c +++ b/arch/arm/mach-s3c/mach-gta02.c @@ -421,7 +421,14 @@ static struct s3c2410_platform_nand __initdata gta02_nand_info = { /* Get PMU to set USB current limit accordingly. */ static struct s3c2410_udc_mach_info gta02_udc_cfg __initdata = { .vbus_draw = gta02_udc_vbus_draw, - .pullup_pin = GTA02_GPIO_USB_PULLUP, +}; + +static struct gpiod_lookup_table gta02_udc_gpio_table = { + .dev_id = "s3c2410-usbgadget", + .table = { + GPIO_LOOKUP("GPIOB", 9, "pullup", GPIO_ACTIVE_HIGH), + { }, + }, }; /* USB */ @@ -555,6 +562,7 @@ static void __init gta02_machine_init(void) s3c_gpio_cfgall_range(S3C2410_GPE(0), 5, S3C_GPIO_SFN(2), S3C_GPIO_PULL_NONE); + gpiod_add_lookup_table(>a02_udc_gpio_table); gpiod_add_lookup_table(>a02_audio_gpio_table); gpiod_add_lookup_table(>a02_mmc_gpio_table); platform_add_devices(gta02_devices, ARRAY_SIZE(gta02_devices)); diff --git a/arch/arm/mach-s3c/mach-h1940.c b/arch/arm/mach-s3c/mach-h1940.c index 032b18837855..83ac6cfdb1d8 100644 --- a/arch/arm/mach-s3c/mach-h1940.c +++ b/arch/arm/mach-s3c/mach-h1940.c @@ -167,9 +167,15 @@ static struct gpio_chip h1940_latch_gpiochip = { }; static struct s3c2410_udc_mach_info h1940_udc_cfg __initdata = { - .vbus_pin = S3C2410_GPG(5), - .vbus_pin_inverted = 1, - .pullup_pin = H1940_LATCH_USB_DP, +}; + +static struct gpiod_lookup_table h1940_udc_gpio_table = { + .dev_id = "s3c2410-usbgadget", + .table = { + GPIO_LOOKUP("GPIOG", 5, "vbus", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("H1940_LATCH", 7, "pullup", GPIO_ACTIVE_HIGH), + { }, + }, }; static struct s3c2410_ts_mach_info h1940_ts_cfg __initdata = { @@ -725,6 +731,7 @@ static void __init h1940_init(void) u32 tmp; s3c24xx_fb_set_platdata(&h1940_fb_info); + gpiod_add_lookup_table(&h1940_udc_gpio_table); gpiod_add_lookup_table(&h1940_mmc_gpio_table); gpiod_add_lookup_table(&h1940_audio_gpio_table); gpiod_add_lookup_table(&h1940_bat_gpio_table); diff --git a/arch/arm/mach-s3c/mach-jive.c b/arch/arm/mach-s3c/mach-jive.c index e32773175944..16859bb3bb13 100644 --- a/arch/arm/mach-s3c/mach-jive.c +++ b/arch/arm/mach-s3c/mach-jive.c @@ -493,7 +493,14 @@ static struct platform_device *jive_devices[] __initdata = { }; static struct s3c2410_udc_mach_info jive_udc_cfg __initdata = { - .vbus_pin = S3C2410_GPG(1), /* detect is on GPG1 */ +}; + +static struct gpiod_lookup_table jive_udc_gpio_table = { + .dev_id = "s3c2410-usbgadget", + .table = { + GPIO_LOOKUP("GPIOG", 1, "vbus", GPIO_ACTIVE_HIGH), + { }, + }, }; /* Jive power management device */ @@ -669,6 +676,7 @@ static void __init jive_machine_init(void) pm_power_off = jive_power_off; + gpiod_add_lookup_table(&jive_udc_gpio_table); gpiod_add_lookup_table(&jive_lcdspi_gpiod_table); gpiod_add_lookup_table(&jive_wm8750_gpiod_table); platform_add_devices(jive_devices, ARRAY_SIZE(jive_devices)); diff --git a/arch/arm/mach-s3c/mach-mini2440.c b/arch/arm/mach-s3c/mach-mini2440.c index a6d17ffcdba1..283be70ca622 100644 --- a/arch/arm/mach-s3c/mach-mini2440.c +++ b/arch/arm/mach-s3c/mach-mini2440.c @@ -93,9 +93,15 @@ static struct s3c2410_uartcfg mini2440_uartcfgs[] __initdata = { /* USB device UDC support */ static struct s3c2410_udc_mach_info mini2440_udc_cfg __initdata = { - .pullup_pin = S3C2410_GPC(5), }; +static struct gpiod_lookup_table mini2440_udc_gpio_table = { + .dev_id = "s3c2410-usbgadget", + .table = { + GPIO_LOOKUP("GPIOC", 5, "pullup", GPIO_ACTIVE_HIGH), + { }, + }, +}; /* LCD timing and setup */ @@ -755,6 +761,7 @@ static void __init mini2440_init(void) s3c24xx_fb_set_platdata(&mini2440_fb_info); } + gpiod_add_lookup_table(&mini2440_udc_gpio_table); s3c24xx_udc_set_platdata(&mini2440_udc_cfg); gpiod_add_lookup_table(&mini2440_mmc_gpio_table); s3c24xx_mci_set_platdata(&mini2440_mmc_cfg); diff --git a/arch/arm/mach-s3c/mach-n30.c b/arch/arm/mach-s3c/mach-n30.c index 75f5dc6351a1..90122fc6b2aa 100644 --- a/arch/arm/mach-s3c/mach-n30.c +++ b/arch/arm/mach-s3c/mach-n30.c @@ -84,9 +84,15 @@ static struct s3c2410_uartcfg n30_uartcfgs[] = { }; static struct s3c2410_udc_mach_info n30_udc_cfg __initdata = { - .vbus_pin = S3C2410_GPG(1), - .vbus_pin_inverted = 0, - .pullup_pin = S3C2410_GPB(3), +}; + +static struct gpiod_lookup_table n30_udc_gpio_table = { + .dev_id = "s3c2410-usbgadget", + .table = { + GPIO_LOOKUP("GPIOG", 1, "vbus", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("GPIOB", 3, "pullup", GPIO_ACTIVE_HIGH), + { }, + }, }; static struct gpio_keys_button n30_buttons[] = { @@ -595,6 +601,7 @@ static void __init n30_init(void) WARN_ON(gpio_request(S3C2410_GPG(4), "mmc power")); s3c24xx_fb_set_platdata(&n30_fb_info); + gpiod_add_lookup_table(&n30_udc_gpio_table); s3c24xx_udc_set_platdata(&n30_udc_cfg); gpiod_add_lookup_table(&n30_mci_gpio_table); s3c24xx_mci_set_platdata(&n30_mci_cfg); diff --git a/arch/arm/mach-s3c/mach-rx1950.c b/arch/arm/mach-s3c/mach-rx1950.c index 7a3e7c0a6484..d8c49e562660 100644 --- a/arch/arm/mach-s3c/mach-rx1950.c +++ b/arch/arm/mach-s3c/mach-rx1950.c @@ -643,9 +643,15 @@ static struct s3c2410_platform_nand rx1950_nand_info = { }; static struct s3c2410_udc_mach_info rx1950_udc_cfg __initdata = { - .vbus_pin = S3C2410_GPG(5), - .vbus_pin_inverted = 1, - .pullup_pin = S3C2410_GPJ(5), +}; + +static struct gpiod_lookup_table rx1950_udc_gpio_table = { + .dev_id = "s3c2410-usbgadget", + .table = { + GPIO_LOOKUP("GPIOG", 5, "vbus", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("GPIOJ", 5, "pullup", GPIO_ACTIVE_HIGH), + { }, + }, }; static struct s3c2410_ts_mach_info rx1950_ts_cfg __initdata = { @@ -847,6 +853,7 @@ static void __init rx1950_init_machine(void) gpio_direction_output(S3C2410_GPJ(6), 0); pwm_add_table(rx1950_pwm_lookup, ARRAY_SIZE(rx1950_pwm_lookup)); + gpiod_add_lookup_table(&rx1950_udc_gpio_table); gpiod_add_lookup_table(&rx1950_audio_gpio_table); gpiod_add_lookup_table(&rx1950_bat_gpio_table); /* Configure the I2S pins (GPE0...GPE4) in correct mode */ diff --git a/arch/arm/mach-s3c/mach-smdk2413.c b/arch/arm/mach-s3c/mach-smdk2413.c index f1f0ec174579..2b4e20aaa346 100644 --- a/arch/arm/mach-s3c/mach-smdk2413.c +++ b/arch/arm/mach-s3c/mach-smdk2413.c @@ -12,7 +12,7 @@ #include <linux/list.h> #include <linux/timer.h> #include <linux/init.h> -#include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/serial_core.h> #include <linux/serial_s3c.h> #include <linux/platform_device.h> @@ -74,9 +74,15 @@ static struct s3c2410_uartcfg smdk2413_uartcfgs[] __initdata = { static struct s3c2410_udc_mach_info smdk2413_udc_cfg __initdata = { - .pullup_pin = S3C2410_GPF(2), }; +static struct gpiod_lookup_table smdk2413_udc_gpio_table = { + .dev_id = "s3c2410-usbgadget", + .table = { + GPIO_LOOKUP("GPIOF", 2, "pullup", GPIO_ACTIVE_HIGH), + { }, + }, +}; static struct platform_device *smdk2413_devices[] __initdata = { &s3c_device_ohci, @@ -115,7 +121,7 @@ static void __init smdk2413_machine_init(void) S3C2410_MISCCR_USBSUSPND0 | S3C2410_MISCCR_USBSUSPND1, 0x0); - + gpiod_add_lookup_table(&smdk2413_udc_gpio_table); s3c24xx_udc_set_platdata(&smdk2413_udc_cfg); s3c_i2c0_set_platdata(NULL); /* Configure the I2S pins (GPE0...GPE4) in correct mode */ diff --git a/arch/arm/mach-spear/generic.h b/arch/arm/mach-spear/generic.h index 43b7996ab754..9e36920d4cfd 100644 --- a/arch/arm/mach-spear/generic.h +++ b/arch/arm/mach-spear/generic.h @@ -25,11 +25,8 @@ extern struct pl022_ssp_controller pl022_plat_data; extern struct pl08x_platform_data pl080_plat_data; void __init spear_setup_of_timer(void); -void __init spear3xx_clk_init(void __iomem *misc_base, - void __iomem *soc_config_base); void __init spear3xx_map_io(void); void __init spear3xx_dt_init_irq(void); -void __init spear6xx_clk_init(void __iomem *misc_base); void __init spear13xx_map_io(void); void __init spear13xx_l2x0_init(void); diff --git a/arch/arm/mach-spear/spear3xx.c b/arch/arm/mach-spear/spear3xx.c index 2ba406e92c41..7ef9670d3029 100644 --- a/arch/arm/mach-spear/spear3xx.c +++ b/arch/arm/mach-spear/spear3xx.c @@ -13,6 +13,7 @@ #include <linux/amba/pl022.h> #include <linux/amba/pl080.h> #include <linux/clk.h> +#include <linux/clk/spear.h> #include <linux/io.h> #include <asm/mach/map.h> #include "pl080.h" diff --git a/arch/arm/mach-spear/spear6xx.c b/arch/arm/mach-spear/spear6xx.c index 58183493e06d..f0a1e704cceb 100644 --- a/arch/arm/mach-spear/spear6xx.c +++ b/arch/arm/mach-spear/spear6xx.c @@ -12,6 +12,7 @@ #include <linux/amba/pl08x.h> #include <linux/clk.h> +#include <linux/clk/spear.h> #include <linux/err.h> #include <linux/of.h> #include <linux/of_address.h> @@ -339,7 +340,7 @@ static struct pl08x_platform_data spear6xx_pl080_plat_data = { * 0xD0000000 0xFD000000 * 0xFC000000 0xFC000000 */ -struct map_desc spear6xx_io_desc[] __initdata = { +static struct map_desc spear6xx_io_desc[] __initdata = { { .virtual = (unsigned long)VA_SPEAR6XX_ML_CPU_BASE, .pfn = __phys_to_pfn(SPEAR_ICM3_ML1_2_BASE), @@ -359,12 +360,12 @@ struct map_desc spear6xx_io_desc[] __initdata = { }; /* This will create static memory mapping for selected devices */ -void __init spear6xx_map_io(void) +static void __init spear6xx_map_io(void) { iotable_init(spear6xx_io_desc, ARRAY_SIZE(spear6xx_io_desc)); } -void __init spear6xx_timer_init(void) +static void __init spear6xx_timer_init(void) { char pclk_name[] = "pll3_clk"; struct clk *gpt_clk, *pclk; @@ -394,7 +395,7 @@ void __init spear6xx_timer_init(void) } /* Add auxdata to pass platform data */ -struct of_dev_auxdata spear6xx_auxdata_lookup[] __initdata = { +static struct of_dev_auxdata spear6xx_auxdata_lookup[] __initdata = { OF_DEV_AUXDATA("arm,pl080", SPEAR_ICM3_DMA_BASE, NULL, &spear6xx_pl080_plat_data), {} diff --git a/arch/arm/mach-versatile/integrator_ap.c b/arch/arm/mach-versatile/integrator_ap.c index e216fac917d0..4bd6712e9f52 100644 --- a/arch/arm/mach-versatile/integrator_ap.c +++ b/arch/arm/mach-versatile/integrator_ap.c @@ -11,6 +11,7 @@ #include <linux/of_irq.h> #include <linux/of_address.h> #include <linux/of_platform.h> +#include <linux/uaccess.h> #include <linux/termios.h> #include <linux/mfd/syscon.h> #include <linux/regmap.h> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ef691a5720d2..d7909091cf97 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1769,8 +1769,14 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) { } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { - dev->archdata.dma_coherent = coherent; - dev->dma_coherent = coherent; + /* + * Due to legacy code that sets the ->dma_coherent flag from a bus + * notifier we can't just assign coherent to the ->dma_coherent flag + * here, but instead have to make sure we only set but never clear it + * for now. + */ + if (coherent) + dev->dma_coherent = true; /* * Don't override the dma_ops if they have already been set. Ideally diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fb8463c028b2..505c8a1ccbe0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -209,7 +209,6 @@ config ARM64 select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_GENERIC_VDSO - select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING select KASAN_VMALLOC if KASAN @@ -633,6 +632,23 @@ config ARM64_ERRATUM_1530923 config ARM64_WORKAROUND_REPEAT_TLBI bool +config ARM64_ERRATUM_2441007 + bool "Cortex-A55: Completion of affected memory accesses might not be guaranteed by completion of a TLBI" + default y + select ARM64_WORKAROUND_REPEAT_TLBI + help + This option adds a workaround for ARM Cortex-A55 erratum #2441007. + + Under very rare circumstances, affected Cortex-A55 CPUs + may not handle a race between a break-before-make sequence on one + CPU, and another CPU accessing the same page. This could allow a + store to a page that has been unmapped. + + Work around this by adding the affected CPUs to the list that needs + TLB sequences to be done twice. + + If unsure, say Y. + config ARM64_ERRATUM_1286807 bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation" default y @@ -1432,7 +1448,7 @@ config XEN help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64. -config FORCE_MAX_ZONEORDER +config ARCH_FORCE_MAX_ORDER int default "14" if ARM64_64K_PAGES default "12" if ARM64_16K_PAGES @@ -2251,6 +2267,3 @@ source "drivers/acpi/Kconfig" source "arch/arm64/kvm/Kconfig" -if CRYPTO -source "arch/arm64/crypto/Kconfig" -endif # CRYPTO diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 6d9d4a58b898..5e56d26a2239 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -133,9 +133,6 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y) CC_FLAGS_FTRACE := -fpatchable-function-entry=2 endif -# Default value -head-y := arch/arm64/kernel/head.o - ifeq ($(CONFIG_KASAN_SW_TAGS), y) KASAN_SHADOW_SCALE_SHIFT := 4 else ifeq ($(CONFIG_KASAN_GENERIC), y) @@ -151,12 +148,17 @@ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a # Default target when executing plain make boot := arch/arm64/boot + +ifeq ($(CONFIG_EFI_ZBOOT),) KBUILD_IMAGE := $(boot)/Image.gz +else +KBUILD_IMAGE := $(boot)/vmlinuz.efi +endif -all: Image.gz +all: $(notdir $(KBUILD_IMAGE)) -Image: vmlinux +Image vmlinuz.efi: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ Image.%: Image diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore index 9a7a9009d43a..af5dc61f8b43 100644 --- a/arch/arm64/boot/.gitignore +++ b/arch/arm64/boot/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only Image Image.gz +vmlinuz* diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index a0e3dedd2883..c65aee088410 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -38,3 +38,9 @@ $(obj)/Image.lzo: $(obj)/Image FORCE $(obj)/Image.zst: $(obj)/Image FORCE $(call if_changed,zstd) + +EFI_ZBOOT_PAYLOAD := Image +EFI_ZBOOT_BFD_TARGET := elf64-littleaarch64 +EFI_ZBOOT_MACH_TYPE := ARM64 + +include $(srctree)/drivers/firmware/efi/libstub/Makefile.zboot diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 53493dc7d976..bb916a0948a8 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1299,7 +1299,7 @@ interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>; phys = <&usb3_phy0>, <&usb3_phy0>; phy-names = "usb2-phy", "usb3-phy"; - snps,dis-u2-freeclk-exists-quirk; + snps,gfladj-refclk-lpm-sel-quirk; }; }; @@ -1341,7 +1341,7 @@ interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>; phys = <&usb3_phy1>, <&usb3_phy1>; phy-names = "usb2-phy", "usb3-phy"; - snps,dis-u2-freeclk-exists-quirk; + snps,gfladj-refclk-lpm-sel-quirk; }; }; diff --git a/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi b/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi index 63e7a39e100e..8166e3c1ff4e 100644 --- a/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi +++ b/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi @@ -5,7 +5,7 @@ #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/input/input.h> -#include <dt-bindings/clk/versaclock.h> +#include <dt-bindings/clock/versaclock.h> / { backlight_lvds: backlight-lvds { diff --git a/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi b/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi index f5c1d74b738b..d3fc8ffd5b4c 100644 --- a/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi +++ b/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi @@ -4,7 +4,7 @@ */ #include <dt-bindings/gpio/gpio.h> -#include <dt-bindings/clk/versaclock.h> +#include <dt-bindings/clock/versaclock.h> / { memory@48000000 { diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 7b49c80286be..0b6af3348e79 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -112,7 +112,6 @@ CONFIG_ACPI_APEI_MEMORY_FAILURE=y CONFIG_ACPI_APEI_EINJ=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=y -CONFIG_ARM64_CRYPTO=y CONFIG_CRYPTO_SHA1_ARM64_CE=y CONFIG_CRYPTO_SHA2_ARM64_CE=y CONFIG_CRYPTO_SHA512_ARM64_CE=m @@ -1268,12 +1267,12 @@ CONFIG_QCOM_L3_PMU=y CONFIG_HISI_PMU=y CONFIG_NVMEM_IMX_OCOTP=y CONFIG_NVMEM_IMX_OCOTP_SCU=y -CONFIG_MTK_EFUSE=y -CONFIG_QCOM_QFPROM=y -CONFIG_ROCKCHIP_EFUSE=y +CONFIG_NVMEM_MTK_EFUSE=y +CONFIG_NVMEM_QCOM_QFPROM=y +CONFIG_NVMEM_ROCKCHIP_EFUSE=y CONFIG_NVMEM_SUNXI_SID=y -CONFIG_UNIPHIER_EFUSE=y -CONFIG_MESON_EFUSE=m +CONFIG_NVMEM_UNIPHIER_EFUSE=y +CONFIG_NVMEM_MESON_EFUSE=m CONFIG_NVMEM_RMEM=m CONFIG_NVMEM_LAYERSCAPE_SFP=m CONFIG_FPGA=y diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 60db5bb2ddda..8bd80508a710 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -1,141 +1,282 @@ # SPDX-License-Identifier: GPL-2.0 -menuconfig ARM64_CRYPTO - bool "ARM64 Accelerated Cryptographic Algorithms" - depends on ARM64 +menu "Accelerated Cryptographic Algorithms for CPU (arm64)" + +config CRYPTO_GHASH_ARM64_CE + tristate "Hash functions: GHASH (ARMv8 Crypto Extensions)" + depends on KERNEL_MODE_NEON + select CRYPTO_HASH + select CRYPTO_GF128MUL + select CRYPTO_LIB_AES + select CRYPTO_AEAD help - Say Y here to choose from a selection of cryptographic algorithms - implemented using ARM64 specific CPU features or instructions. + GCM GHASH function (NIST SP800-38D) -if ARM64_CRYPTO + Architecture: arm64 using: + - ARMv8 Crypto Extensions -config CRYPTO_SHA256_ARM64 - tristate "SHA-224/SHA-256 digest algorithm for arm64" - select CRYPTO_HASH +config CRYPTO_NHPOLY1305_NEON + tristate "Hash functions: NHPoly1305 (NEON)" + depends on KERNEL_MODE_NEON + select CRYPTO_NHPOLY1305 + help + NHPoly1305 hash function (Adiantum) -config CRYPTO_SHA512_ARM64 - tristate "SHA-384/SHA-512 digest algorithm for arm64" + Architecture: arm64 using: + - NEON (Advanced SIMD) extensions + +config CRYPTO_POLY1305_NEON + tristate "Hash functions: Poly1305 (NEON)" + depends on KERNEL_MODE_NEON select CRYPTO_HASH + select CRYPTO_ARCH_HAVE_LIB_POLY1305 + help + Poly1305 authenticator algorithm (RFC7539) + + Architecture: arm64 using: + - NEON (Advanced SIMD) extensions config CRYPTO_SHA1_ARM64_CE - tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)" + tristate "Hash functions: SHA-1 (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_HASH select CRYPTO_SHA1 + help + SHA-1 secure hash algorithm (FIPS 180) + + Architecture: arm64 using: + - ARMv8 Crypto Extensions + +config CRYPTO_SHA256_ARM64 + tristate "Hash functions: SHA-224 and SHA-256" + select CRYPTO_HASH + help + SHA-224 and SHA-256 secure hash algorithms (FIPS 180) + + Architecture: arm64 config CRYPTO_SHA2_ARM64_CE - tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)" + tristate "Hash functions: SHA-224 and SHA-256 (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_HASH select CRYPTO_SHA256_ARM64 + help + SHA-224 and SHA-256 secure hash algorithms (FIPS 180) + + Architecture: arm64 using: + - ARMv8 Crypto Extensions + +config CRYPTO_SHA512_ARM64 + tristate "Hash functions: SHA-384 and SHA-512" + select CRYPTO_HASH + help + SHA-384 and SHA-512 secure hash algorithms (FIPS 180) + + Architecture: arm64 config CRYPTO_SHA512_ARM64_CE - tristate "SHA-384/SHA-512 digest algorithm (ARMv8 Crypto Extensions)" + tristate "Hash functions: SHA-384 and SHA-512 (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_HASH select CRYPTO_SHA512_ARM64 + help + SHA-384 and SHA-512 secure hash algorithms (FIPS 180) + + Architecture: arm64 using: + - ARMv8 Crypto Extensions config CRYPTO_SHA3_ARM64 - tristate "SHA3 digest algorithm (ARMv8.2 Crypto Extensions)" + tristate "Hash functions: SHA-3 (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_HASH select CRYPTO_SHA3 + help + SHA-3 secure hash algorithms (FIPS 202) + + Architecture: arm64 using: + - ARMv8.2 Crypto Extensions config CRYPTO_SM3_ARM64_CE - tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)" + tristate "Hash functions: SM3 (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_HASH select CRYPTO_SM3 + help + SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012) -config CRYPTO_SM4_ARM64_CE - tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)" - depends on KERNEL_MODE_NEON - select CRYPTO_ALGAPI - select CRYPTO_SM4 - -config CRYPTO_SM4_ARM64_CE_BLK - tristate "SM4 in ECB/CBC/CFB/CTR modes using ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON - select CRYPTO_SKCIPHER - select CRYPTO_SM4 - -config CRYPTO_SM4_ARM64_NEON_BLK - tristate "SM4 in ECB/CBC/CFB/CTR modes using NEON instructions" - depends on KERNEL_MODE_NEON - select CRYPTO_SKCIPHER - select CRYPTO_SM4 - -config CRYPTO_GHASH_ARM64_CE - tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON - select CRYPTO_HASH - select CRYPTO_GF128MUL - select CRYPTO_LIB_AES - select CRYPTO_AEAD + Architecture: arm64 using: + - ARMv8.2 Crypto Extensions config CRYPTO_POLYVAL_ARM64_CE - tristate "POLYVAL using ARMv8 Crypto Extensions (for HCTR2)" + tristate "Hash functions: POLYVAL (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_POLYVAL + help + POLYVAL hash function for HCTR2 -config CRYPTO_CRCT10DIF_ARM64_CE - tristate "CRCT10DIF digest algorithm using PMULL instructions" - depends on KERNEL_MODE_NEON && CRC_T10DIF - select CRYPTO_HASH + Architecture: arm64 using: + - ARMv8 Crypto Extensions config CRYPTO_AES_ARM64 - tristate "AES core cipher using scalar instructions" + tristate "Ciphers: AES, modes: ECB, CBC, CTR, CTS, XCTR, XTS" select CRYPTO_AES + help + Block ciphers: AES cipher algorithms (FIPS-197) + Length-preserving ciphers: AES with ECB, CBC, CTR, CTS, + XCTR, and XTS modes + AEAD cipher: AES with CBC, ESSIV, and SHA-256 + for fscrypt and dm-crypt + + Architecture: arm64 config CRYPTO_AES_ARM64_CE - tristate "AES core cipher using ARMv8 Crypto Extensions" + tristate "Ciphers: AES (ARMv8 Crypto Extensions)" depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_ALGAPI select CRYPTO_LIB_AES + help + Block ciphers: AES cipher algorithms (FIPS-197) -config CRYPTO_AES_ARM64_CE_CCM - tristate "AES in CCM mode using ARMv8 Crypto Extensions" - depends on ARM64 && KERNEL_MODE_NEON - select CRYPTO_ALGAPI - select CRYPTO_AES_ARM64_CE - select CRYPTO_AEAD - select CRYPTO_LIB_AES + Architecture: arm64 using: + - ARMv8 Crypto Extensions config CRYPTO_AES_ARM64_CE_BLK - tristate "AES in ECB/CBC/CTR/XTS/XCTR modes using ARMv8 Crypto Extensions" + tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_AES_ARM64_CE + help + Length-preserving ciphers: AES cipher algorithms (FIPS-197) + with block cipher modes: + - ECB (Electronic Codebook) mode (NIST SP800-38A) + - CBC (Cipher Block Chaining) mode (NIST SP800-38A) + - CTR (Counter) mode (NIST SP800-38A) + - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E + and IEEE 1619) + + Architecture: arm64 using: + - ARMv8 Crypto Extensions config CRYPTO_AES_ARM64_NEON_BLK - tristate "AES in ECB/CBC/CTR/XTS/XCTR modes using NEON instructions" + tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (NEON)" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_LIB_AES + help + Length-preserving ciphers: AES cipher algorithms (FIPS-197) + with block cipher modes: + - ECB (Electronic Codebook) mode (NIST SP800-38A) + - CBC (Cipher Block Chaining) mode (NIST SP800-38A) + - CTR (Counter) mode (NIST SP800-38A) + - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E + and IEEE 1619) + + Architecture: arm64 using: + - NEON (Advanced SIMD) extensions config CRYPTO_CHACHA20_NEON - tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions" + tristate "Ciphers: ChaCha (NEON)" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA + help + Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12 + stream cipher algorithms -config CRYPTO_POLY1305_NEON - tristate "Poly1305 hash function using scalar or NEON instructions" + Architecture: arm64 using: + - NEON (Advanced SIMD) extensions + +config CRYPTO_AES_ARM64_BS + tristate "Ciphers: AES, modes: ECB/CBC/CTR/XCTR/XTS modes (bit-sliced NEON)" depends on KERNEL_MODE_NEON - select CRYPTO_HASH - select CRYPTO_ARCH_HAVE_LIB_POLY1305 + select CRYPTO_SKCIPHER + select CRYPTO_AES_ARM64_NEON_BLK + select CRYPTO_LIB_AES + help + Length-preserving ciphers: AES cipher algorithms (FIPS-197) + with block cipher modes: + - ECB (Electronic Codebook) mode (NIST SP800-38A) + - CBC (Cipher Block Chaining) mode (NIST SP800-38A) + - CTR (Counter) mode (NIST SP800-38A) + - XCTR mode for HCTR2 + - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E + and IEEE 1619) -config CRYPTO_NHPOLY1305_NEON - tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)" + Architecture: arm64 using: + - bit-sliced algorithm + - NEON (Advanced SIMD) extensions + +config CRYPTO_SM4_ARM64_CE + tristate "Ciphers: SM4 (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON - select CRYPTO_NHPOLY1305 + select CRYPTO_ALGAPI + select CRYPTO_SM4 + help + Block ciphers: SM4 cipher algorithms (OSCCA GB/T 32907-2016) -config CRYPTO_AES_ARM64_BS - tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm" + Architecture: arm64 using: + - ARMv8.2 Crypto Extensions + - NEON (Advanced SIMD) extensions + +config CRYPTO_SM4_ARM64_CE_BLK + tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER - select CRYPTO_AES_ARM64_NEON_BLK + select CRYPTO_SM4 + help + Length-preserving ciphers: SM4 cipher algorithms (OSCCA GB/T 32907-2016) + with block cipher modes: + - ECB (Electronic Codebook) mode (NIST SP800-38A) + - CBC (Cipher Block Chaining) mode (NIST SP800-38A) + - CFB (Cipher Feedback) mode (NIST SP800-38A) + - CTR (Counter) mode (NIST SP800-38A) + + Architecture: arm64 using: + - ARMv8 Crypto Extensions + - NEON (Advanced SIMD) extensions + +config CRYPTO_SM4_ARM64_NEON_BLK + tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR (NEON)" + depends on KERNEL_MODE_NEON + select CRYPTO_SKCIPHER + select CRYPTO_SM4 + help + Length-preserving ciphers: SM4 cipher algorithms (OSCCA GB/T 32907-2016) + with block cipher modes: + - ECB (Electronic Codebook) mode (NIST SP800-38A) + - CBC (Cipher Block Chaining) mode (NIST SP800-38A) + - CFB (Cipher Feedback) mode (NIST SP800-38A) + - CTR (Counter) mode (NIST SP800-38A) + + Architecture: arm64 using: + - NEON (Advanced SIMD) extensions + +config CRYPTO_AES_ARM64_CE_CCM + tristate "AEAD cipher: AES in CCM mode (ARMv8 Crypto Extensions)" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES_ARM64_CE + select CRYPTO_AEAD select CRYPTO_LIB_AES + help + AEAD cipher: AES cipher algorithms (FIPS-197) with + CCM (Counter with Cipher Block Chaining-Message Authentication Code) + authenticated encryption mode (NIST SP800-38C) + + Architecture: arm64 using: + - ARMv8 Crypto Extensions + - NEON (Advanced SIMD) extensions + +config CRYPTO_CRCT10DIF_ARM64_CE + tristate "CRCT10DIF (PMULL)" + depends on KERNEL_MODE_NEON && CRC_T10DIF + select CRYPTO_HASH + help + CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF) + + Architecture: arm64 using + - PMULL (Polynomial Multiply Long) instructions + +endmenu -endif diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c index bbbe351e9045..a406454578f0 100644 --- a/arch/arm64/hyperv/mshyperv.c +++ b/arch/arm64/hyperv/mshyperv.c @@ -38,7 +38,7 @@ static int __init hyperv_init(void) return 0; /* Setup the guest ID */ - guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); + guest_id = hv_generate_guest_id(LINUX_VERSION_CODE); hv_set_vpreg(HV_REGISTER_GUEST_OSID, guest_id); /* Get the features and hints from Hyper-V */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 8aa0d276a636..abc418650fec 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -60,6 +60,7 @@ #define ARM_CPU_IMP_FUJITSU 0x46 #define ARM_CPU_IMP_HISI 0x48 #define ARM_CPU_IMP_APPLE 0x61 +#define ARM_CPU_IMP_AMPERE 0xC0 #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -123,6 +124,8 @@ #define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028 #define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029 +#define AMPERE_CPU_PART_AMPERE1 0xAC3 + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -172,6 +175,7 @@ #define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) #define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) +#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e9c9388ccc02..45e2136322ba 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -393,6 +393,7 @@ struct kvm_vcpu_arch { */ struct { u32 mdscr_el1; + bool pstate_ss; } guest_debug_preserved; /* vcpu power state */ @@ -535,6 +536,9 @@ struct kvm_vcpu_arch { #define IN_WFIT __vcpu_single_flag(sflags, BIT(3)) /* vcpu system registers loaded on physical CPU */ #define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4)) +/* Software step state is Active-pending */ +#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5)) + /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b5df82aa99e6..71a1af42f0e8 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1082,24 +1082,13 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * page after fork() + CoW for pfn mappings. We don't always have a * hardware-managed access flag on arm64. */ -static inline bool arch_faults_on_old_pte(void) -{ - /* The register read below requires a stable CPU to make any sense */ - cant_migrate(); - - return !cpu_has_hw_af(); -} -#define arch_faults_on_old_pte arch_faults_on_old_pte +#define arch_has_hw_pte_young cpu_has_hw_af /* * Experimentally, it's cheap to set the access flag in hardware and we * benefit from prefaulting mappings as 'old' to start with. */ -static inline bool arch_wants_old_prefaulted_pte(void) -{ - return !arch_faults_on_old_pte(); -} -#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte +#define arch_wants_old_prefaulted_pte cpu_has_hw_af static inline bool pud_sect_supported(void) { diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 61883518fc50..445aa3af3b76 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -323,9 +323,6 @@ static inline bool is_ttbr1_addr(unsigned long addr) /* Forward declaration, a strange C thing */ struct task_struct; -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - unsigned long __get_wchan(struct task_struct *p); void update_sctlr_el1(u64 sctlr); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index debc1c0b2b7f..7d301700d1a9 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -865,6 +865,7 @@ #define SYS_MPIDR_SAFE_VAL (BIT(31)) #define TRFCR_ELx_TS_SHIFT 5 +#define TRFCR_ELx_TS_MASK ((0x3UL) << TRFCR_ELx_TS_SHIFT) #define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT) #define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT) #define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 38a0b0291edb..2f361a883d8c 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -86,8 +86,8 @@ $(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so $(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so obj-y += probes/ -head-y := head.o -extra-y += $(head-y) vmlinux.lds +obj-y += head.o +extra-y += vmlinux.lds ifeq ($(CONFIG_DEBUG_EFI),y) AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 58ca4f6b25d6..89ac00084f38 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -230,6 +230,11 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2441007 + { + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + }, +#endif #ifdef CONFIG_ARM64_ERRATUM_2441009 { /* Cortex-A510 r0p0 -> r1p1. Fixed in r1p2 */ diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 98d67444a5b6..27ef7ad3ffd2 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -8,9 +8,9 @@ #include <asm/cpufeature.h> #include <asm/mte.h> -#define for_each_mte_vma(tsk, vma) \ +#define for_each_mte_vma(vmi, vma) \ if (system_supports_mte()) \ - for (vma = tsk->mm->mmap; vma; vma = vma->vm_next) \ + for_each_vma(vmi, vma) \ if (vma->vm_flags & VM_MTE) static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) @@ -81,8 +81,9 @@ Elf_Half elf_core_extra_phdrs(void) { struct vm_area_struct *vma; int vma_count = 0; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) + for_each_mte_vma(vmi, vma) vma_count++; return vma_count; @@ -91,8 +92,9 @@ Elf_Half elf_core_extra_phdrs(void) int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) { struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) { + for_each_mte_vma(vmi, vma) { struct elf_phdr phdr; phdr.p_type = PT_AARCH64_MEMTAG_MTE; @@ -116,8 +118,9 @@ size_t elf_core_extra_data_size(void) { struct vm_area_struct *vma; size_t data_size = 0; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) + for_each_mte_vma(vmi, vma) data_size += mte_vma_tag_dump_size(vma); return data_size; @@ -126,8 +129,9 @@ size_t elf_core_extra_data_size(void) int elf_core_write_extra_data(struct coredump_params *cprm) { struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) { + for_each_mte_vma(vmi, vma) { if (vma->vm_flags & VM_DONTDUMP) continue; diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 4aaa5f3d1f65..8151412653de 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -24,9 +24,6 @@ PROVIDE(__efistub_primary_entry_offset = primary_entry - _text); */ PROVIDE(__efistub_memcmp = __pi_memcmp); PROVIDE(__efistub_memchr = __pi_memchr); -PROVIDE(__efistub_memcpy = __pi_memcpy); -PROVIDE(__efistub_memmove = __pi_memmove); -PROVIDE(__efistub_memset = __pi_memset); PROVIDE(__efistub_strlen = __pi_strlen); PROVIDE(__efistub_strnlen = __pi_strnlen); PROVIDE(__efistub_strcmp = __pi_strcmp); @@ -40,16 +37,6 @@ PROVIDE(__efistub__edata = _edata); PROVIDE(__efistub_screen_info = screen_info); PROVIDE(__efistub__ctype = _ctype); -/* - * The __ prefixed memcpy/memset/memmove symbols are provided by KASAN, which - * instruments the conventional ones. Therefore, any references from the EFI - * stub or other position independent, low level C code should be redirected to - * the non-instrumented versions as well. - */ -PROVIDE(__efistub___memcpy = __pi_memcpy); -PROVIDE(__efistub___memmove = __pi_memmove); -PROVIDE(__efistub___memset = __pi_memset); - PROVIDE(__pi___memcpy = __pi_memcpy); PROVIDE(__pi___memmove = __pi_memmove); PROVIDE(__pi___memset = __pi_memset); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index aca88470fb69..7467217c1eaf 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -48,7 +48,12 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, if (!pte_is_tagged) return; - mte_clear_page_tags(page_address(page)); + /* + * Test PG_mte_tagged again in case it was racing with another + * set_pte_at(). + */ + if (!test_and_set_bit(PG_mte_tagged, &page->flags)) + mte_clear_page_tags(page_address(page)); } void mte_sync_tags(pte_t old_pte, pte_t pte) @@ -64,7 +69,7 @@ void mte_sync_tags(pte_t old_pte, pte_t pte) /* if PG_mte_tagged is set, tags have already been initialised */ for (i = 0; i < nr_pages; i++, page++) { - if (!test_and_set_bit(PG_mte_tagged, &page->flags)) + if (!test_bit(PG_mte_tagged, &page->flags)) mte_sync_page_tags(page, old_pte, check_swap, pte_is_tagged); } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 92bcc1768f0b..044a7d7f1f6a 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -279,10 +279,6 @@ void flush_thread(void) flush_tagged_addr_state(); } -void release_thread(struct task_struct *dead_task) -{ -} - void arch_release_task_struct(struct task_struct *tsk) { fpsimd_release_task(tsk); @@ -595,7 +591,7 @@ unsigned long __get_wchan(struct task_struct *p) unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() & ~PAGE_MASK; + sp -= prandom_u32_max(PAGE_SIZE); return sp & ~0xf; } diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index a8ea1637b137..bfce41c2a53b 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -868,6 +868,10 @@ u8 spectre_bhb_loop_affected(int scope) MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), {}, }; + static const struct midr_range spectre_bhb_k11_list[] = { + MIDR_ALL_VERSIONS(MIDR_AMPERE1), + {}, + }; static const struct midr_range spectre_bhb_k8_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), @@ -878,6 +882,8 @@ u8 spectre_bhb_loop_affected(int scope) k = 32; else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) k = 24; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list)) + k = 11; else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) k = 8; diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 733451fe7e41..d72e8f23422d 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -67,7 +67,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, * * The resulting 5 bits of entropy is seen in SP[8:4]. */ - choose_random_kstack_offset(get_random_int() & 0x1FF); + choose_random_kstack_offset(get_random_u16() & 0x1FF); } static inline bool has_syscall_work(unsigned long flags) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 44ebf5b2fc4b..817d788cd866 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -22,46 +22,6 @@ #include <asm/cputype.h> #include <asm/topology.h> -void store_cpu_topology(unsigned int cpuid) -{ - struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; - u64 mpidr; - - if (cpuid_topo->package_id != -1) - goto topology_populated; - - mpidr = read_cpuid_mpidr(); - - /* Uniprocessor systems can rely on default topology values */ - if (mpidr & MPIDR_UP_BITMASK) - return; - - /* - * This would be the place to create cpu topology based on MPIDR. - * - * However, it cannot be trusted to depict the actual topology; some - * pieces of the architecture enforce an artificial cap on Aff0 values - * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an - * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up - * having absolutely no relationship to the actual underlying system - * topology, and cannot be reasonably used as core / package ID. - * - * If the MT bit is set, Aff0 *could* be used to define a thread ID, but - * we still wouldn't be able to obtain a sane core ID. This means we - * need to entirely ignore MPIDR for any topology deduction. - */ - cpuid_topo->thread_id = -1; - cpuid_topo->core_id = cpuid; - cpuid_topo->package_id = cpu_to_node(cpuid); - - pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", - cpuid, cpuid_topo->package_id, cpuid_topo->core_id, - cpuid_topo->thread_id, mpidr); - -topology_populated: - update_siblings_masks(cpuid); -} - #ifdef CONFIG_ACPI static bool __init acpi_cpu_is_threaded(int cpu) { diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index ac93a2ee9c07..99ae81ab91a7 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -133,10 +133,11 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) { struct mm_struct *mm = task->mm; struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); mmap_read_lock(mm); - for (vma = mm->mmap; vma; vma = vma->vm_next) { + for_each_vma(vmi, vma) { unsigned long size = vma->vm_end - vma->vm_start; if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm)) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 917086be5c6b..94d33e296e10 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -666,7 +666,6 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) kvm_vcpu_halt(vcpu); vcpu_clear_flag(vcpu, IN_WFIT); - kvm_clear_request(KVM_REQ_UNHALT, vcpu); preempt_disable(); vgic_v4_load(vcpu); @@ -2270,6 +2269,16 @@ static int __init early_kvm_mode_cfg(char *arg) if (!arg) return -EINVAL; + if (strcmp(arg, "none") == 0) { + kvm_mode = KVM_MODE_NONE; + return 0; + } + + if (!is_hyp_mode_available()) { + pr_warn_once("KVM is not available. Ignoring kvm-arm.mode\n"); + return 0; + } + if (strcmp(arg, "protected") == 0) { if (!is_kernel_in_hyp_mode()) kvm_mode = KVM_MODE_PROTECTED; @@ -2284,11 +2293,6 @@ static int __init early_kvm_mode_cfg(char *arg) return 0; } - if (strcmp(arg, "none") == 0) { - kvm_mode = KVM_MODE_NONE; - return 0; - } - return -EINVAL; } early_param("kvm-arm.mode", early_kvm_mode_cfg); diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 3f7563d768e2..fccf9ec01813 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -32,6 +32,10 @@ static DEFINE_PER_CPU(u64, mdcr_el2); * * Guest access to MDSCR_EL1 is trapped by the hypervisor and handled * after we have restored the preserved value to the main context. + * + * When single-step is enabled by userspace, we tweak PSTATE.SS on every + * guest entry. Preserve PSTATE.SS so we can restore the original value + * for the vcpu after the single-step is disabled. */ static void save_guest_debug_regs(struct kvm_vcpu *vcpu) { @@ -41,6 +45,9 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu) trace_kvm_arm_set_dreg32("Saved MDSCR_EL1", vcpu->arch.guest_debug_preserved.mdscr_el1); + + vcpu->arch.guest_debug_preserved.pstate_ss = + (*vcpu_cpsr(vcpu) & DBG_SPSR_SS); } static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) @@ -51,6 +58,11 @@ static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) trace_kvm_arm_set_dreg32("Restored MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); + + if (vcpu->arch.guest_debug_preserved.pstate_ss) + *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; + else + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; } /** @@ -188,7 +200,18 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) * debugging the system. */ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { - *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; + /* + * If the software step state at the last guest exit + * was Active-pending, we don't set DBG_SPSR_SS so + * that the state is maintained (to not run another + * single-step until the pending Software Step + * exception is taken). + */ + if (!vcpu_get_flag(vcpu, DBG_SS_ACTIVE_PENDING)) + *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; + else + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; + mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); mdscr |= DBG_MDSCR_SS; vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); @@ -262,6 +285,15 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) * Restore the guest's debug registers if we were using them. */ if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) { + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)) + /* + * Mark the vcpu as ACTIVE_PENDING + * until Software Step exception is taken. + */ + vcpu_set_flag(vcpu, DBG_SS_ACTIVE_PENDING); + } + restore_guest_debug_regs(vcpu); /* diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index f802a3b3f8db..2ff13a3f8479 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -937,6 +937,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, } else { /* If not enabled clear all flags */ vcpu->guest_debug = 0; + vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING); } out: diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index bbe5b393d689..e778eefcf214 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -152,8 +152,14 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) run->debug.arch.hsr_high = upper_32_bits(esr); run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID; - if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW) + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_WATCHPT_LOW: run->debug.arch.far = vcpu->arch.fault.far_el2; + break; + case ESR_ELx_EC_SOFTSTP_LOW: + vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING); + break; + } return 0; } diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 9f6385702061..8e9d49a964be 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -143,7 +143,7 @@ static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu) } } -/* Restore VGICv3 state on non_VEH systems */ +/* Restore VGICv3 state on non-VHE systems */ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c9a13e487187..34c5feed9dc1 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -92,9 +92,13 @@ static bool kvm_is_device_pfn(unsigned long pfn) static void *stage2_memcache_zalloc_page(void *arg) { struct kvm_mmu_memory_cache *mc = arg; + void *virt; /* Allocated with __GFP_ZERO, so no need to zero */ - return kvm_mmu_memory_cache_alloc(mc); + virt = kvm_mmu_memory_cache_alloc(mc); + if (virt) + kvm_account_pgtable_pages(virt, 1); + return virt; } static void *kvm_host_zalloc_pages_exact(size_t size) @@ -102,6 +106,21 @@ static void *kvm_host_zalloc_pages_exact(size_t size) return alloc_pages_exact(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); } +static void *kvm_s2_zalloc_pages_exact(size_t size) +{ + void *virt = kvm_host_zalloc_pages_exact(size); + + if (virt) + kvm_account_pgtable_pages(virt, (size >> PAGE_SHIFT)); + return virt; +} + +static void kvm_s2_free_pages_exact(void *virt, size_t size) +{ + kvm_account_pgtable_pages(virt, -(size >> PAGE_SHIFT)); + free_pages_exact(virt, size); +} + static void kvm_host_get_page(void *addr) { get_page(virt_to_page(addr)); @@ -112,6 +131,15 @@ static void kvm_host_put_page(void *addr) put_page(virt_to_page(addr)); } +static void kvm_s2_put_page(void *addr) +{ + struct page *p = virt_to_page(addr); + /* Dropping last refcount, the page will be freed */ + if (page_count(p) == 1) + kvm_account_pgtable_pages(addr, -1); + put_page(p); +} + static int kvm_host_page_count(void *addr) { return page_count(virt_to_page(addr)); @@ -625,10 +653,10 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr) static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { .zalloc_page = stage2_memcache_zalloc_page, - .zalloc_pages_exact = kvm_host_zalloc_pages_exact, - .free_pages_exact = free_pages_exact, + .zalloc_pages_exact = kvm_s2_zalloc_pages_exact, + .free_pages_exact = kvm_s2_free_pages_exact, .get_page = kvm_host_get_page, - .put_page = kvm_host_put_page, + .put_page = kvm_s2_put_page, .page_count = kvm_host_page_count, .phys_to_virt = kvm_host_va, .virt_to_phys = kvm_host_pa, diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2ef1121ab844..f4a7c5abcbca 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1063,13 +1063,12 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, } /* Read a sanitised cpufeature ID register by sys_reg_desc */ -static u64 read_id_reg(const struct kvm_vcpu *vcpu, - struct sys_reg_desc const *r, bool raz) +static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r) { u32 id = reg_to_encoding(r); u64 val; - if (raz) + if (sysreg_visible_as_raz(vcpu, r)) return 0; val = read_sanitised_ftr_reg(id); @@ -1145,34 +1144,37 @@ static unsigned int id_visibility(const struct kvm_vcpu *vcpu, return 0; } -/* cpufeature ID register access trap handlers */ - -static bool __access_id_reg(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r, - bool raz) +static unsigned int aa32_id_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) { - if (p->is_write) - return write_to_read_only(vcpu, p, r); + /* + * AArch32 ID registers are UNKNOWN if AArch32 isn't implemented at any + * EL. Promote to RAZ/WI in order to guarantee consistency between + * systems. + */ + if (!kvm_supports_32bit_el0()) + return REG_RAZ | REG_USER_WI; - p->regval = read_id_reg(vcpu, r, raz); - return true; + return id_visibility(vcpu, r); } +static unsigned int raz_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + return REG_RAZ; +} + +/* cpufeature ID register access trap handlers */ + static bool access_id_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - bool raz = sysreg_visible_as_raz(vcpu, r); - - return __access_id_reg(vcpu, p, r, raz); -} + if (p->is_write) + return write_to_read_only(vcpu, p, r); -static bool access_raz_id_reg(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - return __access_id_reg(vcpu, p, r, true); + p->regval = read_id_reg(vcpu, r); + return true; } /* Visibility overrides for SVE-specific control registers */ @@ -1208,9 +1210,9 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, return -EINVAL; /* We can only differ with CSV[23], and anything else is an error */ - val ^= read_id_reg(vcpu, rd, false); - val &= ~((0xFUL << ID_AA64PFR0_EL1_CSV2_SHIFT) | - (0xFUL << ID_AA64PFR0_EL1_CSV3_SHIFT)); + val ^= read_id_reg(vcpu, rd); + val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3)); if (val) return -EINVAL; @@ -1227,45 +1229,21 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, * are stored, and for set_id_reg() we don't allow the effective value * to be changed. */ -static int __get_id_reg(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd, u64 *val, - bool raz) -{ - *val = read_id_reg(vcpu, rd, raz); - return 0; -} - -static int __set_id_reg(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd, u64 val, - bool raz) -{ - /* This is what we mean by invariant: you can't change it. */ - if (val != read_id_reg(vcpu, rd, raz)) - return -EINVAL; - - return 0; -} - static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 *val) { - bool raz = sysreg_visible_as_raz(vcpu, rd); - - return __get_id_reg(vcpu, rd, val, raz); + *val = read_id_reg(vcpu, rd); + return 0; } static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 val) { - bool raz = sysreg_visible_as_raz(vcpu, rd); - - return __set_id_reg(vcpu, rd, val, raz); -} + /* This is what we mean by invariant: you can't change it. */ + if (val != read_id_reg(vcpu, rd)) + return -EINVAL; -static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - u64 val) -{ - return __set_id_reg(vcpu, rd, val, true); + return 0; } static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, @@ -1367,6 +1345,15 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, .visibility = id_visibility, \ } +/* sys_reg_desc initialiser for known cpufeature ID registers */ +#define AA32_ID_SANITISED(name) { \ + SYS_DESC(SYS_##name), \ + .access = access_id_reg, \ + .get_user = get_id_reg, \ + .set_user = set_id_reg, \ + .visibility = aa32_id_visibility, \ +} + /* * sys_reg_desc initialiser for architecturally unallocated cpufeature ID * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2 @@ -1374,9 +1361,10 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, */ #define ID_UNALLOCATED(crm, op2) { \ Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \ - .access = access_raz_id_reg, \ - .get_user = get_raz_reg, \ - .set_user = set_raz_id_reg, \ + .access = access_id_reg, \ + .get_user = get_id_reg, \ + .set_user = set_id_reg, \ + .visibility = raz_visibility \ } /* @@ -1386,9 +1374,10 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, */ #define ID_HIDDEN(name) { \ SYS_DESC(SYS_##name), \ - .access = access_raz_id_reg, \ - .get_user = get_raz_reg, \ - .set_user = set_raz_id_reg, \ + .access = access_id_reg, \ + .get_user = get_id_reg, \ + .set_user = set_id_reg, \ + .visibility = raz_visibility, \ } /* @@ -1452,33 +1441,33 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* AArch64 mappings of the AArch32 ID registers */ /* CRm=1 */ - ID_SANITISED(ID_PFR0_EL1), - ID_SANITISED(ID_PFR1_EL1), - ID_SANITISED(ID_DFR0_EL1), + AA32_ID_SANITISED(ID_PFR0_EL1), + AA32_ID_SANITISED(ID_PFR1_EL1), + AA32_ID_SANITISED(ID_DFR0_EL1), ID_HIDDEN(ID_AFR0_EL1), - ID_SANITISED(ID_MMFR0_EL1), - ID_SANITISED(ID_MMFR1_EL1), - ID_SANITISED(ID_MMFR2_EL1), - ID_SANITISED(ID_MMFR3_EL1), + AA32_ID_SANITISED(ID_MMFR0_EL1), + AA32_ID_SANITISED(ID_MMFR1_EL1), + AA32_ID_SANITISED(ID_MMFR2_EL1), + AA32_ID_SANITISED(ID_MMFR3_EL1), /* CRm=2 */ - ID_SANITISED(ID_ISAR0_EL1), - ID_SANITISED(ID_ISAR1_EL1), - ID_SANITISED(ID_ISAR2_EL1), - ID_SANITISED(ID_ISAR3_EL1), - ID_SANITISED(ID_ISAR4_EL1), - ID_SANITISED(ID_ISAR5_EL1), - ID_SANITISED(ID_MMFR4_EL1), - ID_SANITISED(ID_ISAR6_EL1), + AA32_ID_SANITISED(ID_ISAR0_EL1), + AA32_ID_SANITISED(ID_ISAR1_EL1), + AA32_ID_SANITISED(ID_ISAR2_EL1), + AA32_ID_SANITISED(ID_ISAR3_EL1), + AA32_ID_SANITISED(ID_ISAR4_EL1), + AA32_ID_SANITISED(ID_ISAR5_EL1), + AA32_ID_SANITISED(ID_MMFR4_EL1), + AA32_ID_SANITISED(ID_ISAR6_EL1), /* CRm=3 */ - ID_SANITISED(MVFR0_EL1), - ID_SANITISED(MVFR1_EL1), - ID_SANITISED(MVFR2_EL1), + AA32_ID_SANITISED(MVFR0_EL1), + AA32_ID_SANITISED(MVFR1_EL1), + AA32_ID_SANITISED(MVFR2_EL1), ID_UNALLOCATED(3,3), - ID_SANITISED(ID_PFR2_EL1), + AA32_ID_SANITISED(ID_PFR2_EL1), ID_HIDDEN(ID_DFR1_EL1), - ID_SANITISED(ID_MMFR5_EL1), + AA32_ID_SANITISED(ID_MMFR5_EL1), ID_UNALLOCATED(3,7), /* AArch64 ID registers */ @@ -2809,6 +2798,9 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, if (!r) return -ENOENT; + if (sysreg_user_write_ignore(vcpu, r)) + return 0; + if (r->set_user) { ret = (r->set_user)(vcpu, r, val); } else { diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index a8c4cc32eb9a..e4ebb3a379fd 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -86,6 +86,7 @@ struct sys_reg_desc { #define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */ #define REG_RAZ (1 << 1) /* RAZ from userspace and guest */ +#define REG_USER_WI (1 << 2) /* WI from userspace only */ static __printf(2, 3) inline void print_sys_reg_msg(const struct sys_reg_params *p, @@ -136,22 +137,31 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r __vcpu_sys_reg(vcpu, r->reg) = r->val; } -static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *r) +static inline unsigned int sysreg_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) { if (likely(!r->visibility)) - return false; + return 0; - return r->visibility(vcpu, r) & REG_HIDDEN; + return r->visibility(vcpu, r); +} + +static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + return sysreg_visibility(vcpu, r) & REG_HIDDEN; } static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - if (likely(!r->visibility)) - return false; + return sysreg_visibility(vcpu, r) & REG_RAZ; +} - return r->visibility(vcpu, r) & REG_RAZ; +static inline bool sysreg_user_write_ignore(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + return sysreg_visibility(vcpu, r) & REG_USER_WI; } static inline int cmp_sys_reg(const struct sys_reg_desc *i1, diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 9d3299a70242..24d7778d1ce6 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -406,7 +406,7 @@ static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its, struct its_ite *ite; for_each_lpi_its(device, ite, its) { - if (!ite->collection || coll != ite->collection) + if (ite->collection != coll) continue; update_affinity_ite(kvm, ite); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 83a512a6ff0d..3cb101e8cb29 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -7,7 +7,7 @@ #include <linux/gfp.h> #include <linux/cache.h> #include <linux/dma-map-ops.h> -#include <linux/dma-iommu.h> +#include <linux/iommu.h> #include <xen/xen.h> #include <asm/cacheflush.h> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 0795028f017c..35e9a468d13e 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -245,7 +245,7 @@ static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry) { VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry)); - return page_folio(pfn_to_page(swp_offset(entry))); + return page_folio(pfn_to_page(swp_offset_pfn(entry))); } void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index 4334dec93bd4..bed803d8e158 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -53,7 +53,12 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page) if (!tags) return false; - mte_restore_page_tags(page_address(page), tags); + /* + * Test PG_mte_tagged again in case it was racing with another + * set_pte_at(). + */ + if (!test_and_set_bit(PG_mte_tagged, &page->flags)) + mte_restore_page_tags(page_address(page), tags); return true; } diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 7f1fb36f208c..384757a7eda9 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -732,7 +732,7 @@ EndSysreg Sysreg SCTLR_EL1 3 0 1 0 0 Field 63 TIDCP -Field 62 SPINMASK +Field 62 SPINTMASK Field 61 NMI Field 60 EnTP2 Res0 59:58 diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 3cbc2dc62baf..adee6ab36862 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -332,7 +332,7 @@ config HIGHMEM select KMAP_LOCAL default y -config FORCE_MAX_ZONEORDER +config ARCH_FORCE_MAX_ORDER int "Maximum zone order" default "11" diff --git a/arch/csky/Makefile b/arch/csky/Makefile index 4e1d619fd5c6..0e4237e55758 100644 --- a/arch/csky/Makefile +++ b/arch/csky/Makefile @@ -59,8 +59,6 @@ LDFLAGS += -EL KBUILD_AFLAGS += $(KBUILD_CFLAGS) -head-y := arch/csky/kernel/head.o - core-y += arch/csky/$(CSKYABI)/ libs-y += arch/csky/lib/ \ diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h index 9638206bc44f..63ad71fab30d 100644 --- a/arch/csky/include/asm/processor.h +++ b/arch/csky/include/asm/processor.h @@ -69,11 +69,6 @@ do { \ /* Forward declaration, a strange C thing */ struct task_struct; -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *dead_task) -{ -} - /* Prepare to copy thread state - unlazy all lazy status */ #define prepare_to_copy(tsk) do { } while (0) diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile index 6f14c924b20d..8a868316b912 100644 --- a/arch/csky/kernel/Makefile +++ b/arch/csky/kernel/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only -extra-y := head.o vmlinux.lds +extra-y := vmlinux.lds -obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/ +obj-y += head.o entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/ obj-y += power.o syscall.o syscall_table.o setup.o io.o obj-y += process.o cpu-probe.o ptrace.o stacktrace.o obj-y += probes/ diff --git a/arch/hexagon/Makefile b/arch/hexagon/Makefile index 44312bc147d8..92d005958dfb 100644 --- a/arch/hexagon/Makefile +++ b/arch/hexagon/Makefile @@ -32,5 +32,3 @@ KBUILD_LDFLAGS += $(ldflags-y) TIR_NAME := r19 KBUILD_CFLAGS += -ffixed-$(TIR_NAME) -DTHREADINFO_REG=$(TIR_NAME) -D__linux__ KBUILD_AFLAGS += -DTHREADINFO_REG=$(TIR_NAME) - -head-y := arch/hexagon/kernel/head.o diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h index 615f7e49968e..0cd39c2cdf8f 100644 --- a/arch/hexagon/include/asm/processor.h +++ b/arch/hexagon/include/asm/processor.h @@ -60,10 +60,6 @@ struct thread_struct { #define KSTK_EIP(tsk) (pt_elr(task_pt_regs(tsk))) #define KSTK_ESP(tsk) (pt_psp(task_pt_regs(tsk))) -/* Free all resources held by a thread; defined in process.c */ -extern void release_thread(struct task_struct *dead_task); - -/* Get wait channel for task P. */ extern unsigned long __get_wchan(struct task_struct *p); /* The following stuff is pretty HEXAGON specific. */ diff --git a/arch/hexagon/kernel/Makefile b/arch/hexagon/kernel/Makefile index fae3dce32fde..e73cb321630e 100644 --- a/arch/hexagon/kernel/Makefile +++ b/arch/hexagon/kernel/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -extra-y := head.o vmlinux.lds +extra-y := vmlinux.lds +obj-y += head.o obj-$(CONFIG_SMP) += smp.o obj-y += setup.o irq_cpu.o traps.o syscalltab.o signal.o time.o diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index f0552f98a7ba..e15eeaebd785 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -113,13 +113,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) } /* - * Release any architecture-specific resources locked by thread - */ -void release_thread(struct task_struct *dead_task) -{ -} - -/* * Some archs flush debug and FPU info here */ void flush_thread(void) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 26ac8ea15a9e..c6e06cdc738f 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -200,7 +200,7 @@ config IA64_CYCLONE Say Y here to enable support for IBM EXA Cyclone time source. If you're unsure, answer N. -config FORCE_MAX_ZONEORDER +config ARCH_FORCE_MAX_ORDER int "MAX_ORDER (11 - 17)" if !HUGETLB_PAGE range 11 17 if !HUGETLB_PAGE default "17" if HUGETLB_PAGE diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index e55c2f138656..56c4bb276b6e 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -44,7 +44,6 @@ quiet_cmd_objcopy = OBJCOPY $@ cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@ KBUILD_CFLAGS += $(cflags-y) -head-y := arch/ia64/kernel/head.o libs-y += arch/ia64/lib/ diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig index a3724882295c..3e1337aceb37 100644 --- a/arch/ia64/configs/bigsur_defconfig +++ b/arch/ia64/configs/bigsur_defconfig @@ -20,7 +20,6 @@ CONFIG_UNIX=y CONFIG_INET=y # CONFIG_IPV6 is not set CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=m CONFIG_ATA=m @@ -91,7 +90,6 @@ CONFIG_NFS_V4=m CONFIG_NFSD=m CONFIG_NFSD_V4=y CONFIG_CIFS=m -CONFIG_CIFS_STATS=y CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y CONFIG_NLS_CODEPAGE_437=y diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig index a3dff482a3d7..f8033bacea89 100644 --- a/arch/ia64/configs/generic_defconfig +++ b/arch/ia64/configs/generic_defconfig @@ -39,7 +39,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y # CONFIG_PNP_DEBUG_MESSAGES is not set CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_SGI_XP=m @@ -91,7 +90,6 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y # CONFIG_HW_RANDOM is not set CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y -CONFIG_RAW_DRIVER=m CONFIG_HPET=y CONFIG_AGP=m CONFIG_AGP_I460=m diff --git a/arch/ia64/configs/gensparse_defconfig b/arch/ia64/configs/gensparse_defconfig index 4cd46105b020..ffebe6c503f5 100644 --- a/arch/ia64/configs/gensparse_defconfig +++ b/arch/ia64/configs/gensparse_defconfig @@ -31,11 +31,9 @@ CONFIG_IP_MULTICAST=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_ATA=y -CONFIG_BLK_DEV_IDECD=y CONFIG_ATA_GENERIC=y CONFIG_PATA_CMD64X=y CONFIG_ATA_PIIX=y @@ -81,7 +79,6 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y # CONFIG_HW_RANDOM is not set CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y -CONFIG_RAW_DRIVER=m CONFIG_HPET=y CONFIG_AGP=m CONFIG_AGP_I460=m diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig index a2045d73adfa..45f5d6e2da0a 100644 --- a/arch/ia64/configs/tiger_defconfig +++ b/arch/ia64/configs/tiger_defconfig @@ -36,7 +36,6 @@ CONFIG_IP_MULTICAST=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_ATA=y @@ -85,7 +84,6 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y # CONFIG_HW_RANDOM is not set CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y -CONFIG_RAW_DRIVER=m CONFIG_HPET=y CONFIG_AGP=m CONFIG_AGP_I460=m diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig index 99f8b2a0332b..ed104550d0d5 100644 --- a/arch/ia64/configs/zx1_defconfig +++ b/arch/ia64/configs/zx1_defconfig @@ -30,7 +30,6 @@ CONFIG_PATA_CMD64X=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y -CONFIG_CHR_DEV_OSST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 757c2f6d8d4b..d1978e004054 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -318,13 +318,6 @@ struct thread_struct { struct mm_struct; struct task_struct; -/* - * Free all resources held by a thread. This is called after the - * parent of DEAD_TASK has collected the exit status of the task via - * wait(). - */ -#define release_thread(dead_task) - /* Get wait channel for task P. */ extern unsigned long __get_wchan (struct task_struct *p); diff --git a/arch/ia64/include/asm/sparsemem.h b/arch/ia64/include/asm/sparsemem.h index 42ed5248fae9..84e8ce387b69 100644 --- a/arch/ia64/include/asm/sparsemem.h +++ b/arch/ia64/include/asm/sparsemem.h @@ -11,10 +11,10 @@ #define SECTION_SIZE_BITS (30) #define MAX_PHYSMEM_BITS (50) -#ifdef CONFIG_FORCE_MAX_ZONEORDER -#if ((CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS) +#ifdef CONFIG_ARCH_FORCE_MAX_ORDER +#if ((CONFIG_ARCH_FORCE_MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS) #undef SECTION_SIZE_BITS -#define SECTION_SIZE_BITS (CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT) +#define SECTION_SIZE_BITS (CONFIG_ARCH_FORCE_MAX_ORDER - 1 + PAGE_SHIFT) #endif #endif diff --git a/arch/ia64/include/asm/termios.h b/arch/ia64/include/asm/termios.h deleted file mode 100644 index 589c026444cc..000000000000 --- a/arch/ia64/include/asm/termios.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Modified 1999 - * David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co - * - * 99/01/28 Added N_IRDA and N_SMSBLOCK - */ -#ifndef _ASM_IA64_TERMIOS_H -#define _ASM_IA64_TERMIOS_H - -#include <uapi/asm/termios.h> - - -/* intr=^C quit=^\ erase=del kill=^U - eof=^D vtime=\0 vmin=\1 sxtc=\0 - start=^Q stop=^S susp=^Z eol=\0 - reprint=^R discard=^U werase=^W lnext=^V - eol2=\0 -*/ -#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" - -/* - * Translate a "termio" structure into a "termios". Ugh. - */ -#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \ - unsigned short __tmp; \ - get_user(__tmp,&(termio)->x); \ - *(unsigned short *) &(termios)->x = __tmp; \ -} - -#define user_termio_to_kernel_termios(termios, termio) \ -({ \ - SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \ - SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \ - SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \ - SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \ - copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \ -}) - -/* - * Translate a "termios" structure into a "termio". Ugh. - */ -#define kernel_termios_to_user_termio(termio, termios) \ -({ \ - put_user((termios)->c_iflag, &(termio)->c_iflag); \ - put_user((termios)->c_oflag, &(termio)->c_oflag); \ - put_user((termios)->c_cflag, &(termio)->c_cflag); \ - put_user((termios)->c_lflag, &(termio)->c_lflag); \ - put_user((termios)->c_line, &(termio)->c_line); \ - copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \ -}) - -#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2)) -#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2)) -#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) -#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) - -#endif /* _ASM_IA64_TERMIOS_H */ diff --git a/arch/ia64/include/uapi/asm/termios.h b/arch/ia64/include/uapi/asm/termios.h deleted file mode 100644 index 199742d08f2c..000000000000 --- a/arch/ia64/include/uapi/asm/termios.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Modified 1999 - * David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co - * - * 99/01/28 Added N_IRDA and N_SMSBLOCK - */ -#ifndef _UAPI_ASM_IA64_TERMIOS_H -#define _UAPI_ASM_IA64_TERMIOS_H - - -#include <asm/termbits.h> -#include <asm/ioctls.h> - -struct winsize { - unsigned short ws_row; - unsigned short ws_col; - unsigned short ws_xpixel; - unsigned short ws_ypixel; -}; - -#define NCC 8 -struct termio { - unsigned short c_iflag; /* input mode flags */ - unsigned short c_oflag; /* output mode flags */ - unsigned short c_cflag; /* control mode flags */ - unsigned short c_lflag; /* local mode flags */ - unsigned char c_line; /* line discipline */ - unsigned char c_cc[NCC]; /* control characters */ -}; - -/* modem lines */ -#define TIOCM_LE 0x001 -#define TIOCM_DTR 0x002 -#define TIOCM_RTS 0x004 -#define TIOCM_ST 0x008 -#define TIOCM_SR 0x010 -#define TIOCM_CTS 0x020 -#define TIOCM_CAR 0x040 -#define TIOCM_RNG 0x080 -#define TIOCM_DSR 0x100 -#define TIOCM_CD TIOCM_CAR -#define TIOCM_RI TIOCM_RNG -#define TIOCM_OUT1 0x2000 -#define TIOCM_OUT2 0x4000 -#define TIOCM_LOOP 0x8000 - -/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ - - -#endif /* _UAPI_ASM_IA64_TERMIOS_H */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 08d4a2ba0652..ae9ff07de4ab 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -7,9 +7,9 @@ ifdef CONFIG_DYNAMIC_FTRACE CFLAGS_REMOVE_ftrace.o = -pg endif -extra-y := head.o vmlinux.lds +extra-y := vmlinux.lds -obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o irq.o irq_ia64.o \ +obj-y := head.o entry.o efi.o efi_stub.o gate-data.o fsys.o irq.o irq_ia64.o \ irq_lsapic.o ivt.o pal.o patch.o process.o ptrace.o sal.o \ salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ unwind.o mca.o mca_asm.o topology.o dma-mapping.o iosapic.o acpi.o \ @@ -34,10 +34,7 @@ mca_recovery-y += mca_drv.o mca_drv_asm.o obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o obj-$(CONFIG_STACKTRACE) += stacktrace.o -obj-$(CONFIG_IA64_ESI) += esi.o -ifneq ($(CONFIG_IA64_ESI),) -obj-y += esi_stub.o # must be in kernel proper -endif +obj-$(CONFIG_IA64_ESI) += esi.o esi_stub.o # must be in kernel proper obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o obj-$(CONFIG_ELF_CORE) += elfcore.o diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index c62a66710ad6..92ede80d17fe 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1793,7 +1793,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset, p->parent = p->real_parent = p->group_leader = p; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); - strncpy(p->comm, type, sizeof(p->comm)-1); + strscpy(p->comm, type, sizeof(p->comm)-1); } /* Caller prevents this from being called after init */ diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index fd6301eafa9d..c05728044272 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -552,7 +552,7 @@ setup_arch (char **cmdline_p) ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); *cmdline_p = __va(ia64_boot_param->command_line); - strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); + strscpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); efi_init(); io_port_init(); diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index e14db25146c2..215bf3f8cb20 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -166,3 +166,29 @@ ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, u force_successful_syscall_return(); return addr; } + +asmlinkage long +ia64_clock_getres(const clockid_t which_clock, struct __kernel_timespec __user *tp) +{ + /* + * ia64's clock_gettime() syscall is implemented as a vdso call + * fsys_clock_gettime(). Currently it handles only + * CLOCK_REALTIME and CLOCK_MONOTONIC. Both are based on + * 'ar.itc' counter which gets incremented at a constant + * frequency. It's usually 400MHz, ~2.5x times slower than CPU + * clock frequency. Which is almost a 1ns hrtimer, but not quite. + * + * Let's special-case these timers to report correct precision + * based on ITC frequency and not HZ frequency for supported + * clocks. + */ + switch (which_clock) { + case CLOCK_REALTIME: + case CLOCK_MONOTONIC: + s64 tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq); + struct timespec64 rtn_tp = ns_to_timespec64(tick_ns); + return put_timespec64(&rtn_tp, tp); + } + + return sys_clock_getres(which_clock, tp); +} diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 78b1d03e86e1..72c929d9902b 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -240,7 +240,7 @@ 228 common timer_delete sys_timer_delete 229 common clock_settime sys_clock_settime 230 common clock_gettime sys_clock_gettime -231 common clock_getres sys_clock_getres +231 common clock_getres ia64_clock_getres 232 common clock_nanosleep sys_clock_nanosleep 233 common fstatfs64 sys_fstatfs64 234 common statfs64 sys_statfs64 diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild index ab5373d0a24f..b01f5cdb27e0 100644 --- a/arch/loongarch/Kbuild +++ b/arch/loongarch/Kbuild @@ -1,5 +1,6 @@ obj-y += kernel/ obj-y += mm/ +obj-y += net/ obj-y += vdso/ # for cleaning diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 551dd99e98b8..903096bd87f8 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -50,6 +50,7 @@ config LOONGARCH select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_NO_INSTR @@ -61,6 +62,7 @@ config LOONGARCH select GENERIC_CPU_AUTOPROBE select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY + select GENERIC_IOREMAP if !ARCH_IOREMAP select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW @@ -69,6 +71,7 @@ config LOONGARCH select GENERIC_LIB_CMPDI2 select GENERIC_LIB_LSHRDI3 select GENERIC_LIB_UCMPDI2 + select GENERIC_LIB_DEVMEM_IS_ALLOWED select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD @@ -83,6 +86,7 @@ config LOONGARCH select HAVE_CONTEXT_TRACKING_USER select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS + select HAVE_EBPF_JIT select HAVE_EXIT_THREAD select HAVE_FAST_GUP select HAVE_GENERIC_VDSO @@ -93,6 +97,8 @@ config LOONGARCH select HAVE_NMI select HAVE_PCI select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ select HAVE_SETUP_PER_CPU_AREA if NUMA @@ -105,8 +111,6 @@ config LOONGARCH select MODULES_USE_ELF_RELA if MODULES select NEED_PER_CPU_EMBED_FIRST_CHUNK select NEED_PER_CPU_PAGE_FIRST_CHUNK - select OF - select OF_EARLY_FLATTREE select PCI select PCI_DOMAINS_GENERIC select PCI_ECAM if ACPI @@ -138,6 +142,14 @@ config CPU_HAS_PREFETCH bool default y +config GENERIC_BUG + def_bool y + depends on BUG + +config GENERIC_BUG_RELATIVE_POINTERS + def_bool y + depends on GENERIC_BUG + config GENERIC_CALIBRATE_DELAY def_bool y @@ -159,7 +171,7 @@ config STACKTRACE_SUPPORT bool default y -# MACH_LOONGSON32 and MACH_LOONGSON64 are delibrately carried over from the +# MACH_LOONGSON32 and MACH_LOONGSON64 are deliberately carried over from the # MIPS Loongson code, to preserve Loongson-specific code paths in drivers that # are shared between architectures, and specifically expecting the symbols. config MACH_LOONGSON32 @@ -168,6 +180,9 @@ config MACH_LOONGSON32 config MACH_LOONGSON64 def_bool 64BIT +config FIX_EARLYCON_MEM + def_bool y + config PAGE_SIZE_4KB bool @@ -196,6 +211,9 @@ config SCHED_OMIT_FRAME_POINTER bool default y +config AS_HAS_EXPLICIT_RELOCS + def_bool $(as-instr,x:pcalau12i \$t0$(comma)%pc_hi20(x)) + menu "Kernel type and options" source "kernel/Kconfig.hz" @@ -313,12 +331,20 @@ config DMI config EFI bool "EFI runtime service support" select UCS2_STRING - select EFI_PARAMS_FROM_FDT select EFI_RUNTIME_WRAPPERS help This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). +config EFI_STUB + bool "EFI boot stub support" + default y + depends on EFI + select EFI_GENERIC_STUB + help + This kernel feature allows the kernel to be loaded directly by + EFI firmware without the use of a bootloader. + config SMP bool "Multi-Processing support" help @@ -371,7 +397,7 @@ config NODES_SHIFT default "6" depends on NUMA -config FORCE_MAX_ZONEORDER +config ARCH_FORCE_MAX_ORDER int "Maximum zone order" range 14 64 if PAGE_SIZE_64KB default "14" if PAGE_SIZE_64KB @@ -393,6 +419,46 @@ config FORCE_MAX_ZONEORDER The page size is not necessarily 4KB. Keep this in mind when choosing a value for this option. +config ARCH_IOREMAP + bool "Enable LoongArch DMW-based ioremap()" + help + We use generic TLB-based ioremap() by default since it has page + protection support. However, you can enable LoongArch DMW-based + ioremap() for better performance. + +config KEXEC + bool "Kexec system call" + select KEXEC_CORE + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + +config CRASH_DUMP + bool "Build kdump crash kernel" + help + Generate crash dump after being started by kexec. This should + be normally only set in special crash dump kernels which are + loaded in the main kernel with kexec-tools into a specially + reserved region and then later executed after a crash by + kdump/kexec. + + For more details see Documentation/admin-guide/kdump/kdump.rst + +config PHYSICAL_START + hex "Physical address where the kernel is loaded" + default "0x90000000a0000000" + depends on CRASH_DUMP + help + This gives the XKPRANGE address where the kernel is loaded. + If you plan to use kernel for capturing the crash dump change + this value to start of the reserved region (the "X" value as + specified in the "crashkernel=YM@XM" command line boot parameter + passed to the panic-ed kernel). + config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index ec3de6191276..f4cb54d5afd6 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -7,7 +7,14 @@ boot := arch/loongarch/boot KBUILD_DEFCONFIG := loongson3_defconfig -KBUILD_IMAGE = $(boot)/vmlinux +image-name-y := vmlinux +image-name-$(CONFIG_EFI_ZBOOT) := vmlinuz + +ifndef CONFIG_EFI_STUB +KBUILD_IMAGE := $(boot)/vmlinux.elf +else +KBUILD_IMAGE := $(boot)/$(image-name-y).efi +endif # # Select the object file format to substitute into the linker script. @@ -36,15 +43,37 @@ endif cflags-y += -G0 -pipe -msoft-float LDFLAGS_vmlinux += -G0 -static -n -nostdlib + +# When the assembler supports explicit relocation hint, we must use it. +# GCC may have -mexplicit-relocs off by default if it was built with an old +# assembler, so we force it via an option. +# +# When the assembler does not supports explicit relocation hint, we can't use +# it. Disable it if the compiler supports it. +# +# If you've seen "unknown reloc hint" message building the kernel and you are +# now wondering why "-mexplicit-relocs" is not wrapped with cc-option: the +# combination of a "new" assembler and "old" compiler is not supported. Either +# upgrade the compiler or downgrade the assembler. +ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS +cflags-y += -mexplicit-relocs +KBUILD_CFLAGS_KERNEL += -mdirect-extern-access +else +cflags-y += $(call cc-option,-mno-explicit-relocs) KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel KBUILD_CFLAGS_KERNEL += -Wa,-mla-global-with-pcrel KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs +endif cflags-y += -ffreestanding cflags-y += $(call cc-option, -mno-check-zero-division) +ifndef CONFIG_PHYSICAL_START load-y = 0x9000000000200000 +else +load-y = $(CONFIG_PHYSICAL_START) +endif bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) drivers-$(CONFIG_PCI) += arch/loongarch/pci/ @@ -72,9 +101,8 @@ CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') endif -head-y := arch/loongarch/kernel/head.o - libs-y += arch/loongarch/lib/ +libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a ifeq ($(KBUILD_EXTMOD),) prepare: vdso_prepare @@ -86,13 +114,13 @@ PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/loongarch/vdso $@ -all: $(KBUILD_IMAGE) +all: $(notdir $(KBUILD_IMAGE)) -$(KBUILD_IMAGE): vmlinux - $(Q)$(MAKE) $(build)=$(boot) $(bootvars-y) $@ +vmlinux.elf vmlinux.efi vmlinuz.efi: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(bootvars-y) $(boot)/$@ install: - $(Q)install -D -m 755 $(KBUILD_IMAGE) $(INSTALL_PATH)/vmlinux-$(KERNELRELEASE) + $(Q)install -D -m 755 $(KBUILD_IMAGE) $(INSTALL_PATH)/$(image-name-y)-$(KERNELRELEASE) $(Q)install -D -m 644 .config $(INSTALL_PATH)/config-$(KERNELRELEASE) $(Q)install -D -m 644 System.map $(INSTALL_PATH)/System.map-$(KERNELRELEASE) diff --git a/arch/loongarch/boot/.gitignore b/arch/loongarch/boot/.gitignore index 49423ee96ef3..e5dc594dc4b6 100644 --- a/arch/loongarch/boot/.gitignore +++ b/arch/loongarch/boot/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only vmlinux* +vmlinuz* diff --git a/arch/loongarch/boot/Makefile b/arch/loongarch/boot/Makefile index 0125b17edc98..4e1c374c5782 100644 --- a/arch/loongarch/boot/Makefile +++ b/arch/loongarch/boot/Makefile @@ -8,9 +8,19 @@ drop-sections := .comment .note .options .note.gnu.build-id strip-flags := $(addprefix --remove-section=,$(drop-sections)) -S OBJCOPYFLAGS_vmlinux.efi := -O binary $(strip-flags) -targets := vmlinux quiet_cmd_strip = STRIP $@ cmd_strip = $(STRIP) -s -o $@ $< -$(obj)/vmlinux: vmlinux FORCE +targets := vmlinux.elf +$(obj)/vmlinux.elf: vmlinux FORCE $(call if_changed,strip) + +targets += vmlinux.efi +$(obj)/vmlinux.efi: vmlinux FORCE + $(call if_changed,objcopy) + +EFI_ZBOOT_PAYLOAD := vmlinux.efi +EFI_ZBOOT_BFD_TARGET := elf64-loongarch +EFI_ZBOOT_MACH_TYPE := LOONGARCH64 + +include $(srctree)/drivers/firmware/efi/libstub/Makefile.zboot diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 3712552e18d3..3540e9c0a631 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -4,6 +4,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BPF_SYSCALL=y +CONFIG_BPF_JIT=y CONFIG_PREEMPT=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y @@ -45,6 +46,7 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y CONFIG_NR_CPUS=64 CONFIG_NUMA=y +CONFIG_KEXEC=y CONFIG_PAGE_SIZE_16KB=y CONFIG_HZ_250=y CONFIG_ACPI=y @@ -55,6 +57,7 @@ CONFIG_ACPI_DOCK=y CONFIG_ACPI_IPMI=m CONFIG_ACPI_PCI_SLOT=y CONFIG_ACPI_HOTPLUG_MEMORY=y +CONFIG_EFI_ZBOOT=y CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y CONFIG_EFI_CAPSULE_LOADER=m CONFIG_EFI_TEST=m @@ -65,6 +68,8 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_PARTITION_ADVANCED=y +CONFIG_BSD_DISKLABEL=y +CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BINFMT_MISC=m @@ -82,8 +87,11 @@ CONFIG_ZSMALLOC=m CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y +CONFIG_TLS=m +CONFIG_TLS_DEVICE=y CONFIG_XFRM_USER=y CONFIG_NET_KEY=y +CONFIG_XDP_SOCKETS=y CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -95,6 +103,7 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE_DEMUX=m CONFIG_IP_MROUTE=y CONFIG_INET_ESP=m CONFIG_INET_UDP_DIAG=y @@ -102,6 +111,7 @@ CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y +CONFIG_INET6_ESP=m CONFIG_IPV6_MROUTE=y CONFIG_NETWORK_PHY_TIMESTAMPING=y CONFIG_NETFILTER=y @@ -112,10 +122,11 @@ CONFIG_NF_LOG_NETDEV=m CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_SNMP=m +CONFIG_NF_CONNTRACK_PPTP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_TABLES=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -200,7 +211,6 @@ CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_LOG_ARP=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -254,10 +264,14 @@ CONFIG_BPFILTER=y CONFIG_IP_SCTP=m CONFIG_RDS=y CONFIG_L2TP=m +CONFIG_L2TP_V3=y +CONFIG_L2TP_IP=m +CONFIG_L2TP_ETH=m CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y CONFIG_VLAN_8021Q_MVRP=y +CONFIG_LLC2=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_PRIO=m @@ -282,9 +296,33 @@ CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=y CONFIG_CGROUP_NET_PRIO=y +CONFIG_BPF_STREAM_PARSER=y CONFIG_BT=m +CONFIG_BT_RFCOMM=m +CONFIG_BT_RFCOMM_TTY=y +CONFIG_BT_BNEP=m +CONFIG_BT_BNEP_MC_FILTER=y +CONFIG_BT_BNEP_PROTO_FILTER=y +CONFIG_BT_HIDP=m +CONFIG_BT_HS=y CONFIG_BT_HCIBTUSB=m -# CONFIG_BT_HCIBTUSB_BCM is not set +CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y +CONFIG_BT_HCIBTUSB_MTK=y +CONFIG_BT_HCIUART=m +CONFIG_BT_HCIUART_BCSP=y +CONFIG_BT_HCIUART_ATH3K=y +CONFIG_BT_HCIUART_INTEL=y +CONFIG_BT_HCIUART_AG6XX=y +CONFIG_BT_HCIBCM203X=m +CONFIG_BT_HCIBPA10X=m +CONFIG_BT_HCIBFUSB=m +CONFIG_BT_HCIDTL1=m +CONFIG_BT_HCIBT3C=m +CONFIG_BT_HCIBLUECARD=m +CONFIG_BT_HCIVHCI=m +CONFIG_BT_MRVL=m +CONFIG_BT_ATH3K=m +CONFIG_BT_VIRTIO=m CONFIG_CFG80211=m CONFIG_CFG80211_WEXT=y CONFIG_MAC80211=m @@ -329,7 +367,6 @@ CONFIG_PARPORT_PC_FIFO=y CONFIG_ZRAM=m CONFIG_ZRAM_DEF_COMP_ZSTD=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=y CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 @@ -486,6 +523,7 @@ CONFIG_PPP_FILTER=y CONFIG_PPP_MPPE=m CONFIG_PPP_MULTILINK=y CONFIG_PPPOE=m +CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m @@ -505,7 +543,6 @@ CONFIG_ATH9K_HTC=m CONFIG_IWLWIFI=m CONFIG_IWLDVM=m CONFIG_IWLMVM=m -CONFIG_IWLWIFI_BCAST_FILTERING=y CONFIG_HOSTAP=m CONFIG_MT7601U=m CONFIG_RT2X00=m @@ -521,6 +558,14 @@ CONFIG_RTL8821AE=m CONFIG_RTL8192CU=m # CONFIG_RTLWIFI_DEBUG is not set CONFIG_RTL8XXXU=m +CONFIG_RTW88=m +CONFIG_RTW88_8822BE=m +CONFIG_RTW88_8822CE=m +CONFIG_RTW88_8723DE=m +CONFIG_RTW88_8821CE=m +CONFIG_RTW89=m +CONFIG_RTW89_8852AE=m +CONFIG_RTW89_8852CE=m CONFIG_ZD1211RW=m CONFIG_USB_NET_RNDIS_WLAN=m CONFIG_INPUT_MOUSEDEV=y @@ -651,6 +696,11 @@ CONFIG_USB_SERIAL_FTDI_SIO=m CONFIG_USB_SERIAL_PL2303=m CONFIG_USB_SERIAL_OPTION=m CONFIG_USB_GADGET=y +CONFIG_TYPEC=m +CONFIG_TYPEC_TCPM=m +CONFIG_TYPEC_TCPCI=m +CONFIG_TYPEC_UCSI=m +CONFIG_UCSI_ACPI=m CONFIG_INFINIBAND=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y @@ -688,7 +738,6 @@ CONFIG_COMEDI_NI_PCIDIO=m CONFIG_COMEDI_NI_PCIMIO=m CONFIG_STAGING=y CONFIG_R8188EU=m -# CONFIG_88EU_AP_MODE is not set CONFIG_PM_DEVFREQ=y CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y CONFIG_DEVFREQ_GOV_PERFORMANCE=y @@ -772,14 +821,12 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 83bc0681e72b..77ad8e6f0906 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -1,12 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += dma-contiguous.h generic-y += export.h +generic-y += mcs_spinlock.h generic-y += parport.h generic-y += early_ioremap.h generic-y += qrwlock.h -generic-y += qrwlock_types.h -generic-y += spinlock.h -generic-y += spinlock_types.h +generic-y += qspinlock.h generic-y += rwsem.h generic-y += segment.h generic-y += user.h @@ -21,7 +20,6 @@ generic-y += shmbuf.h generic-y += statfs.h generic-y += socket.h generic-y += sockios.h -generic-y += termios.h generic-y += termbits.h generic-y += poll.h generic-y += param.h diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index e02ac4af7f6e..ed0910e8b856 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -36,8 +36,13 @@ struct loongson_system_configuration { }; extern u64 efi_system_table; -extern unsigned long fw_arg0, fw_arg1; +extern unsigned long fw_arg0, fw_arg1, fw_arg2; extern struct loongson_board_info b_info; extern struct loongson_system_configuration loongson_sysconf; +static inline bool io_master(int cpu) +{ + return test_bit(cpu, &loongson_sysconf.cores_io_master); +} + #endif /* _ASM_BOOTINFO_H */ diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h index bda49108a76d..d4ca3ba25418 100644 --- a/arch/loongarch/include/asm/bug.h +++ b/arch/loongarch/include/asm/bug.h @@ -2,21 +2,59 @@ #ifndef __ASM_BUG_H #define __ASM_BUG_H -#include <linux/compiler.h> +#include <asm/break.h> +#include <linux/stringify.h> + +#ifndef CONFIG_DEBUG_BUGVERBOSE +#define _BUGVERBOSE_LOCATION(file, line) +#else +#define __BUGVERBOSE_LOCATION(file, line) \ + .pushsection .rodata.str, "aMS", @progbits, 1; \ + 10002: .string file; \ + .popsection; \ + \ + .long 10002b - .; \ + .short line; +#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line) +#endif -#ifdef CONFIG_BUG +#ifndef CONFIG_GENERIC_BUG +#define __BUG_ENTRY(flags) +#else +#define __BUG_ENTRY(flags) \ + .pushsection __bug_table, "aw"; \ + .align 2; \ + 10000: .long 10001f - .; \ + _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ + .short flags; \ + .popsection; \ + 10001: +#endif -#include <asm/break.h> +#define ASM_BUG_FLAGS(flags) \ + __BUG_ENTRY(flags) \ + break BRK_BUG -static inline void __noreturn BUG(void) -{ - __asm__ __volatile__("break %0" : : "i" (BRK_BUG)); - unreachable(); -} +#define ASM_BUG() ASM_BUG_FLAGS(0) -#define HAVE_ARCH_BUG +#define __BUG_FLAGS(flags) \ + asm_inline volatile (__stringify(ASM_BUG_FLAGS(flags))); -#endif +#define __WARN_FLAGS(flags) \ +do { \ + instrumentation_begin(); \ + __BUG_FLAGS(BUGFLAG_WARNING|(flags)); \ + instrumentation_end(); \ +} while (0) + +#define BUG() \ +do { \ + instrumentation_begin(); \ + __BUG_FLAGS(0); \ + unreachable(); \ +} while (0) + +#define HAVE_ARCH_BUG #include <asm-generic/bug.h> diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h index 670900141b7c..0681788eb474 100644 --- a/arch/loongarch/include/asm/cacheflush.h +++ b/arch/loongarch/include/asm/cacheflush.h @@ -6,10 +6,33 @@ #define _ASM_CACHEFLUSH_H #include <linux/mm.h> -#include <asm/cpu-features.h> +#include <asm/cpu-info.h> #include <asm/cacheops.h> -extern void local_flush_icache_range(unsigned long start, unsigned long end); +static inline bool cache_present(struct cache_desc *cdesc) +{ + return cdesc->flags & CACHE_PRESENT; +} + +static inline bool cache_private(struct cache_desc *cdesc) +{ + return cdesc->flags & CACHE_PRIVATE; +} + +static inline bool cache_inclusive(struct cache_desc *cdesc) +{ + return cdesc->flags & CACHE_INCLUSIVE; +} + +static inline unsigned int cpu_last_level_cache_line_size(void) +{ + int cache_present = boot_cpu_data.cache_leaves_present; + + return boot_cpu_data.cache_leaves[cache_present - 1].linesz; +} + +asmlinkage void __flush_cache_all(void); +void local_flush_icache_range(unsigned long start, unsigned long end); #define flush_icache_range local_flush_icache_range #define flush_icache_user_range local_flush_icache_range @@ -35,44 +58,30 @@ extern void local_flush_icache_range(unsigned long start, unsigned long end); : \ : "i" (op), "ZC" (*(unsigned char *)(addr))) -static inline void flush_icache_line_indexed(unsigned long addr) -{ - cache_op(Index_Invalidate_I, addr); -} - -static inline void flush_dcache_line_indexed(unsigned long addr) -{ - cache_op(Index_Writeback_Inv_D, addr); -} - -static inline void flush_vcache_line_indexed(unsigned long addr) -{ - cache_op(Index_Writeback_Inv_V, addr); -} - -static inline void flush_scache_line_indexed(unsigned long addr) -{ - cache_op(Index_Writeback_Inv_S, addr); -} - -static inline void flush_icache_line(unsigned long addr) -{ - cache_op(Hit_Invalidate_I, addr); -} - -static inline void flush_dcache_line(unsigned long addr) -{ - cache_op(Hit_Writeback_Inv_D, addr); -} - -static inline void flush_vcache_line(unsigned long addr) -{ - cache_op(Hit_Writeback_Inv_V, addr); -} - -static inline void flush_scache_line(unsigned long addr) +static inline void flush_cache_line(int leaf, unsigned long addr) { - cache_op(Hit_Writeback_Inv_S, addr); + switch (leaf) { + case Cache_LEAF0: + cache_op(Index_Writeback_Inv_LEAF0, addr); + break; + case Cache_LEAF1: + cache_op(Index_Writeback_Inv_LEAF1, addr); + break; + case Cache_LEAF2: + cache_op(Index_Writeback_Inv_LEAF2, addr); + break; + case Cache_LEAF3: + cache_op(Index_Writeback_Inv_LEAF3, addr); + break; + case Cache_LEAF4: + cache_op(Index_Writeback_Inv_LEAF4, addr); + break; + case Cache_LEAF5: + cache_op(Index_Writeback_Inv_LEAF5, addr); + break; + default: + break; + } } #include <asm-generic/cacheflush.h> diff --git a/arch/loongarch/include/asm/cacheops.h b/arch/loongarch/include/asm/cacheops.h index dc280efecebd..0f4a86f8e2be 100644 --- a/arch/loongarch/include/asm/cacheops.h +++ b/arch/loongarch/include/asm/cacheops.h @@ -8,16 +8,18 @@ #define __ASM_CACHEOPS_H /* - * Most cache ops are split into a 2 bit field identifying the cache, and a 3 + * Most cache ops are split into a 3 bit field identifying the cache, and a 2 * bit field identifying the cache operation. */ -#define CacheOp_Cache 0x03 -#define CacheOp_Op 0x1c +#define CacheOp_Cache 0x07 +#define CacheOp_Op 0x18 -#define Cache_I 0x00 -#define Cache_D 0x01 -#define Cache_V 0x02 -#define Cache_S 0x03 +#define Cache_LEAF0 0x00 +#define Cache_LEAF1 0x01 +#define Cache_LEAF2 0x02 +#define Cache_LEAF3 0x03 +#define Cache_LEAF4 0x04 +#define Cache_LEAF5 0x05 #define Index_Invalidate 0x08 #define Index_Writeback_Inv 0x08 @@ -25,13 +27,17 @@ #define Hit_Writeback_Inv 0x10 #define CacheOp_User_Defined 0x18 -#define Index_Invalidate_I (Cache_I | Index_Invalidate) -#define Index_Writeback_Inv_D (Cache_D | Index_Writeback_Inv) -#define Index_Writeback_Inv_V (Cache_V | Index_Writeback_Inv) -#define Index_Writeback_Inv_S (Cache_S | Index_Writeback_Inv) -#define Hit_Invalidate_I (Cache_I | Hit_Invalidate) -#define Hit_Writeback_Inv_D (Cache_D | Hit_Writeback_Inv) -#define Hit_Writeback_Inv_V (Cache_V | Hit_Writeback_Inv) -#define Hit_Writeback_Inv_S (Cache_S | Hit_Writeback_Inv) +#define Index_Writeback_Inv_LEAF0 (Cache_LEAF0 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF1 (Cache_LEAF1 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF2 (Cache_LEAF2 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF3 (Cache_LEAF3 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF4 (Cache_LEAF4 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF5 (Cache_LEAF5 | Index_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF0 (Cache_LEAF0 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF1 (Cache_LEAF1 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF2 (Cache_LEAF2 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF3 (Cache_LEAF3 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF4 (Cache_LEAF4 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF5 (Cache_LEAF5 | Hit_Writeback_Inv) #endif /* __ASM_CACHEOPS_H */ diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h index ae19e33c7754..ecfa6cf79806 100644 --- a/arch/loongarch/include/asm/cmpxchg.h +++ b/arch/loongarch/include/asm/cmpxchg.h @@ -61,8 +61,8 @@ static inline unsigned int __xchg_small(volatile void *ptr, unsigned int val, return (old32 & mask) >> shift; } -static inline unsigned long __xchg(volatile void *ptr, unsigned long x, - int size) +static __always_inline unsigned long +__xchg(volatile void *ptr, unsigned long x, int size) { switch (size) { case 1: @@ -159,8 +159,8 @@ static inline unsigned int __cmpxchg_small(volatile void *ptr, unsigned int old, return (old32 & mask) >> shift; } -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, unsigned int size) +static __always_inline unsigned long +__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { case 1: diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index a8d87c40a0eb..b07974218393 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -19,11 +19,6 @@ #define cpu_has_loongarch32 (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_32BIT) #define cpu_has_loongarch64 (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT) -#define cpu_icache_line_size() cpu_data[0].icache.linesz -#define cpu_dcache_line_size() cpu_data[0].dcache.linesz -#define cpu_vcache_line_size() cpu_data[0].vcache.linesz -#define cpu_scache_line_size() cpu_data[0].scache.linesz - #ifdef CONFIG_32BIT # define cpu_has_64bits (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT) # define cpu_vabits 31 diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h index b6c4f96079df..cd73a6f57fe3 100644 --- a/arch/loongarch/include/asm/cpu-info.h +++ b/arch/loongarch/include/asm/cpu-info.h @@ -10,18 +10,28 @@ #include <asm/loongarch.h> +/* cache_desc->flags */ +enum { + CACHE_PRESENT = (1 << 0), + CACHE_PRIVATE = (1 << 1), /* core private cache */ + CACHE_INCLUSIVE = (1 << 2), /* include the inner level caches */ +}; + /* * Descriptor for a cache */ struct cache_desc { - unsigned int waysize; /* Bytes per way */ + unsigned char type; + unsigned char level; unsigned short sets; /* Number of lines per set */ unsigned char ways; /* Number of ways */ unsigned char linesz; /* Size of line in bytes */ - unsigned char waybit; /* Bits to select in a cache set */ unsigned char flags; /* Flags describing cache properties */ }; +#define CACHE_LEVEL_MAX 3 +#define CACHE_LEAVES_MAX 6 + struct cpuinfo_loongarch { u64 asid_cache; unsigned long asid_mask; @@ -40,11 +50,8 @@ struct cpuinfo_loongarch { int tlbsizemtlb; int tlbsizestlbsets; int tlbsizestlbways; - struct cache_desc icache; /* Primary I-cache */ - struct cache_desc dcache; /* Primary D or combined I/D cache */ - struct cache_desc vcache; /* Victim cache, between pcache and scache */ - struct cache_desc scache; /* Secondary cache */ - struct cache_desc tcache; /* Tertiary/split secondary cache */ + int cache_leaves_present; /* number of cache_leaves[] elements */ + struct cache_desc cache_leaves[CACHE_LEAVES_MAX]; int core; /* physical core number in package */ int package;/* physical package number */ int vabits; /* Virtual Address size in bits */ diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h index 9d44c6948be1..174567b00ddb 100644 --- a/arch/loongarch/include/asm/efi.h +++ b/arch/loongarch/include/asm/efi.h @@ -17,9 +17,16 @@ void efifb_setup_from_dmi(struct screen_info *si, const char *opt); #define arch_efi_call_virt_teardown() #define EFI_ALLOC_ALIGN SZ_64K +#define EFI_RT_VIRTUAL_OFFSET CSR_DMW0_BASE -struct screen_info *alloc_screen_info(void); -void free_screen_info(struct screen_info *si); +static inline struct screen_info *alloc_screen_info(void) +{ + return &screen_info; +} + +static inline void free_screen_info(struct screen_info *si) +{ +} static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) { diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h index 5f3ff4781fda..7af0cebf28d7 100644 --- a/arch/loongarch/include/asm/elf.h +++ b/arch/loongarch/include/asm/elf.h @@ -74,6 +74,43 @@ #define R_LARCH_SUB64 56 #define R_LARCH_GNU_VTINHERIT 57 #define R_LARCH_GNU_VTENTRY 58 +#define R_LARCH_B16 64 +#define R_LARCH_B21 65 +#define R_LARCH_B26 66 +#define R_LARCH_ABS_HI20 67 +#define R_LARCH_ABS_LO12 68 +#define R_LARCH_ABS64_LO20 69 +#define R_LARCH_ABS64_HI12 70 +#define R_LARCH_PCALA_HI20 71 +#define R_LARCH_PCALA_LO12 72 +#define R_LARCH_PCALA64_LO20 73 +#define R_LARCH_PCALA64_HI12 74 +#define R_LARCH_GOT_PC_HI20 75 +#define R_LARCH_GOT_PC_LO12 76 +#define R_LARCH_GOT64_PC_LO20 77 +#define R_LARCH_GOT64_PC_HI12 78 +#define R_LARCH_GOT_HI20 79 +#define R_LARCH_GOT_LO12 80 +#define R_LARCH_GOT64_LO20 81 +#define R_LARCH_GOT64_HI12 82 +#define R_LARCH_TLS_LE_HI20 83 +#define R_LARCH_TLS_LE_LO12 84 +#define R_LARCH_TLS_LE64_LO20 85 +#define R_LARCH_TLS_LE64_HI12 86 +#define R_LARCH_TLS_IE_PC_HI20 87 +#define R_LARCH_TLS_IE_PC_LO12 88 +#define R_LARCH_TLS_IE64_PC_LO20 89 +#define R_LARCH_TLS_IE64_PC_HI12 90 +#define R_LARCH_TLS_IE_HI20 91 +#define R_LARCH_TLS_IE_LO12 92 +#define R_LARCH_TLS_IE64_LO20 93 +#define R_LARCH_TLS_IE64_HI12 94 +#define R_LARCH_TLS_LD_PC_HI20 95 +#define R_LARCH_TLS_LD_HI20 96 +#define R_LARCH_TLS_GD_PC_HI20 97 +#define R_LARCH_TLS_GD_HI20 98 +#define R_LARCH_32_PCREL 99 +#define R_LARCH_RELAX 100 #ifndef ELF_ARCH diff --git a/arch/loongarch/include/asm/fixmap.h b/arch/loongarch/include/asm/fixmap.h index b3541dfa2013..d2e55ae55bb9 100644 --- a/arch/loongarch/include/asm/fixmap.h +++ b/arch/loongarch/include/asm/fixmap.h @@ -10,4 +10,19 @@ #define NR_FIX_BTMAPS 64 +enum fixed_addresses { + FIX_HOLE, + FIX_EARLYCON_MEM_BASE, + __end_of_fixed_addresses +}; + +#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXMAP_PAGE_IO PAGE_KERNEL_SUC + +extern void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags); + +#include <asm-generic/fixmap.h> + #endif diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 7b07cbb3188c..fce1843ceebb 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -8,6 +8,8 @@ #include <linux/types.h> #include <asm/asm.h> +#define INSN_BREAK 0x002a0000 + #define ADDR_IMMMASK_LU52ID 0xFFF0000000000000 #define ADDR_IMMMASK_LU32ID 0x000FFFFF00000000 #define ADDR_IMMMASK_ADDU16ID 0x00000000FFFF0000 @@ -18,9 +20,16 @@ #define ADDR_IMM(addr, INSN) ((addr & ADDR_IMMMASK_##INSN) >> ADDR_IMMSHIFT_##INSN) +enum reg0i26_op { + b_op = 0x14, + bl_op = 0x15, +}; + enum reg1i20_op { lu12iw_op = 0x0a, lu32id_op = 0x0b, + pcaddu12i_op = 0x0e, + pcaddu18i_op = 0x0f, }; enum reg1i21_op { @@ -28,10 +37,34 @@ enum reg1i21_op { bnez_op = 0x11, }; +enum reg2_op { + revb2h_op = 0x0c, + revb4h_op = 0x0d, + revb2w_op = 0x0e, + revbd_op = 0x0f, + revh2w_op = 0x10, + revhd_op = 0x11, +}; + +enum reg2i5_op { + slliw_op = 0x81, + srliw_op = 0x89, + sraiw_op = 0x91, +}; + +enum reg2i6_op { + sllid_op = 0x41, + srlid_op = 0x45, + sraid_op = 0x49, +}; + enum reg2i12_op { addiw_op = 0x0a, addid_op = 0x0b, lu52id_op = 0x0c, + andi_op = 0x0d, + ori_op = 0x0e, + xori_op = 0x0f, ldb_op = 0xa0, ldh_op = 0xa1, ldw_op = 0xa2, @@ -40,6 +73,20 @@ enum reg2i12_op { sth_op = 0xa5, stw_op = 0xa6, std_op = 0xa7, + ldbu_op = 0xa8, + ldhu_op = 0xa9, + ldwu_op = 0xaa, +}; + +enum reg2i14_op { + llw_op = 0x20, + scw_op = 0x21, + lld_op = 0x22, + scd_op = 0x23, + ldptrw_op = 0x24, + stptrw_op = 0x25, + ldptrd_op = 0x26, + stptrd_op = 0x27, }; enum reg2i16_op { @@ -52,6 +99,71 @@ enum reg2i16_op { bgeu_op = 0x1b, }; +enum reg2bstrd_op { + bstrinsd_op = 0x2, + bstrpickd_op = 0x3, +}; + +enum reg3_op { + addw_op = 0x20, + addd_op = 0x21, + subw_op = 0x22, + subd_op = 0x23, + nor_op = 0x28, + and_op = 0x29, + or_op = 0x2a, + xor_op = 0x2b, + orn_op = 0x2c, + andn_op = 0x2d, + sllw_op = 0x2e, + srlw_op = 0x2f, + sraw_op = 0x30, + slld_op = 0x31, + srld_op = 0x32, + srad_op = 0x33, + mulw_op = 0x38, + mulhw_op = 0x39, + mulhwu_op = 0x3a, + muld_op = 0x3b, + mulhd_op = 0x3c, + mulhdu_op = 0x3d, + divw_op = 0x40, + modw_op = 0x41, + divwu_op = 0x42, + modwu_op = 0x43, + divd_op = 0x44, + modd_op = 0x45, + divdu_op = 0x46, + moddu_op = 0x47, + ldxb_op = 0x7000, + ldxh_op = 0x7008, + ldxw_op = 0x7010, + ldxd_op = 0x7018, + stxb_op = 0x7020, + stxh_op = 0x7028, + stxw_op = 0x7030, + stxd_op = 0x7038, + ldxbu_op = 0x7040, + ldxhu_op = 0x7048, + ldxwu_op = 0x7050, + amswapw_op = 0x70c0, + amswapd_op = 0x70c1, + amaddw_op = 0x70c2, + amaddd_op = 0x70c3, + amandw_op = 0x70c4, + amandd_op = 0x70c5, + amorw_op = 0x70c6, + amord_op = 0x70c7, + amxorw_op = 0x70c8, + amxord_op = 0x70c9, +}; + +enum reg3sa2_op { + alslw_op = 0x02, + alslwu_op = 0x03, + alsld_op = 0x16, +}; + struct reg0i26_format { unsigned int immediate_h : 10; unsigned int immediate_l : 16; @@ -71,6 +183,26 @@ struct reg1i21_format { unsigned int opcode : 6; }; +struct reg2_format { + unsigned int rd : 5; + unsigned int rj : 5; + unsigned int opcode : 22; +}; + +struct reg2i5_format { + unsigned int rd : 5; + unsigned int rj : 5; + unsigned int immediate : 5; + unsigned int opcode : 17; +}; + +struct reg2i6_format { + unsigned int rd : 5; + unsigned int rj : 5; + unsigned int immediate : 6; + unsigned int opcode : 16; +}; + struct reg2i12_format { unsigned int rd : 5; unsigned int rj : 5; @@ -78,6 +210,13 @@ struct reg2i12_format { unsigned int opcode : 10; }; +struct reg2i14_format { + unsigned int rd : 5; + unsigned int rj : 5; + unsigned int immediate : 14; + unsigned int opcode : 8; +}; + struct reg2i16_format { unsigned int rd : 5; unsigned int rj : 5; @@ -85,13 +224,43 @@ struct reg2i16_format { unsigned int opcode : 6; }; +struct reg2bstrd_format { + unsigned int rd : 5; + unsigned int rj : 5; + unsigned int lsbd : 6; + unsigned int msbd : 6; + unsigned int opcode : 10; +}; + +struct reg3_format { + unsigned int rd : 5; + unsigned int rj : 5; + unsigned int rk : 5; + unsigned int opcode : 17; +}; + +struct reg3sa2_format { + unsigned int rd : 5; + unsigned int rj : 5; + unsigned int rk : 5; + unsigned int immediate : 2; + unsigned int opcode : 15; +}; + union loongarch_instruction { unsigned int word; - struct reg0i26_format reg0i26_format; - struct reg1i20_format reg1i20_format; - struct reg1i21_format reg1i21_format; - struct reg2i12_format reg2i12_format; - struct reg2i16_format reg2i16_format; + struct reg0i26_format reg0i26_format; + struct reg1i20_format reg1i20_format; + struct reg1i21_format reg1i21_format; + struct reg2_format reg2_format; + struct reg2i5_format reg2i5_format; + struct reg2i6_format reg2i6_format; + struct reg2i12_format reg2i12_format; + struct reg2i14_format reg2i14_format; + struct reg2i16_format reg2i16_format; + struct reg2bstrd_format reg2bstrd_format; + struct reg3_format reg3_format; + struct reg3sa2_format reg3sa2_format; }; #define LOONGARCH_INSN_SIZE sizeof(union loongarch_instruction) @@ -166,4 +335,235 @@ u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm); u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest); +static inline bool signed_imm_check(long val, unsigned int bit) +{ + return -(1L << (bit - 1)) <= val && val < (1L << (bit - 1)); +} + +static inline bool unsigned_imm_check(unsigned long val, unsigned int bit) +{ + return val < (1UL << bit); +} + +#define DEF_EMIT_REG0I26_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + int offset) \ +{ \ + unsigned int immediate_l, immediate_h; \ + \ + immediate_l = offset & 0xffff; \ + offset >>= 16; \ + immediate_h = offset & 0x3ff; \ + \ + insn->reg0i26_format.opcode = OP; \ + insn->reg0i26_format.immediate_l = immediate_l; \ + insn->reg0i26_format.immediate_h = immediate_h; \ +} + +DEF_EMIT_REG0I26_FORMAT(b, b_op) + +#define DEF_EMIT_REG1I20_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, int imm) \ +{ \ + insn->reg1i20_format.opcode = OP; \ + insn->reg1i20_format.immediate = imm; \ + insn->reg1i20_format.rd = rd; \ +} + +DEF_EMIT_REG1I20_FORMAT(lu12iw, lu12iw_op) +DEF_EMIT_REG1I20_FORMAT(lu32id, lu32id_op) +DEF_EMIT_REG1I20_FORMAT(pcaddu18i, pcaddu18i_op) + +#define DEF_EMIT_REG2_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj) \ +{ \ + insn->reg2_format.opcode = OP; \ + insn->reg2_format.rd = rd; \ + insn->reg2_format.rj = rj; \ +} + +DEF_EMIT_REG2_FORMAT(revb2h, revb2h_op) +DEF_EMIT_REG2_FORMAT(revb2w, revb2w_op) +DEF_EMIT_REG2_FORMAT(revbd, revbd_op) + +#define DEF_EMIT_REG2I5_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int imm) \ +{ \ + insn->reg2i5_format.opcode = OP; \ + insn->reg2i5_format.immediate = imm; \ + insn->reg2i5_format.rd = rd; \ + insn->reg2i5_format.rj = rj; \ +} + +DEF_EMIT_REG2I5_FORMAT(slliw, slliw_op) +DEF_EMIT_REG2I5_FORMAT(srliw, srliw_op) +DEF_EMIT_REG2I5_FORMAT(sraiw, sraiw_op) + +#define DEF_EMIT_REG2I6_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int imm) \ +{ \ + insn->reg2i6_format.opcode = OP; \ + insn->reg2i6_format.immediate = imm; \ + insn->reg2i6_format.rd = rd; \ + insn->reg2i6_format.rj = rj; \ +} + +DEF_EMIT_REG2I6_FORMAT(sllid, sllid_op) +DEF_EMIT_REG2I6_FORMAT(srlid, srlid_op) +DEF_EMIT_REG2I6_FORMAT(sraid, sraid_op) + +#define DEF_EMIT_REG2I12_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int imm) \ +{ \ + insn->reg2i12_format.opcode = OP; \ + insn->reg2i12_format.immediate = imm; \ + insn->reg2i12_format.rd = rd; \ + insn->reg2i12_format.rj = rj; \ +} + +DEF_EMIT_REG2I12_FORMAT(addiw, addiw_op) +DEF_EMIT_REG2I12_FORMAT(addid, addid_op) +DEF_EMIT_REG2I12_FORMAT(lu52id, lu52id_op) +DEF_EMIT_REG2I12_FORMAT(andi, andi_op) +DEF_EMIT_REG2I12_FORMAT(ori, ori_op) +DEF_EMIT_REG2I12_FORMAT(xori, xori_op) +DEF_EMIT_REG2I12_FORMAT(ldbu, ldbu_op) +DEF_EMIT_REG2I12_FORMAT(ldhu, ldhu_op) +DEF_EMIT_REG2I12_FORMAT(ldwu, ldwu_op) +DEF_EMIT_REG2I12_FORMAT(ldd, ldd_op) +DEF_EMIT_REG2I12_FORMAT(stb, stb_op) +DEF_EMIT_REG2I12_FORMAT(sth, sth_op) +DEF_EMIT_REG2I12_FORMAT(stw, stw_op) +DEF_EMIT_REG2I12_FORMAT(std, std_op) + +#define DEF_EMIT_REG2I14_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int imm) \ +{ \ + insn->reg2i14_format.opcode = OP; \ + insn->reg2i14_format.immediate = imm; \ + insn->reg2i14_format.rd = rd; \ + insn->reg2i14_format.rj = rj; \ +} + +DEF_EMIT_REG2I14_FORMAT(llw, llw_op) +DEF_EMIT_REG2I14_FORMAT(scw, scw_op) +DEF_EMIT_REG2I14_FORMAT(lld, lld_op) +DEF_EMIT_REG2I14_FORMAT(scd, scd_op) +DEF_EMIT_REG2I14_FORMAT(ldptrw, ldptrw_op) +DEF_EMIT_REG2I14_FORMAT(stptrw, stptrw_op) +DEF_EMIT_REG2I14_FORMAT(ldptrd, ldptrd_op) +DEF_EMIT_REG2I14_FORMAT(stptrd, stptrd_op) + +#define DEF_EMIT_REG2I16_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rj, \ + enum loongarch_gpr rd, \ + int offset) \ +{ \ + insn->reg2i16_format.opcode = OP; \ + insn->reg2i16_format.immediate = offset; \ + insn->reg2i16_format.rj = rj; \ + insn->reg2i16_format.rd = rd; \ +} + +DEF_EMIT_REG2I16_FORMAT(beq, beq_op) +DEF_EMIT_REG2I16_FORMAT(bne, bne_op) +DEF_EMIT_REG2I16_FORMAT(blt, blt_op) +DEF_EMIT_REG2I16_FORMAT(bge, bge_op) +DEF_EMIT_REG2I16_FORMAT(bltu, bltu_op) +DEF_EMIT_REG2I16_FORMAT(bgeu, bgeu_op) +DEF_EMIT_REG2I16_FORMAT(jirl, jirl_op) + +#define DEF_EMIT_REG2BSTRD_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int msbd, \ + int lsbd) \ +{ \ + insn->reg2bstrd_format.opcode = OP; \ + insn->reg2bstrd_format.msbd = msbd; \ + insn->reg2bstrd_format.lsbd = lsbd; \ + insn->reg2bstrd_format.rj = rj; \ + insn->reg2bstrd_format.rd = rd; \ +} + +DEF_EMIT_REG2BSTRD_FORMAT(bstrpickd, bstrpickd_op) + +#define DEF_EMIT_REG3_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + enum loongarch_gpr rk) \ +{ \ + insn->reg3_format.opcode = OP; \ + insn->reg3_format.rd = rd; \ + insn->reg3_format.rj = rj; \ + insn->reg3_format.rk = rk; \ +} + +DEF_EMIT_REG3_FORMAT(addd, addd_op) +DEF_EMIT_REG3_FORMAT(subd, subd_op) +DEF_EMIT_REG3_FORMAT(muld, muld_op) +DEF_EMIT_REG3_FORMAT(divdu, divdu_op) +DEF_EMIT_REG3_FORMAT(moddu, moddu_op) +DEF_EMIT_REG3_FORMAT(and, and_op) +DEF_EMIT_REG3_FORMAT(or, or_op) +DEF_EMIT_REG3_FORMAT(xor, xor_op) +DEF_EMIT_REG3_FORMAT(sllw, sllw_op) +DEF_EMIT_REG3_FORMAT(slld, slld_op) +DEF_EMIT_REG3_FORMAT(srlw, srlw_op) +DEF_EMIT_REG3_FORMAT(srld, srld_op) +DEF_EMIT_REG3_FORMAT(sraw, sraw_op) +DEF_EMIT_REG3_FORMAT(srad, srad_op) +DEF_EMIT_REG3_FORMAT(ldxbu, ldxbu_op) +DEF_EMIT_REG3_FORMAT(ldxhu, ldxhu_op) +DEF_EMIT_REG3_FORMAT(ldxwu, ldxwu_op) +DEF_EMIT_REG3_FORMAT(ldxd, ldxd_op) +DEF_EMIT_REG3_FORMAT(stxb, stxb_op) +DEF_EMIT_REG3_FORMAT(stxh, stxh_op) +DEF_EMIT_REG3_FORMAT(stxw, stxw_op) +DEF_EMIT_REG3_FORMAT(stxd, stxd_op) +DEF_EMIT_REG3_FORMAT(amaddw, amaddw_op) +DEF_EMIT_REG3_FORMAT(amaddd, amaddd_op) +DEF_EMIT_REG3_FORMAT(amandw, amandw_op) +DEF_EMIT_REG3_FORMAT(amandd, amandd_op) +DEF_EMIT_REG3_FORMAT(amorw, amorw_op) +DEF_EMIT_REG3_FORMAT(amord, amord_op) +DEF_EMIT_REG3_FORMAT(amxorw, amxorw_op) +DEF_EMIT_REG3_FORMAT(amxord, amxord_op) +DEF_EMIT_REG3_FORMAT(amswapw, amswapw_op) +DEF_EMIT_REG3_FORMAT(amswapd, amswapd_op) + +#define DEF_EMIT_REG3SA2_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + enum loongarch_gpr rk, \ + int imm) \ +{ \ + insn->reg3sa2_format.opcode = OP; \ + insn->reg3sa2_format.immediate = imm; \ + insn->reg3sa2_format.rd = rd; \ + insn->reg3sa2_format.rj = rj; \ + insn->reg3sa2_format.rk = rk; \ +} + +DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op) + #endif /* _ASM_INST_H */ diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 999944ea1cea..402a7d9e3a53 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -27,71 +27,38 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size); #define early_memremap early_ioremap #define early_memunmap early_iounmap +#ifdef CONFIG_ARCH_IOREMAP + static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, unsigned long prot_val) { - if (prot_val == _CACHE_CC) + if (prot_val & _CACHE_CC) return (void __iomem *)(unsigned long)(CACHE_BASE + offset); else return (void __iomem *)(unsigned long)(UNCACHE_BASE + offset); } -/* - * ioremap - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - */ -#define ioremap(offset, size) \ - ioremap_prot((offset), (size), _CACHE_SUC) +#define ioremap(offset, size) \ + ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_SUC)) -/* - * ioremap_wc - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_wc performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * but accelerated by means of write-combining feature. It is specifically - * useful for PCIe prefetchable windows, which may vastly improve a - * communications performance. If it was determined on boot stage, what - * CPU CCA doesn't support WUC, the method shall fall-back to the - * _CACHE_SUC option (see cpu_probe() method). - */ -#define ioremap_wc(offset, size) \ - ioremap_prot((offset), (size), _CACHE_WUC) +#define iounmap(addr) ((void)(addr)) + +#endif /* - * ioremap_cache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map + * On LoongArch, ioremap() has two variants, ioremap_wc() and ioremap_cache(). + * They map bus memory into CPU space, the mapped memory is marked uncachable + * (_CACHE_SUC), uncachable but accelerated by write-combine (_CACHE_WUC) and + * cachable (_CACHE_CC) respectively for CPU access. * - * ioremap_cache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked cachable by - * the CPU. Also enables full write-combining. Useful for some - * memory-like regions on I/O busses. + * @offset: bus address of the memory + * @size: size of the resource to map */ -#define ioremap_cache(offset, size) \ - ioremap_prot((offset), (size), _CACHE_CC) +#define ioremap_wc(offset, size) \ + ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_WUC)) -static inline void iounmap(const volatile void __iomem *addr) -{ -} +#define ioremap_cache(offset, size) \ + ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL)) #define mmiowb() asm volatile ("dbar 0" ::: "memory") @@ -107,4 +74,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t #include <asm-generic/io.h> +#define ARCH_HAS_VALID_PHYS_ADDR_RANGE +extern int valid_phys_addr_range(phys_addr_t addr, size_t size); +extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); + #endif /* _ASM_IO_H */ diff --git a/arch/loongarch/include/asm/kexec.h b/arch/loongarch/include/asm/kexec.h new file mode 100644 index 000000000000..cf95cd3eb2de --- /dev/null +++ b/arch/loongarch/include/asm/kexec.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * kexec.h for kexec + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_KEXEC_H +#define _ASM_KEXEC_H + +#include <asm/stacktrace.h> +#include <asm/page.h> + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) + /* Maximum address we can use for the control code buffer */ +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) + +/* Reserve a page for the control code buffer */ +#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE + +/* The native architecture */ +#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH + +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + if (oldregs) + memcpy(newregs, oldregs, sizeof(*newregs)); + else + prepare_frametrace(newregs); +} + +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + unsigned long efi_boot; + unsigned long cmdline_ptr; + unsigned long systable_ptr; +}; + +typedef void (*do_kexec_t)(unsigned long efi_boot, + unsigned long cmdline_ptr, + unsigned long systable_ptr, + unsigned long start_addr, + unsigned long first_ind_entry); + +struct kimage; +extern const unsigned char relocate_new_kernel[]; +extern const size_t relocate_new_kernel_size; +extern void kexec_reboot(void); + +#ifdef CONFIG_SMP +extern atomic_t kexec_ready_to_reboot; +extern const unsigned char kexec_smp_wait[]; +#endif + +#endif /* !_ASM_KEXEC_H */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 3ba4f7e87cd2..7f8d57a61c8b 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -187,36 +187,15 @@ static inline u32 read_cpucfg(u32 reg) #define CPUCFG16_L3_DINCL BIT(16) #define LOONGARCH_CPUCFG17 0x11 -#define CPUCFG17_L1I_WAYS_M GENMASK(15, 0) -#define CPUCFG17_L1I_SETS_M GENMASK(23, 16) -#define CPUCFG17_L1I_SIZE_M GENMASK(30, 24) -#define CPUCFG17_L1I_WAYS 0 -#define CPUCFG17_L1I_SETS 16 -#define CPUCFG17_L1I_SIZE 24 - #define LOONGARCH_CPUCFG18 0x12 -#define CPUCFG18_L1D_WAYS_M GENMASK(15, 0) -#define CPUCFG18_L1D_SETS_M GENMASK(23, 16) -#define CPUCFG18_L1D_SIZE_M GENMASK(30, 24) -#define CPUCFG18_L1D_WAYS 0 -#define CPUCFG18_L1D_SETS 16 -#define CPUCFG18_L1D_SIZE 24 - #define LOONGARCH_CPUCFG19 0x13 -#define CPUCFG19_L2_WAYS_M GENMASK(15, 0) -#define CPUCFG19_L2_SETS_M GENMASK(23, 16) -#define CPUCFG19_L2_SIZE_M GENMASK(30, 24) -#define CPUCFG19_L2_WAYS 0 -#define CPUCFG19_L2_SETS 16 -#define CPUCFG19_L2_SIZE 24 - #define LOONGARCH_CPUCFG20 0x14 -#define CPUCFG20_L3_WAYS_M GENMASK(15, 0) -#define CPUCFG20_L3_SETS_M GENMASK(23, 16) -#define CPUCFG20_L3_SIZE_M GENMASK(30, 24) -#define CPUCFG20_L3_WAYS 0 -#define CPUCFG20_L3_SETS 16 -#define CPUCFG20_L3_SIZE 24 +#define CPUCFG_CACHE_WAYS_M GENMASK(15, 0) +#define CPUCFG_CACHE_SETS_M GENMASK(23, 16) +#define CPUCFG_CACHE_LSIZE_M GENMASK(30, 24) +#define CPUCFG_CACHE_WAYS 0 +#define CPUCFG_CACHE_SETS 16 +#define CPUCFG_CACHE_LSIZE 24 #define LOONGARCH_CPUCFG48 0x30 #define CPUCFG48_MCSR_LCK BIT(0) diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index 9f6718df1854..b29b19a46f42 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -17,10 +17,15 @@ struct mod_section { }; struct mod_arch_specific { + struct mod_section got; struct mod_section plt; struct mod_section plt_idx; }; +struct got_entry { + Elf_Addr symbol_addr; +}; + struct plt_entry { u32 inst_lu12iw; u32 inst_lu32id; @@ -29,10 +34,16 @@ struct plt_entry { }; struct plt_idx_entry { - unsigned long symbol_addr; + Elf_Addr symbol_addr; }; -Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val); +Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val); +Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val); + +static inline struct got_entry emit_got_entry(Elf_Addr val) +{ + return (struct got_entry) { val }; +} static inline struct plt_entry emit_plt_entry(unsigned long val) { @@ -77,4 +88,16 @@ static inline struct plt_entry *get_plt_entry(unsigned long val, return plt + plt_idx; } +static inline struct got_entry *get_got_entry(Elf_Addr val, + const struct mod_section *sec) +{ + struct got_entry *got = (struct got_entry *)sec->shdr->sh_addr; + int i; + + for (i = 0; i < sec->num_entries; i++) + if (got[i].symbol_addr == val) + return &got[i]; + return NULL; +} + #endif /* _ASM_MODULE_H */ diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h index 31c1c0db11a3..a3d1bc0fcc72 100644 --- a/arch/loongarch/include/asm/module.lds.h +++ b/arch/loongarch/include/asm/module.lds.h @@ -2,6 +2,7 @@ /* Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ SECTIONS { . = ALIGN(4); + .got : { BYTE(0) } .plt : { BYTE(0) } .plt.idx : { BYTE(0) } } diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index 0bd6b0110198..ad8d88494554 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -8,6 +8,15 @@ #include <asm/cmpxchg.h> #include <asm/loongarch.h> +/* + * The "address" (in fact, offset from $r21) of a per-CPU variable is close to + * the loading address of main kernel image, but far from where the modules are + * loaded. Tell the compiler this fact when using explicit relocs. + */ +#if defined(MODULE) && defined(CONFIG_AS_HAS_EXPLICIT_RELOCS) +#define PER_CPU_ATTRIBUTES __attribute__((model("extreme"))) +#endif + /* Use r21 for fast access */ register unsigned long __my_cpu_offset __asm__("$r21"); diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h index dcb3b17053a8..2a35a0bc2aaa 100644 --- a/arch/loongarch/include/asm/perf_event.h +++ b/arch/loongarch/include/asm/perf_event.h @@ -6,5 +6,7 @@ #ifndef __LOONGARCH_PERF_EVENT_H__ #define __LOONGARCH_PERF_EVENT_H__ -/* Nothing to show here; the file is required by linux/perf_event.h. */ + +#define perf_arch_bpf_user_pt_regs(regs) (struct user_pt_regs *)regs + #endif /* __LOONGARCH_PERF_EVENT_H__ */ diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h index 9ca147a29bab..3d1e0a69975a 100644 --- a/arch/loongarch/include/asm/pgtable-bits.h +++ b/arch/loongarch/include/asm/pgtable-bits.h @@ -83,8 +83,11 @@ _PAGE_GLOBAL | _PAGE_KERN | _CACHE_SUC) #define PAGE_KERNEL_WUC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \ _PAGE_GLOBAL | _PAGE_KERN | _CACHE_WUC) + #ifndef __ASSEMBLY__ +#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL_SUC) + #define pgprot_noncached pgprot_noncached static inline pgprot_t pgprot_noncached(pgprot_t _prot) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 8ea57e2f0e04..946704bee599 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -412,6 +412,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, __update_tlb(vma, address, ptep); } +#define __HAVE_ARCH_UPDATE_MMU_TLB +#define update_mmu_tlb update_mmu_cache + static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index 1c4b4308378d..6954dc5d24e9 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -176,9 +176,6 @@ struct thread_struct { struct task_struct; -/* Free all resources held by a thread. */ -#define release_thread(thread) do { } while (0) - enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL}; extern unsigned long boot_option_idle_override; diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h index 6d7d2a3e23dd..ca373f8e3c4d 100644 --- a/arch/loongarch/include/asm/setup.h +++ b/arch/loongarch/include/asm/setup.h @@ -13,7 +13,9 @@ extern unsigned long eentry; extern unsigned long tlbrentry; +extern void tlb_init(int cpu); extern void cpu_cache_init(void); +extern void cache_error_setup(void); extern void per_cpu_trap_init(int cpu); extern void set_handler(unsigned long offset, void *addr, unsigned long len); extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len); diff --git a/arch/loongarch/include/asm/spinlock.h b/arch/loongarch/include/asm/spinlock.h new file mode 100644 index 000000000000..7cb3476999be --- /dev/null +++ b/arch/loongarch/include/asm/spinlock.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#ifndef _ASM_SPINLOCK_H +#define _ASM_SPINLOCK_H + +#include <asm/processor.h> +#include <asm/qspinlock.h> +#include <asm/qrwlock.h> + +#endif /* _ASM_SPINLOCK_H */ diff --git a/arch/loongarch/include/asm/spinlock_types.h b/arch/loongarch/include/asm/spinlock_types.h new file mode 100644 index 000000000000..7458d036c161 --- /dev/null +++ b/arch/loongarch/include/asm/spinlock_types.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#ifndef _ASM_SPINLOCK_TYPES_H +#define _ASM_SPINLOCK_TYPES_H + +#include <asm-generic/qspinlock_types.h> +#include <asm-generic/qrwlock_types.h> + +#endif diff --git a/arch/loongarch/include/uapi/asm/bpf_perf_event.h b/arch/loongarch/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..eb6e2fd2a1f0 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include <linux/ptrace.h> + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/arch/loongarch/include/uapi/asm/perf_regs.h b/arch/loongarch/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..29d69c00fc7a --- /dev/null +++ b/arch/loongarch/include/uapi/asm/perf_regs.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_LOONGARCH_PERF_REGS_H +#define _ASM_LOONGARCH_PERF_REGS_H + +enum perf_event_loongarch_regs { + PERF_REG_LOONGARCH_PC, + PERF_REG_LOONGARCH_R1, + PERF_REG_LOONGARCH_R2, + PERF_REG_LOONGARCH_R3, + PERF_REG_LOONGARCH_R4, + PERF_REG_LOONGARCH_R5, + PERF_REG_LOONGARCH_R6, + PERF_REG_LOONGARCH_R7, + PERF_REG_LOONGARCH_R8, + PERF_REG_LOONGARCH_R9, + PERF_REG_LOONGARCH_R10, + PERF_REG_LOONGARCH_R11, + PERF_REG_LOONGARCH_R12, + PERF_REG_LOONGARCH_R13, + PERF_REG_LOONGARCH_R14, + PERF_REG_LOONGARCH_R15, + PERF_REG_LOONGARCH_R16, + PERF_REG_LOONGARCH_R17, + PERF_REG_LOONGARCH_R18, + PERF_REG_LOONGARCH_R19, + PERF_REG_LOONGARCH_R20, + PERF_REG_LOONGARCH_R21, + PERF_REG_LOONGARCH_R22, + PERF_REG_LOONGARCH_R23, + PERF_REG_LOONGARCH_R24, + PERF_REG_LOONGARCH_R25, + PERF_REG_LOONGARCH_R26, + PERF_REG_LOONGARCH_R27, + PERF_REG_LOONGARCH_R28, + PERF_REG_LOONGARCH_R29, + PERF_REG_LOONGARCH_R30, + PERF_REG_LOONGARCH_R31, + PERF_REG_LOONGARCH_MAX, +}; +#endif /* _ASM_LOONGARCH_PERF_REGS_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index e5be17009fe8..42be564278fa 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -3,9 +3,9 @@ # Makefile for the Linux/LoongArch kernel. # -extra-y := head.o vmlinux.lds +extra-y := vmlinux.lds -obj-y += cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ +obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \ elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o @@ -23,7 +23,14 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_NUMA) += numa.o +obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o + +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o + obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o + CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/cacheinfo.c b/arch/loongarch/kernel/cacheinfo.c index 4662b06269f4..c7988f757281 100644 --- a/arch/loongarch/kernel/cacheinfo.c +++ b/arch/loongarch/kernel/cacheinfo.c @@ -5,73 +5,34 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include <linux/cacheinfo.h> +#include <linux/topology.h> #include <asm/bootinfo.h> #include <asm/cpu-info.h> -/* Populates leaf and increments to next leaf */ -#define populate_cache(cache, leaf, c_level, c_type) \ -do { \ - leaf->type = c_type; \ - leaf->level = c_level; \ - leaf->coherency_line_size = c->cache.linesz; \ - leaf->number_of_sets = c->cache.sets; \ - leaf->ways_of_associativity = c->cache.ways; \ - leaf->size = c->cache.linesz * c->cache.sets * \ - c->cache.ways; \ - if (leaf->level > 2) \ - leaf->size *= nodes_per_package; \ - leaf++; \ -} while (0) - int init_cache_level(unsigned int cpu) { - struct cpuinfo_loongarch *c = ¤t_cpu_data; + int cache_present = current_cpu_data.cache_leaves_present; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - int levels = 0, leaves = 0; - - /* - * If Dcache is not set, we assume the cache structures - * are not properly initialized. - */ - if (c->dcache.waysize) - levels += 1; - else - return -ENOENT; - - - leaves += (c->icache.waysize) ? 2 : 1; - - if (c->vcache.waysize) { - levels++; - leaves++; - } - if (c->scache.waysize) { - levels++; - leaves++; - } + this_cpu_ci->num_levels = + current_cpu_data.cache_leaves[cache_present - 1].level; + this_cpu_ci->num_leaves = cache_present; - if (c->tcache.waysize) { - levels++; - leaves++; - } - - this_cpu_ci->num_levels = levels; - this_cpu_ci->num_leaves = leaves; return 0; } static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) { - return !((this_leaf->level == 1) || (this_leaf->level == 2)); + return (!(*(unsigned char *)(this_leaf->priv) & CACHE_PRIVATE) + && !(*(unsigned char *)(sib_leaf->priv) & CACHE_PRIVATE)); } static void cache_cpumap_setup(unsigned int cpu) { - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - struct cacheinfo *this_leaf, *sib_leaf; unsigned int index; + struct cacheinfo *this_leaf, *sib_leaf; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); for (index = 0; index < this_cpu_ci->num_leaves; index++) { unsigned int i; @@ -85,8 +46,10 @@ static void cache_cpumap_setup(unsigned int cpu) for_each_online_cpu(i) { struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i); - if (i == cpu || !sib_cpu_ci->info_list) - continue;/* skip if itself or no cacheinfo */ + if (i == cpu || !sib_cpu_ci->info_list || + (cpu_to_node(i) != cpu_to_node(cpu))) + continue; + sib_leaf = sib_cpu_ci->info_list + index; if (cache_leaves_are_shared(this_leaf, sib_leaf)) { cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); @@ -98,31 +61,24 @@ static void cache_cpumap_setup(unsigned int cpu) int populate_cache_leaves(unsigned int cpu) { - int level = 1, nodes_per_package = 1; - struct cpuinfo_loongarch *c = ¤t_cpu_data; + int i, cache_present = current_cpu_data.cache_leaves_present; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; - - if (loongson_sysconf.nr_nodes > 1) - nodes_per_package = loongson_sysconf.cores_per_package - / loongson_sysconf.cores_per_node; - - if (c->icache.waysize) { - populate_cache(dcache, this_leaf, level, CACHE_TYPE_DATA); - populate_cache(icache, this_leaf, level++, CACHE_TYPE_INST); - } else { - populate_cache(dcache, this_leaf, level++, CACHE_TYPE_UNIFIED); + struct cache_desc *cd, *cdesc = current_cpu_data.cache_leaves; + + for (i = 0; i < cache_present; i++) { + cd = cdesc + i; + + this_leaf->type = cd->type; + this_leaf->level = cd->level; + this_leaf->coherency_line_size = cd->linesz; + this_leaf->number_of_sets = cd->sets; + this_leaf->ways_of_associativity = cd->ways; + this_leaf->size = cd->linesz * cd->sets * cd->ways; + this_leaf->priv = &cd->flags; + this_leaf++; } - if (c->vcache.waysize) - populate_cache(vcache, this_leaf, level++, CACHE_TYPE_UNIFIED); - - if (c->scache.waysize) - populate_cache(scache, this_leaf, level++, CACHE_TYPE_UNIFIED); - - if (c->tcache.waysize) - populate_cache(tcache, this_leaf, level++, CACHE_TYPE_UNIFIED); - cache_cpumap_setup(cpu); this_cpu_ci->cpu_map_populated = true; diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 529ab8f44ec6..255a09876ef2 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -187,7 +187,9 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int uint64_t *vendor = (void *)(&cpu_full_name[VENDOR_OFFSET]); uint64_t *cpuname = (void *)(&cpu_full_name[CPUNAME_OFFSET]); - __cpu_full_name[cpu] = cpu_full_name; + if (!__cpu_full_name[cpu]) + __cpu_full_name[cpu] = cpu_full_name; + *vendor = iocsr_read64(LOONGARCH_IOCSR_VENDOR); *cpuname = iocsr_read64(LOONGARCH_IOCSR_CPUNAME); diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c new file mode 100644 index 000000000000..e559307c1092 --- /dev/null +++ b/arch/loongarch/kernel/crash_dump.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/crash_dump.h> +#include <linux/io.h> +#include <linux/uio.h> + +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB); + if (!vaddr) + return -ENOMEM; + + csize = copy_to_iter(vaddr + offset, csize, iter); + + memunmap(vaddr); + + return csize; +} diff --git a/arch/loongarch/kernel/efi-header.S b/arch/loongarch/kernel/efi-header.S new file mode 100644 index 000000000000..8c1d229a2afa --- /dev/null +++ b/arch/loongarch/kernel/efi-header.S @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include <linux/pe.h> +#include <linux/sizes.h> + + .macro __EFI_PE_HEADER + .long PE_MAGIC +.Lcoff_header: + .short IMAGE_FILE_MACHINE_LOONGARCH64 /* Machine */ + .short .Lsection_count /* NumberOfSections */ + .long 0 /* TimeDateStamp */ + .long 0 /* PointerToSymbolTable */ + .long 0 /* NumberOfSymbols */ + .short .Lsection_table - .Loptional_header /* SizeOfOptionalHeader */ + .short IMAGE_FILE_DEBUG_STRIPPED | \ + IMAGE_FILE_EXECUTABLE_IMAGE | \ + IMAGE_FILE_LINE_NUMS_STRIPPED /* Characteristics */ + +.Loptional_header: + .short PE_OPT_MAGIC_PE32PLUS /* PE32+ format */ + .byte 0x02 /* MajorLinkerVersion */ + .byte 0x14 /* MinorLinkerVersion */ + .long __inittext_end - .Lefi_header_end /* SizeOfCode */ + .long _end - __initdata_begin /* SizeOfInitializedData */ + .long 0 /* SizeOfUninitializedData */ + .long __efistub_efi_pe_entry - _head /* AddressOfEntryPoint */ + .long .Lefi_header_end - _head /* BaseOfCode */ + +.Lextra_header_fields: + .quad 0 /* ImageBase */ + .long PECOFF_SEGMENT_ALIGN /* SectionAlignment */ + .long PECOFF_FILE_ALIGN /* FileAlignment */ + .short 0 /* MajorOperatingSystemVersion */ + .short 0 /* MinorOperatingSystemVersion */ + .short LINUX_EFISTUB_MAJOR_VERSION /* MajorImageVersion */ + .short LINUX_EFISTUB_MINOR_VERSION /* MinorImageVersion */ + .short 0 /* MajorSubsystemVersion */ + .short 0 /* MinorSubsystemVersion */ + .long 0 /* Win32VersionValue */ + + .long _end - _head /* SizeOfImage */ + + /* Everything before the kernel image is considered part of the header */ + .long .Lefi_header_end - _head /* SizeOfHeaders */ + .long 0 /* CheckSum */ + .short IMAGE_SUBSYSTEM_EFI_APPLICATION /* Subsystem */ + .short 0 /* DllCharacteristics */ + .quad 0 /* SizeOfStackReserve */ + .quad 0 /* SizeOfStackCommit */ + .quad 0 /* SizeOfHeapReserve */ + .quad 0 /* SizeOfHeapCommit */ + .long 0 /* LoaderFlags */ + .long (.Lsection_table - .) / 8 /* NumberOfRvaAndSizes */ + + .quad 0 /* ExportTable */ + .quad 0 /* ImportTable */ + .quad 0 /* ResourceTable */ + .quad 0 /* ExceptionTable */ + .quad 0 /* CertificationTable */ + .quad 0 /* BaseRelocationTable */ + + /* Section table */ +.Lsection_table: + .ascii ".text\0\0\0" + .long __inittext_end - .Lefi_header_end /* VirtualSize */ + .long .Lefi_header_end - _head /* VirtualAddress */ + .long __inittext_end - .Lefi_header_end /* SizeOfRawData */ + .long .Lefi_header_end - _head /* PointerToRawData */ + + .long 0 /* PointerToRelocations */ + .long 0 /* PointerToLineNumbers */ + .short 0 /* NumberOfRelocations */ + .short 0 /* NumberOfLineNumbers */ + .long IMAGE_SCN_CNT_CODE | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_EXECUTE /* Characteristics */ + + .ascii ".data\0\0\0" + .long _end - __initdata_begin /* VirtualSize */ + .long __initdata_begin - _head /* VirtualAddress */ + .long _edata - __initdata_begin /* SizeOfRawData */ + .long __initdata_begin - _head /* PointerToRawData */ + + .long 0 /* PointerToRelocations */ + .long 0 /* PointerToLineNumbers */ + .short 0 /* NumberOfRelocations */ + .short 0 /* NumberOfLineNumbers */ + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_WRITE /* Characteristics */ + + .set .Lsection_count, (. - .Lsection_table) / 40 + + .balign 0x10000 /* PECOFF_SEGMENT_ALIGN */ +.Lefi_header_end: + .endm diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index a50b60c587fa..a31329971133 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -27,8 +27,13 @@ static unsigned long efi_nr_tables; static unsigned long efi_config_table; +static unsigned long __initdata boot_memmap = EFI_INVALID_TABLE_ADDR; + static efi_system_table_t *efi_systab; -static efi_config_table_type_t arch_tables[] __initdata = {{},}; +static efi_config_table_type_t arch_tables[] __initdata = { + {LINUX_EFI_BOOT_MEMMAP_GUID, &boot_memmap, "MEMMAP" }, + {}, +}; void __init efi_runtime_init(void) { @@ -51,6 +56,7 @@ void __init efi_init(void) { int size; void *config_tables; + struct efi_boot_memmap *tbl; if (!efi_system_table) return; @@ -61,6 +67,8 @@ void __init efi_init(void) return; } + efi_systab_report_header(&efi_systab->hdr, efi_systab->fw_vendor); + set_bit(EFI_64BIT, &efi.flags); efi_nr_tables = efi_systab->nr_tables; efi_config_table = (unsigned long)efi_systab->tables; @@ -69,4 +77,27 @@ void __init efi_init(void) config_tables = early_memremap(efi_config_table, efi_nr_tables * size); efi_config_parse_tables(config_tables, efi_systab->nr_tables, arch_tables); early_memunmap(config_tables, efi_nr_tables * size); + + set_bit(EFI_CONFIG_TABLES, &efi.flags); + + if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI) + memblock_reserve(screen_info.lfb_base, screen_info.lfb_size); + + if (boot_memmap == EFI_INVALID_TABLE_ADDR) + return; + + tbl = early_memremap_ro(boot_memmap, sizeof(*tbl)); + if (tbl) { + struct efi_memory_map_data data; + + data.phys_map = boot_memmap + sizeof(*tbl); + data.size = tbl->map_size; + data.desc_size = tbl->desc_size; + data.desc_version = tbl->desc_ver; + + if (efi_memmap_init_early(&data) < 0) + panic("Unable to map EFI memory map.\n"); + + early_memunmap(tbl, sizeof(*tbl)); + } } diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c index 82b478a5c665..6d56a463b091 100644 --- a/arch/loongarch/kernel/env.c +++ b/arch/loongarch/kernel/env.c @@ -8,7 +8,6 @@ #include <linux/efi.h> #include <linux/export.h> #include <linux/memblock.h> -#include <linux/of_fdt.h> #include <asm/early_ioremap.h> #include <asm/bootinfo.h> #include <asm/loongson.h> @@ -20,21 +19,17 @@ EXPORT_SYMBOL(loongson_sysconf); void __init init_environ(void) { int efi_boot = fw_arg0; - struct efi_memory_map_data data; - void *fdt_ptr = early_memremap_ro(fw_arg1, SZ_64K); + char *cmdline = early_memremap_ro(fw_arg1, COMMAND_LINE_SIZE); if (efi_boot) set_bit(EFI_BOOT, &efi.flags); else clear_bit(EFI_BOOT, &efi.flags); - early_init_dt_scan(fdt_ptr); - early_init_fdt_reserve_self(); - efi_system_table = efi_get_fdt_params(&data); + strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE); + early_memunmap(cmdline, COMMAND_LINE_SIZE); - efi_memmap_init_early(&data); - memblock_reserve(data.phys_map & PAGE_MASK, - PAGE_ALIGN(data.size + (data.phys_map & ~PAGE_MASK))); + efi_system_table = fw_arg2; } static int __init init_cpu_fullname(void) diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index 331864369e49..97425779ce9f 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -8,10 +8,35 @@ #include <asm/addrspace.h> #include <asm/asm.h> #include <asm/asmmacro.h> +#include <asm/bug.h> #include <asm/regdef.h> #include <asm/loongarch.h> #include <asm/stackframe.h> +#ifdef CONFIG_EFI_STUB + +#include "efi-header.S" + + __HEAD + +_head: + .word MZ_MAGIC /* "MZ", MS-DOS header */ + .org 0x8 + .dword kernel_entry /* Kernel entry point */ + .dword _end - _text /* Kernel image effective size */ + .quad 0 /* Kernel image load offset from start of RAM */ + .org 0x3c /* 0x20 ~ 0x3b reserved */ + .long pe_header - _head /* Offset to the PE header */ + +pe_header: + __EFI_PE_HEADER + +SYM_DATA(kernel_asize, .long _end - _text); +SYM_DATA(kernel_fsize, .long _edata - _text); +SYM_DATA(kernel_offset, .long kernel_offset - _text); + +#endif + __REF .align 12 @@ -37,25 +62,27 @@ SYM_CODE_START(kernel_entry) # kernel entry point li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0 csrwr t0, LOONGARCH_CSR_EUEN - la t0, __bss_start # clear .bss + la.pcrel t0, __bss_start # clear .bss st.d zero, t0, 0 - la t1, __bss_stop - LONGSIZE + la.pcrel t1, __bss_stop - LONGSIZE 1: addi.d t0, t0, LONGSIZE st.d zero, t0, 0 bne t0, t1, 1b - la t0, fw_arg0 + la.pcrel t0, fw_arg0 st.d a0, t0, 0 # firmware arguments - la t0, fw_arg1 + la.pcrel t0, fw_arg1 st.d a1, t0, 0 + la.pcrel t0, fw_arg2 + st.d a2, t0, 0 /* KSave3 used for percpu base, initialized as 0 */ csrwr zero, PERCPU_BASE_KS /* GPR21 used for percpu base (runtime), initialized as 0 */ move u0, zero - la tp, init_thread_union + la.pcrel tp, init_thread_union /* Set the SP after an empty pt_regs. */ PTR_LI sp, (_THREAD_SIZE - 32 - PT_SIZE) PTR_ADD sp, sp, tp @@ -63,6 +90,7 @@ SYM_CODE_START(kernel_entry) # kernel entry point PTR_ADDI sp, sp, -4 * SZREG # init stack pointer bl start_kernel + ASM_BUG() SYM_CODE_END(kernel_entry) @@ -94,6 +122,8 @@ SYM_CODE_START(smpboot_entry) ld.d tp, t0, CPU_BOOT_TINFO bl start_secondary + ASM_BUG() + SYM_CODE_END(smpboot_entry) #endif /* CONFIG_SMP */ diff --git a/arch/loongarch/kernel/image-vars.h b/arch/loongarch/kernel/image-vars.h new file mode 100644 index 000000000000..88f5d81702df --- /dev/null +++ b/arch/loongarch/kernel/image-vars.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#ifndef __LOONGARCH_KERNEL_IMAGE_VARS_H +#define __LOONGARCH_KERNEL_IMAGE_VARS_H + +#ifdef CONFIG_EFI_STUB + +__efistub_memcmp = memcmp; +__efistub_memchr = memchr; +__efistub_strcat = strcat; +__efistub_strcmp = strcmp; +__efistub_strlen = strlen; +__efistub_strncat = strncat; +__efistub_strnstr = strnstr; +__efistub_strnlen = strnlen; +__efistub_strrchr = strrchr; +__efistub_kernel_entry = kernel_entry; +__efistub_kernel_asize = kernel_asize; +__efistub_kernel_fsize = kernel_fsize; +__efistub_kernel_offset = kernel_offset; +__efistub_screen_info = screen_info; + +#endif + +#endif /* __LOONGARCH_KERNEL_IMAGE_VARS_H */ diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c new file mode 100644 index 000000000000..2dcb9e003657 --- /dev/null +++ b/arch/loongarch/kernel/machine_kexec.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * machine_kexec.c for kexec + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ +#include <linux/compiler.h> +#include <linux/cpu.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/libfdt.h> +#include <linux/mm.h> +#include <linux/of_fdt.h> +#include <linux/reboot.h> +#include <linux/sched.h> +#include <linux/sched/task_stack.h> + +#include <asm/bootinfo.h> +#include <asm/cacheflush.h> +#include <asm/page.h> + +/* 0x100000 ~ 0x200000 is safe */ +#define KEXEC_CONTROL_CODE TO_CACHE(0x100000UL) +#define KEXEC_CMDLINE_ADDR TO_CACHE(0x108000UL) + +static unsigned long reboot_code_buffer; +static cpumask_t cpus_in_crash = CPU_MASK_NONE; + +#ifdef CONFIG_SMP +static void (*relocated_kexec_smp_wait)(void *); +atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0); +#endif + +static unsigned long efi_boot; +static unsigned long cmdline_ptr; +static unsigned long systable_ptr; +static unsigned long start_addr; +static unsigned long first_ind_entry; + +static void kexec_image_info(const struct kimage *kimage) +{ + unsigned long i; + + pr_debug("kexec kimage info:\n"); + pr_debug("\ttype: %d\n", kimage->type); + pr_debug("\tstart: %lx\n", kimage->start); + pr_debug("\thead: %lx\n", kimage->head); + pr_debug("\tnr_segments: %lu\n", kimage->nr_segments); + + for (i = 0; i < kimage->nr_segments; i++) { + pr_debug("\t segment[%lu]: %016lx - %016lx", i, + kimage->segment[i].mem, + kimage->segment[i].mem + kimage->segment[i].memsz); + pr_debug("\t\t0x%lx bytes, %lu pages\n", + (unsigned long)kimage->segment[i].memsz, + (unsigned long)kimage->segment[i].memsz / PAGE_SIZE); + } +} + +int machine_kexec_prepare(struct kimage *kimage) +{ + int i; + char *bootloader = "kexec"; + void *cmdline_ptr = (void *)KEXEC_CMDLINE_ADDR; + + kexec_image_info(kimage); + + kimage->arch.efi_boot = fw_arg0; + kimage->arch.systable_ptr = fw_arg2; + + /* Find the command line */ + for (i = 0; i < kimage->nr_segments; i++) { + if (!strncmp(bootloader, (char __user *)kimage->segment[i].buf, strlen(bootloader))) { + if (!copy_from_user(cmdline_ptr, kimage->segment[i].buf, COMMAND_LINE_SIZE)) + kimage->arch.cmdline_ptr = (unsigned long)cmdline_ptr; + break; + } + } + + if (!kimage->arch.cmdline_ptr) { + pr_err("Command line not included in the provided image\n"); + return -EINVAL; + } + + /* kexec/kdump need a safe page to save reboot_code_buffer */ + kimage->control_code_page = virt_to_page((void *)KEXEC_CONTROL_CODE); + + reboot_code_buffer = (unsigned long)page_address(kimage->control_code_page); + memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size); + +#ifdef CONFIG_SMP + /* All secondary cpus now may jump to kexec_smp_wait cycle */ + relocated_kexec_smp_wait = reboot_code_buffer + (void *)(kexec_smp_wait - relocate_new_kernel); +#endif + + return 0; +} + +void machine_kexec_cleanup(struct kimage *kimage) +{ +} + +void kexec_reboot(void) +{ + do_kexec_t do_kexec = NULL; + + /* + * We know we were online, and there will be no incoming IPIs at + * this point. Mark online again before rebooting so that the crash + * analysis tool will see us correctly. + */ + set_cpu_online(smp_processor_id(), true); + + /* Ensure remote CPUs observe that we're online before rebooting. */ + smp_mb__after_atomic(); + + /* + * Make sure we get correct instructions written by the + * machine_kexec_prepare() CPU. + */ + __asm__ __volatile__ ("\tibar 0\n"::); + +#ifdef CONFIG_SMP + /* All secondary cpus go to kexec_smp_wait */ + if (smp_processor_id() > 0) { + relocated_kexec_smp_wait(NULL); + unreachable(); + } +#endif + + do_kexec = (void *)reboot_code_buffer; + do_kexec(efi_boot, cmdline_ptr, systable_ptr, start_addr, first_ind_entry); + + unreachable(); +} + + +#ifdef CONFIG_SMP +static void kexec_shutdown_secondary(void *regs) +{ + int cpu = smp_processor_id(); + + if (!cpu_online(cpu)) + return; + + /* We won't be sent IPIs any more. */ + set_cpu_online(cpu, false); + + local_irq_disable(); + while (!atomic_read(&kexec_ready_to_reboot)) + cpu_relax(); + + kexec_reboot(); +} + +static void crash_shutdown_secondary(void *passed_regs) +{ + int cpu = smp_processor_id(); + struct pt_regs *regs = passed_regs; + + /* + * If we are passed registers, use those. Otherwise get the + * regs from the last interrupt, which should be correct, as + * we are in an interrupt. But if the regs are not there, + * pull them from the top of the stack. They are probably + * wrong, but we need something to keep from crashing again. + */ + if (!regs) + regs = get_irq_regs(); + if (!regs) + regs = task_pt_regs(current); + + if (!cpu_online(cpu)) + return; + + /* We won't be sent IPIs any more. */ + set_cpu_online(cpu, false); + + local_irq_disable(); + if (!cpumask_test_cpu(cpu, &cpus_in_crash)) + crash_save_cpu(regs, cpu); + cpumask_set_cpu(cpu, &cpus_in_crash); + + while (!atomic_read(&kexec_ready_to_reboot)) + cpu_relax(); + + kexec_reboot(); +} + +void crash_smp_send_stop(void) +{ + unsigned int ncpus; + unsigned long timeout; + static int cpus_stopped; + + /* + * This function can be called twice in panic path, but obviously + * we should execute this only once. + */ + if (cpus_stopped) + return; + + cpus_stopped = 1; + + /* Excluding the panic cpu */ + ncpus = num_online_cpus() - 1; + + smp_call_function(crash_shutdown_secondary, NULL, 0); + smp_wmb(); + + /* + * The crash CPU sends an IPI and wait for other CPUs to + * respond. Delay of at least 10 seconds. + */ + timeout = MSEC_PER_SEC * 10; + pr_emerg("Sending IPI to other cpus...\n"); + while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) { + mdelay(1); + cpu_relax(); + } +} +#endif /* defined(CONFIG_SMP) */ + +void machine_shutdown(void) +{ + int cpu; + + /* All CPUs go to reboot_code_buffer */ + for_each_possible_cpu(cpu) + if (!cpu_online(cpu)) + cpu_device_up(get_cpu_device(cpu)); + +#ifdef CONFIG_SMP + smp_call_function(kexec_shutdown_secondary, NULL, 0); +#endif +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ + int crashing_cpu; + + local_irq_disable(); + + crashing_cpu = smp_processor_id(); + crash_save_cpu(regs, crashing_cpu); + +#ifdef CONFIG_SMP + crash_smp_send_stop(); +#endif + cpumask_set_cpu(crashing_cpu, &cpus_in_crash); + + pr_info("Starting crashdump kernel...\n"); +} + +void machine_kexec(struct kimage *image) +{ + unsigned long entry, *ptr; + struct kimage_arch *internal = &image->arch; + + efi_boot = internal->efi_boot; + cmdline_ptr = internal->cmdline_ptr; + systable_ptr = internal->systable_ptr; + + start_addr = (unsigned long)phys_to_virt(image->start); + + first_ind_entry = (image->type == KEXEC_TYPE_DEFAULT) ? + (unsigned long)phys_to_virt(image->head & PAGE_MASK) : 0; + + /* + * The generic kexec code builds a page list with physical + * addresses. they are directly accessible through XKPRANGE + * hence the phys_to_virt() call. + */ + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); + ptr = (entry & IND_INDIRECTION) ? + phys_to_virt(entry & PAGE_MASK) : ptr + 1) { + if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION || + *ptr & IND_DESTINATION) + *ptr = (unsigned long) phys_to_virt(*ptr); + } + + /* Mark offline before disabling local irq. */ + set_cpu_online(smp_processor_id(), false); + + /* We do not want to be bothered. */ + local_irq_disable(); + + pr_notice("EFI boot flag 0x%lx\n", efi_boot); + pr_notice("Command line at 0x%lx\n", cmdline_ptr); + pr_notice("System table at 0x%lx\n", systable_ptr); + pr_notice("We will call new kernel at 0x%lx\n", start_addr); + pr_notice("Bye ...\n"); + + /* Make reboot code buffer available to the boot CPU. */ + flush_cache_all(); + +#ifdef CONFIG_SMP + atomic_set(&kexec_ready_to_reboot, 1); +#endif + + kexec_reboot(); +} diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c index 7423361b0ebc..4a4107a6a965 100644 --- a/arch/loongarch/kernel/mem.c +++ b/arch/loongarch/kernel/mem.c @@ -58,7 +58,4 @@ void __init memblock_init(void) /* Reserve the kernel text/data/bss */ memblock_reserve(__pa_symbol(&_text), __pa_symbol(&_end) - __pa_symbol(&_text)); - - /* Reserve the initrd */ - reserve_initrd_mem(); } diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index 6d498288977d..d296a70b758f 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -7,7 +7,33 @@ #include <linux/kernel.h> #include <linux/module.h> -Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val) +Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val) +{ + struct mod_section *got_sec = &mod->arch.got; + int i = got_sec->num_entries; + struct got_entry *got = get_got_entry(val, got_sec); + + if (got) + return (Elf_Addr)got; + + /* There is no GOT entry for val yet, create a new one. */ + got = (struct got_entry *)got_sec->shdr->sh_addr; + got[i] = emit_got_entry(val); + + got_sec->num_entries++; + if (got_sec->num_entries > got_sec->max_entries) { + /* + * This may happen when the module contains a GOT_HI20 without + * a paired GOT_LO12. Such a module is broken, reject it. + */ + pr_err("%s: module contains bad GOT relocation\n", mod->name); + return 0; + } + + return (Elf_Addr)&got[i]; +} + +Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val) { int nr; struct mod_section *plt_sec = &mod->arch.plt; @@ -50,15 +76,25 @@ static bool duplicate_rela(const Elf_Rela *rela, int idx) return false; } -static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts) +static void count_max_entries(Elf_Rela *relas, int num, + unsigned int *plts, unsigned int *gots) { unsigned int i, type; for (i = 0; i < num; i++) { type = ELF_R_TYPE(relas[i].r_info); - if (type == R_LARCH_SOP_PUSH_PLT_PCREL) { + switch (type) { + case R_LARCH_SOP_PUSH_PLT_PCREL: + case R_LARCH_B26: if (!duplicate_rela(relas, i)) (*plts)++; + break; + case R_LARCH_GOT_PC_HI20: + if (!duplicate_rela(relas, i)) + (*gots)++; + break; + default: + break; /* Do nothing. */ } } } @@ -66,18 +102,24 @@ static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts) int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { - unsigned int i, num_plts = 0; + unsigned int i, num_plts = 0, num_gots = 0; /* * Find the empty .plt sections. */ for (i = 0; i < ehdr->e_shnum; i++) { - if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) + if (!strcmp(secstrings + sechdrs[i].sh_name, ".got")) + mod->arch.got.shdr = sechdrs + i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) mod->arch.plt.shdr = sechdrs + i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx")) mod->arch.plt_idx.shdr = sechdrs + i; } + if (!mod->arch.got.shdr) { + pr_err("%s: module GOT section(s) missing\n", mod->name); + return -ENOEXEC; + } if (!mod->arch.plt.shdr) { pr_err("%s: module PLT section(s) missing\n", mod->name); return -ENOEXEC; @@ -100,9 +142,16 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, if (!(dst_sec->sh_flags & SHF_EXECINSTR)) continue; - count_max_entries(relas, num_rela, &num_plts); + count_max_entries(relas, num_rela, &num_plts, &num_gots); } + mod->arch.got.shdr->sh_type = SHT_NOBITS; + mod->arch.got.shdr->sh_flags = SHF_ALLOC; + mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES; + mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry); + mod->arch.got.num_entries = 0; + mod->arch.got.max_entries = num_gots; + mod->arch.plt.shdr->sh_type = SHT_NOBITS; mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES; diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 638427ff0d51..097595b2fc14 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -18,16 +18,6 @@ #include <linux/string.h> #include <linux/kernel.h> -static inline bool signed_imm_check(long val, unsigned int bit) -{ - return -(1L << (bit - 1)) <= val && val < (1L << (bit - 1)); -} - -static inline bool unsigned_imm_check(unsigned long val, unsigned int bit) -{ - return val < (1UL << bit); -} - static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top) { if (*rela_stack_top >= RELA_STACK_DEPTH) @@ -281,6 +271,96 @@ static int apply_r_larch_add_sub(struct module *mod, u32 *location, Elf_Addr v, } } +static int apply_r_larch_b26(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top, unsigned int type) +{ + ptrdiff_t offset = (void *)v - (void *)location; + union loongarch_instruction *insn = (union loongarch_instruction *)location; + + if (offset >= SZ_128M) + v = module_emit_plt_entry(mod, v); + + if (offset < -SZ_128M) + v = module_emit_plt_entry(mod, v); + + offset = (void *)v - (void *)location; + + if (offset & 3) { + pr_err("module %s: jump offset = 0x%llx unaligned! dangerous R_LARCH_B26 (%u) relocation\n", + mod->name, (long long)offset, type); + return -ENOEXEC; + } + + if (!signed_imm_check(offset, 28)) { + pr_err("module %s: jump offset = 0x%llx overflow! dangerous R_LARCH_B26 (%u) relocation\n", + mod->name, (long long)offset, type); + return -ENOEXEC; + } + + offset >>= 2; + insn->reg0i26_format.immediate_l = offset & 0xffff; + insn->reg0i26_format.immediate_h = (offset >> 16) & 0x3ff; + + return 0; +} + +static int apply_r_larch_pcala(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top, unsigned int type) +{ + union loongarch_instruction *insn = (union loongarch_instruction *)location; + /* Use s32 for a sign-extension deliberately. */ + s32 offset_hi20 = (void *)((v + 0x800) & ~0xfff) - + (void *)((Elf_Addr)location & ~0xfff); + Elf_Addr anchor = (((Elf_Addr)location) & ~0xfff) + offset_hi20; + ptrdiff_t offset_rem = (void *)v - (void *)anchor; + + switch (type) { + case R_LARCH_PCALA_LO12: + insn->reg2i12_format.immediate = v & 0xfff; + break; + case R_LARCH_PCALA_HI20: + v = offset_hi20 >> 12; + insn->reg1i20_format.immediate = v & 0xfffff; + break; + case R_LARCH_PCALA64_LO20: + v = offset_rem >> 32; + insn->reg1i20_format.immediate = v & 0xfffff; + break; + case R_LARCH_PCALA64_HI12: + v = offset_rem >> 52; + insn->reg2i12_format.immediate = v & 0xfff; + break; + default: + pr_err("%s: Unsupport relocation type %u\n", mod->name, type); + return -EINVAL; + } + + return 0; +} + +static int apply_r_larch_got_pc(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top, unsigned int type) +{ + Elf_Addr got = module_emit_got_entry(mod, v); + + if (!got) + return -EINVAL; + + switch (type) { + case R_LARCH_GOT_PC_LO12: + type = R_LARCH_PCALA_LO12; + break; + case R_LARCH_GOT_PC_HI20: + type = R_LARCH_PCALA_HI20; + break; + default: + pr_err("%s: Unsupport relocation type %u\n", mod->name, type); + return -EINVAL; + } + + return apply_r_larch_pcala(mod, location, got, rela_stack, rela_stack_top, type); +} + /* * reloc_handlers_rela() - Apply a particular relocation to a module * @mod: the module to apply the reloc to @@ -296,7 +376,7 @@ typedef int (*reloc_rela_handler)(struct module *mod, u32 *location, Elf_Addr v, /* The handlers for known reloc types */ static reloc_rela_handler reloc_rela_handlers[] = { - [R_LARCH_NONE ... R_LARCH_SUB64] = apply_r_larch_error, + [R_LARCH_NONE ... R_LARCH_RELAX] = apply_r_larch_error, [R_LARCH_NONE] = apply_r_larch_none, [R_LARCH_32] = apply_r_larch_32, @@ -310,6 +390,9 @@ static reloc_rela_handler reloc_rela_handlers[] = { [R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE] = apply_r_larch_sop, [R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field, [R_LARCH_ADD32 ... R_LARCH_SUB64] = apply_r_larch_add_sub, + [R_LARCH_B26] = apply_r_larch_b26, + [R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12] = apply_r_larch_pcala, + [R_LARCH_GOT_PC_HI20...R_LARCH_GOT_PC_LO12] = apply_r_larch_got_pc, }; int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c new file mode 100644 index 000000000000..707bd32e5c4f --- /dev/null +++ b/arch/loongarch/kernel/perf_event.c @@ -0,0 +1,887 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux performance counter support for LoongArch. + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + * + * Derived from MIPS: + * Copyright (C) 2010 MIPS Technologies, Inc. + * Copyright (C) 2011 Cavium Networks, Inc. + * Author: Deng-Cheng Zhu + */ + +#include <linux/cpumask.h> +#include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/uaccess.h> +#include <linux/sched/task_stack.h> + +#include <asm/irq.h> +#include <asm/irq_regs.h> +#include <asm/stacktrace.h> +#include <asm/unwind.h> + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static unsigned long +user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp) +{ + unsigned long err; + unsigned long __user *user_frame_tail; + struct stack_frame buftail; + + user_frame_tail = (unsigned long __user *)(fp - sizeof(struct stack_frame)); + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(user_frame_tail, sizeof(buftail))) + return 0; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, user_frame_tail, sizeof(buftail)); + pagefault_enable(); + + if (err || (unsigned long)user_frame_tail >= buftail.fp) + return 0; + + perf_callchain_store(entry, buftail.ra); + + return buftail.fp; +} + +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned long fp; + + if (perf_guest_state()) { + /* We don't support guest os callchain now */ + return; + } + + perf_callchain_store(entry, regs->csr_era); + + fp = regs->regs[22]; + + while (entry->nr < entry->max_stack && fp && !((unsigned long)fp & 0xf)) + fp = user_backtrace(entry, fp); +} + +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + struct unwind_state state; + unsigned long addr; + + for (unwind_start(&state, current, regs); + !unwind_done(&state); unwind_next_frame(&state)) { + addr = unwind_get_return_address(&state); + if (!addr || perf_callchain_store(entry, addr)) + return; + } +} + +#define LOONGARCH_MAX_HWEVENTS 32 + +struct cpu_hw_events { + /* Array of events on this cpu. */ + struct perf_event *events[LOONGARCH_MAX_HWEVENTS]; + + /* + * Set the bit (indexed by the counter number) when the counter + * is used for an event. + */ + unsigned long used_mask[BITS_TO_LONGS(LOONGARCH_MAX_HWEVENTS)]; + + /* + * Software copy of the control register for each performance counter. + */ + unsigned int saved_ctrl[LOONGARCH_MAX_HWEVENTS]; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .saved_ctrl = {0}, +}; + +/* The description of LoongArch performance events. */ +struct loongarch_perf_event { + unsigned int event_id; +}; + +static struct loongarch_perf_event raw_event; +static DEFINE_MUTEX(raw_event_mutex); + +#define C(x) PERF_COUNT_HW_CACHE_##x +#define HW_OP_UNSUPPORTED 0xffffffff +#define CACHE_OP_UNSUPPORTED 0xffffffff + +#define PERF_MAP_ALL_UNSUPPORTED \ + [0 ... PERF_COUNT_HW_MAX - 1] = {HW_OP_UNSUPPORTED} + +#define PERF_CACHE_MAP_ALL_UNSUPPORTED \ +[0 ... C(MAX) - 1] = { \ + [0 ... C(OP_MAX) - 1] = { \ + [0 ... C(RESULT_MAX) - 1] = {CACHE_OP_UNSUPPORTED}, \ + }, \ +} + +struct loongarch_pmu { + u64 max_period; + u64 valid_count; + u64 overflow; + const char *name; + unsigned int num_counters; + u64 (*read_counter)(unsigned int idx); + void (*write_counter)(unsigned int idx, u64 val); + const struct loongarch_perf_event *(*map_raw_event)(u64 config); + const struct loongarch_perf_event (*general_event_map)[PERF_COUNT_HW_MAX]; + const struct loongarch_perf_event (*cache_event_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; +}; + +static struct loongarch_pmu loongarch_pmu; + +#define M_PERFCTL_EVENT(event) (event & CSR_PERFCTRL_EVENT) + +#define M_PERFCTL_COUNT_EVENT_WHENEVER (CSR_PERFCTRL_PLV0 | \ + CSR_PERFCTRL_PLV1 | \ + CSR_PERFCTRL_PLV2 | \ + CSR_PERFCTRL_PLV3 | \ + CSR_PERFCTRL_IE) + +#define M_PERFCTL_CONFIG_MASK 0x1f0000 + +static void pause_local_counters(void); +static void resume_local_counters(void); + +static u64 loongarch_pmu_read_counter(unsigned int idx) +{ + u64 val = -1; + + switch (idx) { + case 0: + val = read_csr_perfcntr0(); + break; + case 1: + val = read_csr_perfcntr1(); + break; + case 2: + val = read_csr_perfcntr2(); + break; + case 3: + val = read_csr_perfcntr3(); + break; + default: + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); + return 0; + } + + return val; +} + +static void loongarch_pmu_write_counter(unsigned int idx, u64 val) +{ + switch (idx) { + case 0: + write_csr_perfcntr0(val); + return; + case 1: + write_csr_perfcntr1(val); + return; + case 2: + write_csr_perfcntr2(val); + return; + case 3: + write_csr_perfcntr3(val); + return; + default: + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); + return; + } +} + +static unsigned int loongarch_pmu_read_control(unsigned int idx) +{ + unsigned int val = -1; + + switch (idx) { + case 0: + val = read_csr_perfctrl0(); + break; + case 1: + val = read_csr_perfctrl1(); + break; + case 2: + val = read_csr_perfctrl2(); + break; + case 3: + val = read_csr_perfctrl3(); + break; + default: + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); + return 0; + } + + return val; +} + +static void loongarch_pmu_write_control(unsigned int idx, unsigned int val) +{ + switch (idx) { + case 0: + write_csr_perfctrl0(val); + return; + case 1: + write_csr_perfctrl1(val); + return; + case 2: + write_csr_perfctrl2(val); + return; + case 3: + write_csr_perfctrl3(val); + return; + default: + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); + return; + } +} + +static int loongarch_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) +{ + int i; + + for (i = 0; i < loongarch_pmu.num_counters; i++) { + if (!test_and_set_bit(i, cpuc->used_mask)) + return i; + } + + return -EAGAIN; +} + +static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx) +{ + unsigned int cpu; + struct perf_event *event = container_of(evt, struct perf_event, hw); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); + + /* Make sure interrupt enabled. */ + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | + (evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE; + + cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id(); + + /* + * We do not actually let the counter run. Leave it until start(). + */ + pr_debug("Enabling perf counter for CPU%d\n", cpu); +} + +static void loongarch_pmu_disable_event(int idx) +{ + unsigned long flags; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); + + local_irq_save(flags); + cpuc->saved_ctrl[idx] = loongarch_pmu_read_control(idx) & + ~M_PERFCTL_COUNT_EVENT_WHENEVER; + loongarch_pmu_write_control(idx, cpuc->saved_ctrl[idx]); + local_irq_restore(flags); +} + +static int loongarch_pmu_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, + int idx) +{ + int ret = 0; + u64 left = local64_read(&hwc->period_left); + u64 period = hwc->sample_period; + + if (unlikely((left + period) & (1ULL << 63))) { + /* left underflowed by more than period. */ + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } else if (unlikely((left + period) <= period)) { + /* left underflowed by less than period. */ + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (left > loongarch_pmu.max_period) { + left = loongarch_pmu.max_period; + local64_set(&hwc->period_left, left); + } + + local64_set(&hwc->prev_count, loongarch_pmu.overflow - left); + + loongarch_pmu.write_counter(idx, loongarch_pmu.overflow - left); + + perf_event_update_userpage(event); + + return ret; +} + +static void loongarch_pmu_event_update(struct perf_event *event, + struct hw_perf_event *hwc, + int idx) +{ + u64 delta; + u64 prev_raw_count, new_raw_count; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = loongarch_pmu.read_counter(idx); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = new_raw_count - prev_raw_count; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); +} + +static void loongarch_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* Set the period for the event. */ + loongarch_pmu_event_set_period(event, hwc, hwc->idx); + + /* Enable the event. */ + loongarch_pmu_enable_event(hwc, hwc->idx); +} + +static void loongarch_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + /* We are working on a local event. */ + loongarch_pmu_disable_event(hwc->idx); + barrier(); + loongarch_pmu_event_update(event, hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } +} + +static int loongarch_pmu_add(struct perf_event *event, int flags) +{ + int idx, err = 0; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + perf_pmu_disable(event->pmu); + + /* To look for a free counter for this event. */ + idx = loongarch_pmu_alloc_counter(cpuc, hwc); + if (idx < 0) { + err = idx; + goto out; + } + + /* + * If there is an event in the counter we are going to use then + * make sure it is disabled. + */ + event->hw.idx = idx; + loongarch_pmu_disable_event(idx); + cpuc->events[idx] = event; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + loongarch_pmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. */ + perf_event_update_userpage(event); + +out: + perf_pmu_enable(event->pmu); + return err; +} + +static void loongarch_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); + + loongarch_pmu_stop(event, PERF_EF_UPDATE); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + +static void loongarch_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* Don't read disabled counters! */ + if (hwc->idx < 0) + return; + + loongarch_pmu_event_update(event, hwc, hwc->idx); +} + +static void loongarch_pmu_enable(struct pmu *pmu) +{ + resume_local_counters(); +} + +static void loongarch_pmu_disable(struct pmu *pmu) +{ + pause_local_counters(); +} + +static DEFINE_MUTEX(pmu_reserve_mutex); +static atomic_t active_events = ATOMIC_INIT(0); + +static int get_pmc_irq(void) +{ + struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); + + if (d) + return irq_create_mapping(d, EXCCODE_PMC - EXCCODE_INT_START); + + return -EINVAL; +} + +static void reset_counters(void *arg); +static int __hw_perf_event_init(struct perf_event *event); + +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) { + on_each_cpu(reset_counters, NULL, 1); + free_irq(get_pmc_irq(), &loongarch_pmu); + mutex_unlock(&pmu_reserve_mutex); + } +} + +static void handle_associated_event(struct cpu_hw_events *cpuc, int idx, + struct perf_sample_data *data, struct pt_regs *regs) +{ + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc = &event->hw; + + loongarch_pmu_event_update(event, hwc, idx); + data->period = event->hw.last_period; + if (!loongarch_pmu_event_set_period(event, hwc, idx)) + return; + + if (perf_event_overflow(event, data, regs)) + loongarch_pmu_disable_event(idx); +} + +static irqreturn_t pmu_handle_irq(int irq, void *dev) +{ + int n; + int handled = IRQ_NONE; + uint64_t counter; + struct pt_regs *regs; + struct perf_sample_data data; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * First we pause the local counters, so that when we are locked + * here, the counters are all paused. When it gets locked due to + * perf_disable(), the timer interrupt handler will be delayed. + * + * See also loongarch_pmu_start(). + */ + pause_local_counters(); + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0, 0); + + for (n = 0; n < loongarch_pmu.num_counters; n++) { + if (test_bit(n, cpuc->used_mask)) { + counter = loongarch_pmu.read_counter(n); + if (counter & loongarch_pmu.overflow) { + handle_associated_event(cpuc, n, &data, regs); + handled = IRQ_HANDLED; + } + } + } + + resume_local_counters(); + + /* + * Do all the work for the pending perf events. We can do this + * in here because the performance counter interrupt is a regular + * interrupt, not NMI. + */ + if (handled == IRQ_HANDLED) + irq_work_run(); + + return handled; +} + +static int loongarch_pmu_event_init(struct perf_event *event) +{ + int r, irq; + unsigned long flags; + + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + /* Init it to avoid false validate_group */ + event->hw.event_base = 0xffffffff; + return -ENOENT; + } + + if (event->cpu >= 0 && !cpu_online(event->cpu)) + return -ENODEV; + + irq = get_pmc_irq(); + flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED; + if (!atomic_inc_not_zero(&active_events)) { + mutex_lock(&pmu_reserve_mutex); + if (atomic_read(&active_events) == 0) { + r = request_irq(irq, pmu_handle_irq, flags, "Perf_PMU", &loongarch_pmu); + if (r < 0) { + mutex_unlock(&pmu_reserve_mutex); + pr_warn("PMU IRQ request failed\n"); + return -ENODEV; + } + } + atomic_inc(&active_events); + mutex_unlock(&pmu_reserve_mutex); + } + + return __hw_perf_event_init(event); +} + +static struct pmu pmu = { + .pmu_enable = loongarch_pmu_enable, + .pmu_disable = loongarch_pmu_disable, + .event_init = loongarch_pmu_event_init, + .add = loongarch_pmu_add, + .del = loongarch_pmu_del, + .start = loongarch_pmu_start, + .stop = loongarch_pmu_stop, + .read = loongarch_pmu_read, +}; + +static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev) +{ + return (pev->event_id & 0xff); +} + +static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx) +{ + const struct loongarch_perf_event *pev; + + pev = &(*loongarch_pmu.general_event_map)[idx]; + + if (pev->event_id == HW_OP_UNSUPPORTED) + return ERR_PTR(-ENOENT); + + return pev; +} + +static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 config) +{ + unsigned int cache_type, cache_op, cache_result; + const struct loongarch_perf_event *pev; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return ERR_PTR(-EINVAL); + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return ERR_PTR(-EINVAL); + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return ERR_PTR(-EINVAL); + + pev = &((*loongarch_pmu.cache_event_map) + [cache_type] + [cache_op] + [cache_result]); + + if (pev->event_id == CACHE_OP_UNSUPPORTED) + return ERR_PTR(-ENOENT); + + return pev; +} + +static int validate_group(struct perf_event *event) +{ + struct cpu_hw_events fake_cpuc; + struct perf_event *sibling, *leader = event->group_leader; + + memset(&fake_cpuc, 0, sizeof(fake_cpuc)); + + if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0) + return -EINVAL; + + for_each_sibling_event(sibling, leader) { + if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0) + return -EINVAL; + } + + if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0) + return -EINVAL; + + return 0; +} + +static void reset_counters(void *arg) +{ + int n; + int counters = loongarch_pmu.num_counters; + + for (n = 0; n < counters; n++) { + loongarch_pmu_write_control(n, 0); + loongarch_pmu.write_counter(n, 0); + } +} + +static const struct loongarch_perf_event loongson_event_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00 }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01 }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x08 }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x09 }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02 }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x03 }, +}; + +static const struct loongarch_perf_event loongson_cache_map + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +PERF_CACHE_MAP_ALL_UNSUPPORTED, +[C(L1D)] = { + /* + * Like some other architectures (e.g. ARM), the performance + * counters don't differentiate between read and write + * accesses/misses, so this isn't strictly correct, but it's the + * best we can do. Writes and reads get combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x8 }, + [C(RESULT_MISS)] = { 0x9 }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x8 }, + [C(RESULT_MISS)] = { 0x9 }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { 0xaa }, + [C(RESULT_MISS)] = { 0xa9 }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x6 }, + [C(RESULT_MISS)] = { 0x7 }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0xc }, + [C(RESULT_MISS)] = { 0xd }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0xc }, + [C(RESULT_MISS)] = { 0xd }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x3b }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x4 }, + [C(RESULT_MISS)] = { 0x3c }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x4 }, + [C(RESULT_MISS)] = { 0x3c }, + }, +}, +[C(BPU)] = { + /* Using the same code for *HW_BRANCH* */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x02 }, + [C(RESULT_MISS)] = { 0x03 }, + }, +}, +}; + +static int __hw_perf_event_init(struct perf_event *event) +{ + int err; + struct hw_perf_event *hwc = &event->hw; + struct perf_event_attr *attr = &event->attr; + const struct loongarch_perf_event *pev; + + /* Returning LoongArch event descriptor for generic perf event. */ + if (PERF_TYPE_HARDWARE == event->attr.type) { + if (event->attr.config >= PERF_COUNT_HW_MAX) + return -EINVAL; + pev = loongarch_pmu_map_general_event(event->attr.config); + } else if (PERF_TYPE_HW_CACHE == event->attr.type) { + pev = loongarch_pmu_map_cache_event(event->attr.config); + } else if (PERF_TYPE_RAW == event->attr.type) { + /* We are working on the global raw event. */ + mutex_lock(&raw_event_mutex); + pev = loongarch_pmu.map_raw_event(event->attr.config); + } else { + /* The event type is not (yet) supported. */ + return -EOPNOTSUPP; + } + + if (IS_ERR(pev)) { + if (PERF_TYPE_RAW == event->attr.type) + mutex_unlock(&raw_event_mutex); + return PTR_ERR(pev); + } + + /* + * We allow max flexibility on how each individual counter shared + * by the single CPU operates (the mode exclusion and the range). + */ + hwc->config_base = CSR_PERFCTRL_IE; + + hwc->event_base = loongarch_pmu_perf_event_encode(pev); + if (PERF_TYPE_RAW == event->attr.type) + mutex_unlock(&raw_event_mutex); + + if (!attr->exclude_user) { + hwc->config_base |= CSR_PERFCTRL_PLV3; + hwc->config_base |= CSR_PERFCTRL_PLV2; + } + if (!attr->exclude_kernel) { + hwc->config_base |= CSR_PERFCTRL_PLV0; + } + if (!attr->exclude_hv) { + hwc->config_base |= CSR_PERFCTRL_PLV1; + } + + hwc->config_base &= M_PERFCTL_CONFIG_MASK; + /* + * The event can belong to another cpu. We do not assign a local + * counter for it for now. + */ + hwc->idx = -1; + hwc->config = 0; + + if (!hwc->sample_period) { + hwc->sample_period = loongarch_pmu.max_period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + err = 0; + if (event->group_leader != event) + err = validate_group(event); + + event->destroy = hw_perf_event_destroy; + + if (err) + event->destroy(event); + + return err; +} + +static void pause_local_counters(void) +{ + unsigned long flags; + int ctr = loongarch_pmu.num_counters; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + local_irq_save(flags); + do { + ctr--; + cpuc->saved_ctrl[ctr] = loongarch_pmu_read_control(ctr); + loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] & + ~M_PERFCTL_COUNT_EVENT_WHENEVER); + } while (ctr > 0); + local_irq_restore(flags); +} + +static void resume_local_counters(void) +{ + int ctr = loongarch_pmu.num_counters; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + do { + ctr--; + loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]); + } while (ctr > 0); +} + +static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config) +{ + raw_event.event_id = config & 0xff; + + return &raw_event; +} + +static int __init init_hw_perf_events(void) +{ + int counters; + + if (!cpu_has_pmp) + return -ENODEV; + + pr_info("Performance counters: "); + counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1; + + loongarch_pmu.num_counters = counters; + loongarch_pmu.max_period = (1ULL << 63) - 1; + loongarch_pmu.valid_count = (1ULL << 63) - 1; + loongarch_pmu.overflow = 1ULL << 63; + loongarch_pmu.name = "loongarch/loongson64"; + loongarch_pmu.read_counter = loongarch_pmu_read_counter; + loongarch_pmu.write_counter = loongarch_pmu_write_counter; + loongarch_pmu.map_raw_event = loongarch_pmu_map_raw_event; + loongarch_pmu.general_event_map = &loongson_event_map; + loongarch_pmu.cache_event_map = &loongson_cache_map; + + on_each_cpu(reset_counters, NULL, 1); + + pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n", + loongarch_pmu.name, counters, 64); + + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + + return 0; +} +early_initcall(init_hw_perf_events); diff --git a/arch/loongarch/kernel/perf_regs.c b/arch/loongarch/kernel/perf_regs.c new file mode 100644 index 000000000000..263ac4ab5af6 --- /dev/null +++ b/arch/loongarch/kernel/perf_regs.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Loongson Technology Corporation Limited + * + * Derived from MIPS: + * Copyright (C) 2013 Cavium, Inc. + */ + +#include <linux/perf_event.h> + +#include <asm/ptrace.h> + +#ifdef CONFIG_32BIT +u64 perf_reg_abi(struct task_struct *tsk) +{ + return PERF_SAMPLE_REGS_ABI_32; +} +#else /* Must be CONFIG_64BIT */ +u64 perf_reg_abi(struct task_struct *tsk) +{ + if (test_tsk_thread_flag(tsk, TIF_32BIT_REGS)) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} +#endif /* CONFIG_32BIT */ + +int perf_reg_validate(u64 mask) +{ + if (!mask) + return -EINVAL; + if (mask & ~((1ull << PERF_REG_LOONGARCH_MAX) - 1)) + return -EINVAL; + return 0; +} + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_LOONGARCH_MAX)) + return 0; + + if ((u32)idx == PERF_REG_LOONGARCH_PC) + return regs->csr_era; + + return regs->regs[idx]; +} + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 660492f064e7..1256e3582475 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -293,7 +293,7 @@ unsigned long stack_top(void) unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() & ~PAGE_MASK; + sp -= prandom_u32_max(PAGE_SIZE); return sp & STACK_ALIGN; } diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S new file mode 100644 index 000000000000..d13252553a7c --- /dev/null +++ b/arch/loongarch/kernel/relocate_kernel.S @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * relocate_kernel.S for kexec + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#include <linux/kexec.h> + +#include <asm/asm.h> +#include <asm/asmmacro.h> +#include <asm/regdef.h> +#include <asm/loongarch.h> +#include <asm/stackframe.h> +#include <asm/addrspace.h> + +SYM_CODE_START(relocate_new_kernel) + /* + * a0: EFI boot flag for the new kernel + * a1: Command line pointer for the new kernel + * a2: System table pointer for the new kernel + * a3: Start address to jump to after relocation + * a4: Pointer to the current indirection page entry + */ + move s0, a4 + + /* + * In case of a kdump/crash kernel, the indirection page is not + * populated as the kernel is directly copied to a reserved location + */ + beqz s0, done + +process_entry: + PTR_L s1, s0, 0 + PTR_ADDI s0, s0, SZREG + + /* destination page */ + andi s2, s1, IND_DESTINATION + beqz s2, 1f + li.w t0, ~0x1 + and s3, s1, t0 /* store destination addr in s3 */ + b process_entry + +1: + /* indirection page, update s0 */ + andi s2, s1, IND_INDIRECTION + beqz s2, 1f + li.w t0, ~0x2 + and s0, s1, t0 + b process_entry + +1: + /* done page */ + andi s2, s1, IND_DONE + beqz s2, 1f + b done + +1: + /* source page */ + andi s2, s1, IND_SOURCE + beqz s2, process_entry + li.w t0, ~0x8 + and s1, s1, t0 + li.w s5, (1 << _PAGE_SHIFT) / SZREG + +copy_word: + /* copy page word by word */ + REG_L s4, s1, 0 + REG_S s4, s3, 0 + PTR_ADDI s3, s3, SZREG + PTR_ADDI s1, s1, SZREG + LONG_ADDI s5, s5, -1 + beqz s5, process_entry + b copy_word + b process_entry + +done: + ibar 0 + dbar 0 + + /* + * Jump to the new kernel, + * make sure the values of a0, a1, a2 and a3 are not changed. + */ + jr a3 +SYM_CODE_END(relocate_new_kernel) + +#ifdef CONFIG_SMP +/* + * Other CPUs should wait until code is relocated and + * then start at the entry point from LOONGARCH_IOCSR_MBUF0. + */ +SYM_CODE_START(kexec_smp_wait) +1: li.w t0, 0x100 /* wait for init loop */ +2: addi.w t0, t0, -1 /* limit mailbox access */ + bnez t0, 2b + li.w t1, LOONGARCH_IOCSR_MBUF0 + iocsrrd.w s0, t1 /* check PC as an indicator */ + beqz s0, 1b + iocsrrd.d s0, t1 /* get PC via mailbox */ + + li.d t0, CACHE_BASE + or s0, s0, t0 /* s0 = TO_CACHE(s0) */ + jr s0 /* jump to initial PC */ +SYM_CODE_END(kexec_smp_wait) +#endif + +relocate_new_kernel_end: + +SYM_DATA_START(relocate_new_kernel_size) + PTR relocate_new_kernel_end - relocate_new_kernel +SYM_DATA_END(relocate_new_kernel_size) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index d97c69dbe553..1eb63fa9bc81 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -19,6 +19,8 @@ #include <linux/memblock.h> #include <linux/initrd.h> #include <linux/ioport.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> #include <linux/root_dev.h> #include <linux/console.h> #include <linux/pfn.h> @@ -49,11 +51,9 @@ #define SMBIOS_CORE_PACKAGE_OFFSET 0x23 #define LOONGSON_EFI_ENABLE (1 << 3) -#ifdef CONFIG_VT -struct screen_info screen_info; -#endif +struct screen_info screen_info __section(".data"); -unsigned long fw_arg0, fw_arg1; +unsigned long fw_arg0, fw_arg1, fw_arg2; DEFINE_PER_CPU(unsigned long, kernelsp); struct cpuinfo_loongarch cpu_data[NR_CPUS] __read_mostly; @@ -122,16 +122,9 @@ static void __init parse_cpu_table(const struct dmi_header *dm) static void __init parse_bios_table(const struct dmi_header *dm) { - int bios_extern; char *dmi_data = (char *)dm; - bios_extern = *(dmi_data + SMBIOS_BIOSEXTERN_OFFSET); b_info.bios_size = (*(dmi_data + SMBIOS_BIOSSIZE_OFFSET) + 1) << 6; - - if (bios_extern & LOONGSON_EFI_ENABLE) - set_bit(EFI_BOOT, &efi.flags); - else - clear_bit(EFI_BOOT, &efi.flags); } static void __init find_tokens(const struct dmi_header *dm, void *dummy) @@ -194,9 +187,70 @@ static int __init early_parse_mem(char *p) } early_param("mem", early_parse_mem); +static void __init arch_reserve_vmcore(void) +{ +#ifdef CONFIG_PROC_VMCORE + u64 i; + phys_addr_t start, end; + + if (!is_kdump_kernel()) + return; + + if (!elfcorehdr_size) { + for_each_mem_range(i, &start, &end) { + if (elfcorehdr_addr >= start && elfcorehdr_addr < end) { + /* + * Reserve from the elf core header to the end of + * the memory segment, that should all be kdump + * reserved memory. + */ + elfcorehdr_size = end - elfcorehdr_addr; + break; + } + } + } + + if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) { + pr_warn("elfcorehdr is overlapped\n"); + return; + } + + memblock_reserve(elfcorehdr_addr, elfcorehdr_size); + + pr_info("Reserving %llu KiB of memory at 0x%llx for elfcorehdr\n", + elfcorehdr_size >> 10, elfcorehdr_addr); +#endif +} + +static void __init arch_parse_crashkernel(void) +{ +#ifdef CONFIG_KEXEC + int ret; + unsigned long long start; + unsigned long long total_mem; + unsigned long long crash_base, crash_size; + + total_mem = memblock_phys_mem_size(); + ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); + if (ret < 0 || crash_size <= 0) + return; + + start = memblock_phys_alloc_range(crash_size, 1, crash_base, crash_base + crash_size); + if (start != crash_base) { + pr_warn("Invalid memory region reserved for crash kernel\n"); + return; + } + + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; +#endif +} + void __init platform_init(void) { - efi_init(); + arch_reserve_vmcore(); + arch_parse_crashkernel(); + #ifdef CONFIG_ACPI_TABLE_UPGRADE acpi_table_upgrade(); #endif @@ -299,6 +353,15 @@ static void __init resource_init(void) request_resource(res, &data_resource); request_resource(res, &bss_resource); } + +#ifdef CONFIG_KEXEC + if (crashk_res.start < crashk_res.end) { + insert_resource(&iomem_resource, &crashk_res); + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n", + (unsigned long)((crashk_res.end - crashk_res.start + 1) >> 20), + (unsigned long)(crashk_res.start >> 20)); + } +#endif } static int __init reserve_memblock_reserved_regions(void) @@ -346,7 +409,7 @@ static void __init prefill_possible_map(void) for (; i < NR_CPUS; i++) set_cpu_possible(i, false); - nr_cpu_ids = possible; + set_nr_cpu_ids(possible); } #endif @@ -356,11 +419,13 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; init_environ(); + efi_init(); memblock_init(); + pagetable_init(); parse_early_param(); + reserve_initrd_mem(); platform_init(); - pagetable_init(); arch_mem_init(cmdline_p); resource_init(); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index b5fab308dcf2..781a4d4bdddc 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -240,11 +240,6 @@ void loongson3_smp_finish(void) #ifdef CONFIG_HOTPLUG_CPU -static bool io_master(int cpu) -{ - return test_bit(cpu, &loongson_sysconf.cores_io_master); -} - int loongson3_cpu_disable(void) { unsigned long flags; diff --git a/arch/loongarch/kernel/sysrq.c b/arch/loongarch/kernel/sysrq.c new file mode 100644 index 000000000000..366baef72d29 --- /dev/null +++ b/arch/loongarch/kernel/sysrq.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * LoongArch specific sysrq operations. + * + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/sysrq.h> +#include <linux/workqueue.h> + +#include <asm/cpu-features.h> +#include <asm/tlb.h> + +/* + * Dump TLB entries on all CPUs. + */ + +static DEFINE_SPINLOCK(show_lock); + +static void sysrq_tlbdump_single(void *dummy) +{ + unsigned long flags; + + spin_lock_irqsave(&show_lock, flags); + + pr_info("CPU%d:\n", smp_processor_id()); + dump_tlb_regs(); + pr_info("\n"); + dump_tlb_all(); + pr_info("\n"); + + spin_unlock_irqrestore(&show_lock, flags); +} + +#ifdef CONFIG_SMP +static void sysrq_tlbdump_othercpus(struct work_struct *dummy) +{ + smp_call_function(sysrq_tlbdump_single, NULL, 0); +} + +static DECLARE_WORK(sysrq_tlbdump, sysrq_tlbdump_othercpus); +#endif + +static void sysrq_handle_tlbdump(int key) +{ + sysrq_tlbdump_single(NULL); +#ifdef CONFIG_SMP + schedule_work(&sysrq_tlbdump); +#endif +} + +static struct sysrq_key_op sysrq_tlbdump_op = { + .handler = sysrq_handle_tlbdump, + .help_msg = "show-tlbs(x)", + .action_msg = "Show TLB entries", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; + +static int __init loongarch_sysrq_init(void) +{ + return register_sysrq_key('x', &sysrq_tlbdump_op); +} +arch_initcall(loongarch_sysrq_init); diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c index ab1a75c0b5a6..caa7cd859078 100644 --- a/arch/loongarch/kernel/topology.c +++ b/arch/loongarch/kernel/topology.c @@ -5,6 +5,7 @@ #include <linux/node.h> #include <linux/nodemask.h> #include <linux/percpu.h> +#include <asm/bootinfo.h> static DEFINE_PER_CPU(struct cpu, cpu_devices); @@ -40,7 +41,7 @@ static int __init topology_init(void) for_each_present_cpu(i) { struct cpu *c = &per_cpu(cpu_devices, i); - c->hotpluggable = !!i; + c->hotpluggable = !io_master(i); ret = register_cpu(c, i); if (ret < 0) pr_warn("topology_init: register_cpu %d failed (%d)\n", i, ret); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 5010e95cef84..1a4dce84ebc6 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -10,6 +10,7 @@ #include <linux/entry-common.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/kexec.h> #include <linux/module.h> #include <linux/extable.h> #include <linux/mm.h> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs) oops_exit(); + if (regs && kexec_should_crash(current)) + crash_kexec(regs); + if (in_interrupt()) panic("Fatal exception in interrupt"); @@ -374,6 +378,29 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) irqentry_exit(regs, state); } +#ifdef CONFIG_GENERIC_BUG +int is_valid_bugaddr(unsigned long addr) +{ + return 1; +} +#endif /* CONFIG_GENERIC_BUG */ + +static void bug_handler(struct pt_regs *regs) +{ + switch (report_bug(regs->csr_era, regs)) { + case BUG_TRAP_TYPE_BUG: + case BUG_TRAP_TYPE_NONE: + die_if_kernel("Oops - BUG", regs); + force_sig(SIGTRAP); + break; + + case BUG_TRAP_TYPE_WARN: + /* Skip the BUG instruction and continue */ + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + } +} + asmlinkage void noinstr do_bp(struct pt_regs *regs) { bool user = user_mode(regs); @@ -427,8 +454,7 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs) switch (bcode) { case BRK_BUG: - die_if_kernel("Kernel bug detected", regs); - force_sig(SIGTRAP); + bug_handler(regs); break; case BRK_DIVZERO: die_if_kernel("Break instruction in kernel code", regs); @@ -620,9 +646,6 @@ asmlinkage void noinstr do_vint(struct pt_regs *regs, unsigned long sp) irqentry_exit(regs, state); } -extern void tlb_init(int cpu); -extern void cache_error_setup(void); - unsigned long eentry; unsigned long tlbrentry; diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c index f32c38abd791..8c9826062652 100644 --- a/arch/loongarch/kernel/vdso.c +++ b/arch/loongarch/kernel/vdso.c @@ -78,7 +78,7 @@ static unsigned long vdso_base(void) unsigned long base = STACK_TOP; if (current->flags & PF_RANDOMIZE) { - base += get_random_int() & (VDSO_RANDOMIZE_SIZE - 1); + base += prandom_u32_max(VDSO_RANDOMIZE_SIZE); base = PAGE_ALIGN(base); } diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index f4831df5d2f9..b3309a5e695b 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -12,6 +12,7 @@ #define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) #include <asm-generic/vmlinux.lds.h> +#include "image-vars.h" /* * Max avaliable Page Size is 64K, so we set SectionAlignment @@ -54,6 +55,10 @@ SECTIONS EXCEPTION_TABLE(16) + .got : ALIGN(16) { *(.got) } + .plt : ALIGN(16) { *(.plt) } + .got.plt : ALIGN(16) { *(.got.plt) } + . = ALIGN(PECOFF_SEGMENT_ALIGN); __init_begin = .; __inittext_begin = .; diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c index e8c68dcf6ab2..72685a48eaf0 100644 --- a/arch/loongarch/mm/cache.c +++ b/arch/loongarch/mm/cache.c @@ -6,8 +6,8 @@ * Copyright (C) 1994 - 2003, 06, 07 by Ralf Baechle (ralf@linux-mips.org) * Copyright (C) 2007 MIPS Technologies, Inc. */ +#include <linux/cacheinfo.h> #include <linux/export.h> -#include <linux/fcntl.h> #include <linux/fs.h> #include <linux/highmem.h> #include <linux/kernel.h> @@ -16,14 +16,21 @@ #include <linux/sched.h> #include <linux/syscalls.h> +#include <asm/bootinfo.h> #include <asm/cacheflush.h> #include <asm/cpu.h> #include <asm/cpu-features.h> -#include <asm/dma.h> #include <asm/loongarch.h> +#include <asm/numa.h> #include <asm/processor.h> #include <asm/setup.h> +void cache_error_setup(void) +{ + extern char __weak except_vec_cex; + set_merr_handler(0x0, &except_vec_cex, 0x80); +} + /* * LoongArch maintains ICache/DCache coherency by hardware, * we just need "ibar" to avoid instruction hazard here. @@ -34,109 +41,121 @@ void local_flush_icache_range(unsigned long start, unsigned long end) } EXPORT_SYMBOL(local_flush_icache_range); -void cache_error_setup(void) -{ - extern char __weak except_vec_cex; - set_merr_handler(0x0, &except_vec_cex, 0x80); -} - -static unsigned long icache_size __read_mostly; -static unsigned long dcache_size __read_mostly; -static unsigned long vcache_size __read_mostly; -static unsigned long scache_size __read_mostly; - -static char *way_string[] = { NULL, "direct mapped", "2-way", - "3-way", "4-way", "5-way", "6-way", "7-way", "8-way", - "9-way", "10-way", "11-way", "12-way", - "13-way", "14-way", "15-way", "16-way", -}; - -static void probe_pcache(void) +static void flush_cache_leaf(unsigned int leaf) { - struct cpuinfo_loongarch *c = ¤t_cpu_data; - unsigned int lsize, sets, ways; - unsigned int config; - - config = read_cpucfg(LOONGARCH_CPUCFG17); - lsize = 1 << ((config & CPUCFG17_L1I_SIZE_M) >> CPUCFG17_L1I_SIZE); - sets = 1 << ((config & CPUCFG17_L1I_SETS_M) >> CPUCFG17_L1I_SETS); - ways = ((config & CPUCFG17_L1I_WAYS_M) >> CPUCFG17_L1I_WAYS) + 1; - - c->icache.linesz = lsize; - c->icache.sets = sets; - c->icache.ways = ways; - icache_size = sets * ways * lsize; - c->icache.waysize = icache_size / c->icache.ways; - - config = read_cpucfg(LOONGARCH_CPUCFG18); - lsize = 1 << ((config & CPUCFG18_L1D_SIZE_M) >> CPUCFG18_L1D_SIZE); - sets = 1 << ((config & CPUCFG18_L1D_SETS_M) >> CPUCFG18_L1D_SETS); - ways = ((config & CPUCFG18_L1D_WAYS_M) >> CPUCFG18_L1D_WAYS) + 1; - - c->dcache.linesz = lsize; - c->dcache.sets = sets; - c->dcache.ways = ways; - dcache_size = sets * ways * lsize; - c->dcache.waysize = dcache_size / c->dcache.ways; - - c->options |= LOONGARCH_CPU_PREFETCH; - - pr_info("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n", - icache_size >> 10, way_string[c->icache.ways], "VIPT", c->icache.linesz); - - pr_info("Primary data cache %ldkB, %s, %s, %s, linesize %d bytes\n", - dcache_size >> 10, way_string[c->dcache.ways], "VIPT", "no aliases", c->dcache.linesz); + int i, j, nr_nodes; + uint64_t addr = CSR_DMW0_BASE; + struct cache_desc *cdesc = current_cpu_data.cache_leaves + leaf; + + nr_nodes = cache_private(cdesc) ? 1 : loongson_sysconf.nr_nodes; + + do { + for (i = 0; i < cdesc->sets; i++) { + for (j = 0; j < cdesc->ways; j++) { + flush_cache_line(leaf, addr); + addr++; + } + + addr -= cdesc->ways; + addr += cdesc->linesz; + } + addr += (1ULL << NODE_ADDRSPACE_SHIFT); + } while (--nr_nodes > 0); } -static void probe_vcache(void) +asmlinkage __visible void __flush_cache_all(void) { - struct cpuinfo_loongarch *c = ¤t_cpu_data; - unsigned int lsize, sets, ways; - unsigned int config; - - config = read_cpucfg(LOONGARCH_CPUCFG19); - lsize = 1 << ((config & CPUCFG19_L2_SIZE_M) >> CPUCFG19_L2_SIZE); - sets = 1 << ((config & CPUCFG19_L2_SETS_M) >> CPUCFG19_L2_SETS); - ways = ((config & CPUCFG19_L2_WAYS_M) >> CPUCFG19_L2_WAYS) + 1; - - c->vcache.linesz = lsize; - c->vcache.sets = sets; - c->vcache.ways = ways; - vcache_size = lsize * sets * ways; - c->vcache.waysize = vcache_size / c->vcache.ways; - - pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n", - vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz); + int leaf; + struct cache_desc *cdesc = current_cpu_data.cache_leaves; + unsigned int cache_present = current_cpu_data.cache_leaves_present; + + leaf = cache_present - 1; + if (cache_inclusive(cdesc + leaf)) { + flush_cache_leaf(leaf); + return; + } + + for (leaf = 0; leaf < cache_present; leaf++) + flush_cache_leaf(leaf); } -static void probe_scache(void) -{ - struct cpuinfo_loongarch *c = ¤t_cpu_data; - unsigned int lsize, sets, ways; - unsigned int config; - - config = read_cpucfg(LOONGARCH_CPUCFG20); - lsize = 1 << ((config & CPUCFG20_L3_SIZE_M) >> CPUCFG20_L3_SIZE); - sets = 1 << ((config & CPUCFG20_L3_SETS_M) >> CPUCFG20_L3_SETS); - ways = ((config & CPUCFG20_L3_WAYS_M) >> CPUCFG20_L3_WAYS) + 1; - - c->scache.linesz = lsize; - c->scache.sets = sets; - c->scache.ways = ways; - /* 4 cores. scaches are shared */ - scache_size = lsize * sets * ways; - c->scache.waysize = scache_size / c->scache.ways; - - pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", - scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); -} +#define L1IUPRE (1 << 0) +#define L1IUUNIFY (1 << 1) +#define L1DPRE (1 << 2) + +#define LXIUPRE (1 << 0) +#define LXIUUNIFY (1 << 1) +#define LXIUPRIV (1 << 2) +#define LXIUINCL (1 << 3) +#define LXDPRE (1 << 4) +#define LXDPRIV (1 << 5) +#define LXDINCL (1 << 6) + +#define populate_cache_properties(cfg0, cdesc, level, leaf) \ +do { \ + unsigned int cfg1; \ + \ + cfg1 = read_cpucfg(LOONGARCH_CPUCFG17 + leaf); \ + if (level == 1) { \ + cdesc->flags |= CACHE_PRIVATE; \ + } else { \ + if (cfg0 & LXIUPRIV) \ + cdesc->flags |= CACHE_PRIVATE; \ + if (cfg0 & LXIUINCL) \ + cdesc->flags |= CACHE_INCLUSIVE; \ + } \ + cdesc->level = level; \ + cdesc->flags |= CACHE_PRESENT; \ + cdesc->ways = ((cfg1 & CPUCFG_CACHE_WAYS_M) >> CPUCFG_CACHE_WAYS) + 1; \ + cdesc->sets = 1 << ((cfg1 & CPUCFG_CACHE_SETS_M) >> CPUCFG_CACHE_SETS); \ + cdesc->linesz = 1 << ((cfg1 & CPUCFG_CACHE_LSIZE_M) >> CPUCFG_CACHE_LSIZE); \ + cdesc++; leaf++; \ +} while (0) void cpu_cache_init(void) { - probe_pcache(); - probe_vcache(); - probe_scache(); - + unsigned int leaf = 0, level = 1; + unsigned int config = read_cpucfg(LOONGARCH_CPUCFG16); + struct cache_desc *cdesc = current_cpu_data.cache_leaves; + + if (config & L1IUPRE) { + if (config & L1IUUNIFY) + cdesc->type = CACHE_TYPE_UNIFIED; + else + cdesc->type = CACHE_TYPE_INST; + populate_cache_properties(config, cdesc, level, leaf); + } + + if (config & L1DPRE) { + cdesc->type = CACHE_TYPE_DATA; + populate_cache_properties(config, cdesc, level, leaf); + } + + config = config >> 3; + for (level = 2; level <= CACHE_LEVEL_MAX; level++) { + if (!config) + break; + + if (config & LXIUPRE) { + if (config & LXIUUNIFY) + cdesc->type = CACHE_TYPE_UNIFIED; + else + cdesc->type = CACHE_TYPE_INST; + populate_cache_properties(config, cdesc, level, leaf); + } + + if (config & LXDPRE) { + cdesc->type = CACHE_TYPE_DATA; + populate_cache_properties(config, cdesc, level, leaf); + } + + config = config >> 7; + } + + BUG_ON(leaf > CACHE_LEAVES_MAX); + + current_cpu_data.cache_leaves_present = leaf; + current_cpu_data.options |= LOONGARCH_CPU_PREFETCH; shm_align_mask = PAGE_SIZE - 1; } diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 0532ed5ba43d..080061793c85 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -152,6 +152,70 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #endif +static pte_t *fixmap_pte(unsigned long addr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_k(addr); + p4d = p4d_offset(pgd, addr); + + if (pgd_none(*pgd)) { + pud_t *new __maybe_unused; + + new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + pgd_populate(&init_mm, pgd, new); +#ifndef __PAGETABLE_PUD_FOLDED + pud_init((unsigned long)new, (unsigned long)invalid_pmd_table); +#endif + } + + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + pmd_t *new __maybe_unused; + + new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + pud_populate(&init_mm, pud, new); +#ifndef __PAGETABLE_PMD_FOLDED + pmd_init((unsigned long)new, (unsigned long)invalid_pte_table); +#endif + } + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte_t *new __maybe_unused; + + new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd, new); + } + + return pte_offset_kernel(pmd, addr); +} + +void __init __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *ptep; + + BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); + + ptep = fixmap_pte(addr); + if (!pte_none(*ptep)) { + pte_ERROR(*ptep); + return; + } + + if (pgprot_val(flags)) + set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); + else { + pte_clear(&init_mm, addr, ptep); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + } +} + /* * Align swapper_pg_dir in to 64K, allows its address to be loaded * with a single LUI instruction in the TLB handlers. If we used diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index 381a569635a9..fbe1a4856fc4 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -3,6 +3,8 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include <linux/export.h> +#include <linux/io.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/mman.h> @@ -116,3 +118,30 @@ int __virt_addr_valid(volatile void *kaddr) return pfn_valid(PFN_DOWN(PHYSADDR(kaddr))); } EXPORT_SYMBOL_GPL(__virt_addr_valid); + +/* + * You really shouldn't be using read() or write() on /dev/mem. This might go + * away in the future. + */ +int valid_phys_addr_range(phys_addr_t addr, size_t size) +{ + /* + * Check whether addr is covered by a memory region without the + * MEMBLOCK_NOMAP attribute, and whether that region covers the + * entire range. In theory, this could lead to false negatives + * if the range is covered by distinct but adjacent memory regions + * that only differ in other attributes. However, few of such + * attributes have been defined, and it is debatable whether it + * follows that /dev/mem read() calls should be able traverse + * such boundaries. + */ + return memblock_is_region_memory(addr, size) && memblock_is_map_memory(addr); +} + +/* + * Do not allow /dev/mem mappings beyond the supported physical range. + */ +int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) +{ + return !(((pfn << PAGE_SHIFT) + size) & ~(GENMASK_ULL(cpu_pabits, 0))); +} diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 9818ce11546b..da3681f131c8 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -258,7 +258,7 @@ extern long exception_handlers[VECSIZE * 128 / sizeof(long)]; void setup_tlb_handler(int cpu) { setup_ptwalker(); - output_pgtable_bits_defines(); + local_flush_tlb_all(); /* The tlb handlers are generated only once */ if (cpu == 0) { @@ -301,6 +301,7 @@ void tlb_init(int cpu) write_csr_pagesize(PS_DEFAULT_SIZE); write_csr_stlbpgsize(PS_DEFAULT_SIZE); write_csr_tlbrefill_pagesize(PS_DEFAULT_SIZE); + setup_tlb_handler(cpu); - local_flush_tlb_all(); + output_pgtable_bits_defines(); } diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index 39743337999e..d8ee8fbc8c67 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -10,15 +10,20 @@ #include <asm/regdef.h> #include <asm/stackframe.h> +#define PTRS_PER_PGD_BITS (PAGE_SHIFT - 3) +#define PTRS_PER_PUD_BITS (PAGE_SHIFT - 3) +#define PTRS_PER_PMD_BITS (PAGE_SHIFT - 3) +#define PTRS_PER_PTE_BITS (PAGE_SHIFT - 3) + .macro tlb_do_page_fault, write SYM_FUNC_START(tlb_do_page_fault_\write) SAVE_ALL - csrrd a2, LOONGARCH_CSR_BADV - move a0, sp - REG_S a2, sp, PT_BVADDR - li.w a1, \write - la.abs t0, do_page_fault - jirl ra, t0, 0 + csrrd a2, LOONGARCH_CSR_BADV + move a0, sp + REG_S a2, sp, PT_BVADDR + li.w a1, \write + la.abs t0, do_page_fault + jirl ra, t0, 0 RESTORE_ALL_AND_RET SYM_FUNC_END(tlb_do_page_fault_\write) .endm @@ -29,133 +34,115 @@ SYM_FUNC_START(handle_tlb_protect) BACKUP_T0T1 SAVE_ALL - move a0, sp - move a1, zero - csrrd a2, LOONGARCH_CSR_BADV - REG_S a2, sp, PT_BVADDR - la.abs t0, do_page_fault - jirl ra, t0, 0 + move a0, sp + move a1, zero + csrrd a2, LOONGARCH_CSR_BADV + REG_S a2, sp, PT_BVADDR + la.abs t0, do_page_fault + jirl ra, t0, 0 RESTORE_ALL_AND_RET SYM_FUNC_END(handle_tlb_protect) SYM_FUNC_START(handle_tlb_load) - csrwr t0, EXCEPTION_KS0 - csrwr t1, EXCEPTION_KS1 - csrwr ra, EXCEPTION_KS2 + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + csrwr ra, EXCEPTION_KS2 /* * The vmalloc handling is not in the hotpath. */ - csrrd t0, LOONGARCH_CSR_BADV - bltz t0, vmalloc_load - csrrd t1, LOONGARCH_CSR_PGDL + csrrd t0, LOONGARCH_CSR_BADV + bltz t0, vmalloc_load + csrrd t1, LOONGARCH_CSR_PGDL vmalloc_done_load: /* Get PGD offset in bytes */ - srli.d t0, t0, PGDIR_SHIFT - andi t0, t0, (PTRS_PER_PGD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + bstrpick.d ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT + alsl.d t1, ra, t1, 3 #if CONFIG_PGTABLE_LEVELS > 3 - csrrd t0, LOONGARCH_CSR_BADV - ld.d t1, t1, 0 - srli.d t0, t0, PUD_SHIFT - andi t0, t0, (PTRS_PER_PUD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + ld.d t1, t1, 0 + bstrpick.d ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT + alsl.d t1, ra, t1, 3 #endif #if CONFIG_PGTABLE_LEVELS > 2 - csrrd t0, LOONGARCH_CSR_BADV - ld.d t1, t1, 0 - srli.d t0, t0, PMD_SHIFT - andi t0, t0, (PTRS_PER_PMD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + ld.d t1, t1, 0 + bstrpick.d ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT + alsl.d t1, ra, t1, 3 #endif - ld.d ra, t1, 0 + ld.d ra, t1, 0 /* * For huge tlb entries, pmde doesn't contain an address but * instead contains the tlb pte. Check the PAGE_HUGE bit and * see if we need to jump to huge tlb processing. */ - andi t0, ra, _PAGE_HUGE - bnez t0, tlb_huge_update_load + rotri.d ra, ra, _PAGE_HUGE_SHIFT + 1 + bltz ra, tlb_huge_update_load - csrrd t0, LOONGARCH_CSR_BADV - srli.d t0, t0, PAGE_SHIFT - andi t0, t0, (PTRS_PER_PTE - 1) - slli.d t0, t0, _PTE_T_LOG2 - add.d t1, ra, t0 + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) + bstrpick.d t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT + alsl.d t1, t0, ra, _PTE_T_LOG2 #ifdef CONFIG_SMP smp_pgtable_change_load: -#endif -#ifdef CONFIG_SMP - ll.d t0, t1, 0 + ll.d t0, t1, 0 #else - ld.d t0, t1, 0 + ld.d t0, t1, 0 #endif - tlbsrch - - srli.d ra, t0, _PAGE_PRESENT_SHIFT - andi ra, ra, 1 - beqz ra, nopage_tlb_load + andi ra, t0, _PAGE_PRESENT + beqz ra, nopage_tlb_load - ori t0, t0, _PAGE_VALID + ori t0, t0, _PAGE_VALID #ifdef CONFIG_SMP - sc.d t0, t1, 0 - beqz t0, smp_pgtable_change_load + sc.d t0, t1, 0 + beqz t0, smp_pgtable_change_load #else - st.d t0, t1, 0 + st.d t0, t1, 0 #endif - ori t1, t1, 8 - xori t1, t1, 8 - ld.d t0, t1, 0 - ld.d t1, t1, 8 - csrwr t0, LOONGARCH_CSR_TLBELO0 - csrwr t1, LOONGARCH_CSR_TLBELO1 + tlbsrch + bstrins.d t1, zero, 3, 3 + ld.d t0, t1, 0 + ld.d t1, t1, 8 + csrwr t0, LOONGARCH_CSR_TLBELO0 + csrwr t1, LOONGARCH_CSR_TLBELO1 tlbwr -leave_load: - csrrd t0, EXCEPTION_KS0 - csrrd t1, EXCEPTION_KS1 - csrrd ra, EXCEPTION_KS2 + + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 ertn + #ifdef CONFIG_64BIT vmalloc_load: - la.abs t1, swapper_pg_dir - b vmalloc_done_load + la.abs t1, swapper_pg_dir + b vmalloc_done_load #endif - /* - * This is the entry point when build_tlbchange_handler_head - * spots a huge page. - */ + /* This is the entry point of a huge page. */ tlb_huge_update_load: #ifdef CONFIG_SMP - ll.d t0, t1, 0 -#else - ld.d t0, t1, 0 + ll.d ra, t1, 0 #endif - srli.d ra, t0, _PAGE_PRESENT_SHIFT - andi ra, ra, 1 - beqz ra, nopage_tlb_load - tlbsrch + andi t0, ra, _PAGE_PRESENT + beqz t0, nopage_tlb_load - ori t0, t0, _PAGE_VALID #ifdef CONFIG_SMP - sc.d t0, t1, 0 - beqz t0, tlb_huge_update_load - ld.d t0, t1, 0 + ori t0, ra, _PAGE_VALID + sc.d t0, t1, 0 + beqz t0, tlb_huge_update_load + ori t0, ra, _PAGE_VALID #else - st.d t0, t1, 0 + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) + ori t0, ra, _PAGE_VALID + st.d t0, t1, 0 #endif + tlbsrch addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) addi.d ra, t1, 0 csrxchg ra, t1, LOONGARCH_CSR_TLBIDX tlbwr - csrxchg zero, t1, LOONGARCH_CSR_TLBIDX + csrxchg zero, t1, LOONGARCH_CSR_TLBIDX /* * A huge PTE describes an area the size of the @@ -167,21 +154,20 @@ tlb_huge_update_load: * address space. */ /* Huge page: Move Global bit */ - xori t0, t0, _PAGE_HUGE - lu12i.w t1, _PAGE_HGLOBAL >> 12 - and t1, t0, t1 - srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) - or t0, t0, t1 + xori t0, t0, _PAGE_HUGE + lu12i.w t1, _PAGE_HGLOBAL >> 12 + and t1, t0, t1 + srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) + or t0, t0, t1 - addi.d ra, t0, 0 - csrwr t0, LOONGARCH_CSR_TLBELO0 - addi.d t0, ra, 0 + move ra, t0 + csrwr ra, LOONGARCH_CSR_TLBELO0 /* Convert to entrylo1 */ - addi.d t1, zero, 1 - slli.d t1, t1, (HPAGE_SHIFT - 1) - add.d t0, t0, t1 - csrwr t0, LOONGARCH_CSR_TLBELO1 + addi.d t1, zero, 1 + slli.d t1, t1, (HPAGE_SHIFT - 1) + add.d t0, t0, t1 + csrwr t0, LOONGARCH_CSR_TLBELO1 /* Set huge page tlb entry size */ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) @@ -194,136 +180,120 @@ tlb_huge_update_load: addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 + ertn + nopage_tlb_load: - dbar 0 - csrrd ra, EXCEPTION_KS2 - la.abs t0, tlb_do_page_fault_0 - jr t0 + dbar 0 + csrrd ra, EXCEPTION_KS2 + la.abs t0, tlb_do_page_fault_0 + jr t0 SYM_FUNC_END(handle_tlb_load) SYM_FUNC_START(handle_tlb_store) - csrwr t0, EXCEPTION_KS0 - csrwr t1, EXCEPTION_KS1 - csrwr ra, EXCEPTION_KS2 + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + csrwr ra, EXCEPTION_KS2 /* * The vmalloc handling is not in the hotpath. */ - csrrd t0, LOONGARCH_CSR_BADV - bltz t0, vmalloc_store - csrrd t1, LOONGARCH_CSR_PGDL + csrrd t0, LOONGARCH_CSR_BADV + bltz t0, vmalloc_store + csrrd t1, LOONGARCH_CSR_PGDL vmalloc_done_store: /* Get PGD offset in bytes */ - srli.d t0, t0, PGDIR_SHIFT - andi t0, t0, (PTRS_PER_PGD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 - + bstrpick.d ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT + alsl.d t1, ra, t1, 3 #if CONFIG_PGTABLE_LEVELS > 3 - csrrd t0, LOONGARCH_CSR_BADV - ld.d t1, t1, 0 - srli.d t0, t0, PUD_SHIFT - andi t0, t0, (PTRS_PER_PUD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + ld.d t1, t1, 0 + bstrpick.d ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT + alsl.d t1, ra, t1, 3 #endif #if CONFIG_PGTABLE_LEVELS > 2 - csrrd t0, LOONGARCH_CSR_BADV - ld.d t1, t1, 0 - srli.d t0, t0, PMD_SHIFT - andi t0, t0, (PTRS_PER_PMD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + ld.d t1, t1, 0 + bstrpick.d ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT + alsl.d t1, ra, t1, 3 #endif - ld.d ra, t1, 0 + ld.d ra, t1, 0 /* * For huge tlb entries, pmde doesn't contain an address but * instead contains the tlb pte. Check the PAGE_HUGE bit and * see if we need to jump to huge tlb processing. */ - andi t0, ra, _PAGE_HUGE - bnez t0, tlb_huge_update_store + rotri.d ra, ra, _PAGE_HUGE_SHIFT + 1 + bltz ra, tlb_huge_update_store - csrrd t0, LOONGARCH_CSR_BADV - srli.d t0, t0, PAGE_SHIFT - andi t0, t0, (PTRS_PER_PTE - 1) - slli.d t0, t0, _PTE_T_LOG2 - add.d t1, ra, t0 + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) + bstrpick.d t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT + alsl.d t1, t0, ra, _PTE_T_LOG2 #ifdef CONFIG_SMP smp_pgtable_change_store: -#endif -#ifdef CONFIG_SMP - ll.d t0, t1, 0 + ll.d t0, t1, 0 #else - ld.d t0, t1, 0 + ld.d t0, t1, 0 #endif - tlbsrch - - srli.d ra, t0, _PAGE_PRESENT_SHIFT - andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) - xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) - bnez ra, nopage_tlb_store + andi ra, t0, _PAGE_PRESENT | _PAGE_WRITE + xori ra, ra, _PAGE_PRESENT | _PAGE_WRITE + bnez ra, nopage_tlb_store - ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) #ifdef CONFIG_SMP - sc.d t0, t1, 0 - beqz t0, smp_pgtable_change_store + sc.d t0, t1, 0 + beqz t0, smp_pgtable_change_store #else - st.d t0, t1, 0 + st.d t0, t1, 0 #endif - - ori t1, t1, 8 - xori t1, t1, 8 - ld.d t0, t1, 0 - ld.d t1, t1, 8 - csrwr t0, LOONGARCH_CSR_TLBELO0 - csrwr t1, LOONGARCH_CSR_TLBELO1 + tlbsrch + bstrins.d t1, zero, 3, 3 + ld.d t0, t1, 0 + ld.d t1, t1, 8 + csrwr t0, LOONGARCH_CSR_TLBELO0 + csrwr t1, LOONGARCH_CSR_TLBELO1 tlbwr -leave_store: - csrrd t0, EXCEPTION_KS0 - csrrd t1, EXCEPTION_KS1 - csrrd ra, EXCEPTION_KS2 + + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 ertn + #ifdef CONFIG_64BIT vmalloc_store: - la.abs t1, swapper_pg_dir - b vmalloc_done_store + la.abs t1, swapper_pg_dir + b vmalloc_done_store #endif - /* - * This is the entry point when build_tlbchange_handler_head - * spots a huge page. - */ + /* This is the entry point of a huge page. */ tlb_huge_update_store: #ifdef CONFIG_SMP - ll.d t0, t1, 0 -#else - ld.d t0, t1, 0 + ll.d ra, t1, 0 #endif - srli.d ra, t0, _PAGE_PRESENT_SHIFT - andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) - xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) - bnez ra, nopage_tlb_store - - tlbsrch - ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + andi t0, ra, _PAGE_PRESENT | _PAGE_WRITE + xori t0, t0, _PAGE_PRESENT | _PAGE_WRITE + bnez t0, nopage_tlb_store #ifdef CONFIG_SMP - sc.d t0, t1, 0 - beqz t0, tlb_huge_update_store - ld.d t0, t1, 0 + ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + sc.d t0, t1, 0 + beqz t0, tlb_huge_update_store + ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) #else - st.d t0, t1, 0 + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) + ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + st.d t0, t1, 0 #endif + tlbsrch addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) addi.d ra, t1, 0 csrxchg ra, t1, LOONGARCH_CSR_TLBIDX tlbwr - csrxchg zero, t1, LOONGARCH_CSR_TLBIDX + csrxchg zero, t1, LOONGARCH_CSR_TLBIDX /* * A huge PTE describes an area the size of the * configured huge page size. This is twice the @@ -334,21 +304,20 @@ tlb_huge_update_store: * address space. */ /* Huge page: Move Global bit */ - xori t0, t0, _PAGE_HUGE - lu12i.w t1, _PAGE_HGLOBAL >> 12 - and t1, t0, t1 - srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) - or t0, t0, t1 + xori t0, t0, _PAGE_HUGE + lu12i.w t1, _PAGE_HGLOBAL >> 12 + and t1, t0, t1 + srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) + or t0, t0, t1 - addi.d ra, t0, 0 - csrwr t0, LOONGARCH_CSR_TLBELO0 - addi.d t0, ra, 0 + move ra, t0 + csrwr ra, LOONGARCH_CSR_TLBELO0 /* Convert to entrylo1 */ - addi.d t1, zero, 1 - slli.d t1, t1, (HPAGE_SHIFT - 1) - add.d t0, t0, t1 - csrwr t0, LOONGARCH_CSR_TLBELO1 + addi.d t1, zero, 1 + slli.d t1, t1, (HPAGE_SHIFT - 1) + add.d t0, t0, t1 + csrwr t0, LOONGARCH_CSR_TLBELO1 /* Set huge page tlb entry size */ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) @@ -362,126 +331,110 @@ tlb_huge_update_store: addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 + ertn + nopage_tlb_store: - dbar 0 - csrrd ra, EXCEPTION_KS2 - la.abs t0, tlb_do_page_fault_1 - jr t0 + dbar 0 + csrrd ra, EXCEPTION_KS2 + la.abs t0, tlb_do_page_fault_1 + jr t0 SYM_FUNC_END(handle_tlb_store) SYM_FUNC_START(handle_tlb_modify) - csrwr t0, EXCEPTION_KS0 - csrwr t1, EXCEPTION_KS1 - csrwr ra, EXCEPTION_KS2 + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + csrwr ra, EXCEPTION_KS2 /* * The vmalloc handling is not in the hotpath. */ - csrrd t0, LOONGARCH_CSR_BADV - bltz t0, vmalloc_modify - csrrd t1, LOONGARCH_CSR_PGDL + csrrd t0, LOONGARCH_CSR_BADV + bltz t0, vmalloc_modify + csrrd t1, LOONGARCH_CSR_PGDL vmalloc_done_modify: /* Get PGD offset in bytes */ - srli.d t0, t0, PGDIR_SHIFT - andi t0, t0, (PTRS_PER_PGD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + bstrpick.d ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT + alsl.d t1, ra, t1, 3 #if CONFIG_PGTABLE_LEVELS > 3 - csrrd t0, LOONGARCH_CSR_BADV - ld.d t1, t1, 0 - srli.d t0, t0, PUD_SHIFT - andi t0, t0, (PTRS_PER_PUD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + ld.d t1, t1, 0 + bstrpick.d ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT + alsl.d t1, ra, t1, 3 #endif #if CONFIG_PGTABLE_LEVELS > 2 - csrrd t0, LOONGARCH_CSR_BADV - ld.d t1, t1, 0 - srli.d t0, t0, PMD_SHIFT - andi t0, t0, (PTRS_PER_PMD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + ld.d t1, t1, 0 + bstrpick.d ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT + alsl.d t1, ra, t1, 3 #endif - ld.d ra, t1, 0 + ld.d ra, t1, 0 /* * For huge tlb entries, pmde doesn't contain an address but * instead contains the tlb pte. Check the PAGE_HUGE bit and * see if we need to jump to huge tlb processing. */ - andi t0, ra, _PAGE_HUGE - bnez t0, tlb_huge_update_modify + rotri.d ra, ra, _PAGE_HUGE_SHIFT + 1 + bltz ra, tlb_huge_update_modify - csrrd t0, LOONGARCH_CSR_BADV - srli.d t0, t0, PAGE_SHIFT - andi t0, t0, (PTRS_PER_PTE - 1) - slli.d t0, t0, _PTE_T_LOG2 - add.d t1, ra, t0 + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) + bstrpick.d t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT + alsl.d t1, t0, ra, _PTE_T_LOG2 #ifdef CONFIG_SMP smp_pgtable_change_modify: -#endif -#ifdef CONFIG_SMP - ll.d t0, t1, 0 + ll.d t0, t1, 0 #else - ld.d t0, t1, 0 + ld.d t0, t1, 0 #endif - tlbsrch - - srli.d ra, t0, _PAGE_WRITE_SHIFT - andi ra, ra, 1 - beqz ra, nopage_tlb_modify + andi ra, t0, _PAGE_WRITE + beqz ra, nopage_tlb_modify - ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) #ifdef CONFIG_SMP - sc.d t0, t1, 0 - beqz t0, smp_pgtable_change_modify + sc.d t0, t1, 0 + beqz t0, smp_pgtable_change_modify #else - st.d t0, t1, 0 + st.d t0, t1, 0 #endif - ori t1, t1, 8 - xori t1, t1, 8 - ld.d t0, t1, 0 - ld.d t1, t1, 8 - csrwr t0, LOONGARCH_CSR_TLBELO0 - csrwr t1, LOONGARCH_CSR_TLBELO1 + tlbsrch + bstrins.d t1, zero, 3, 3 + ld.d t0, t1, 0 + ld.d t1, t1, 8 + csrwr t0, LOONGARCH_CSR_TLBELO0 + csrwr t1, LOONGARCH_CSR_TLBELO1 tlbwr -leave_modify: - csrrd t0, EXCEPTION_KS0 - csrrd t1, EXCEPTION_KS1 - csrrd ra, EXCEPTION_KS2 + + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 ertn + #ifdef CONFIG_64BIT vmalloc_modify: - la.abs t1, swapper_pg_dir - b vmalloc_done_modify + la.abs t1, swapper_pg_dir + b vmalloc_done_modify #endif - /* - * This is the entry point when - * build_tlbchange_handler_head spots a huge page. - */ + /* This is the entry point of a huge page. */ tlb_huge_update_modify: #ifdef CONFIG_SMP - ll.d t0, t1, 0 -#else - ld.d t0, t1, 0 + ll.d ra, t1, 0 #endif - - srli.d ra, t0, _PAGE_WRITE_SHIFT - andi ra, ra, 1 - beqz ra, nopage_tlb_modify - - tlbsrch - ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + andi t0, ra, _PAGE_WRITE + beqz t0, nopage_tlb_modify #ifdef CONFIG_SMP - sc.d t0, t1, 0 - beqz t0, tlb_huge_update_modify - ld.d t0, t1, 0 + ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + sc.d t0, t1, 0 + beqz t0, tlb_huge_update_modify + ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) #else - st.d t0, t1, 0 + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) + ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + st.d t0, t1, 0 #endif /* * A huge PTE describes an area the size of the @@ -493,21 +446,20 @@ tlb_huge_update_modify: * address space. */ /* Huge page: Move Global bit */ - xori t0, t0, _PAGE_HUGE - lu12i.w t1, _PAGE_HGLOBAL >> 12 - and t1, t0, t1 - srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) - or t0, t0, t1 + xori t0, t0, _PAGE_HUGE + lu12i.w t1, _PAGE_HGLOBAL >> 12 + and t1, t0, t1 + srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) + or t0, t0, t1 - addi.d ra, t0, 0 - csrwr t0, LOONGARCH_CSR_TLBELO0 - addi.d t0, ra, 0 + move ra, t0 + csrwr ra, LOONGARCH_CSR_TLBELO0 /* Convert to entrylo1 */ - addi.d t1, zero, 1 - slli.d t1, t1, (HPAGE_SHIFT - 1) - add.d t0, t0, t1 - csrwr t0, LOONGARCH_CSR_TLBELO1 + addi.d t1, zero, 1 + slli.d t1, t1, (HPAGE_SHIFT - 1) + add.d t0, t0, t1 + csrwr t0, LOONGARCH_CSR_TLBELO1 /* Set huge page tlb entry size */ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) @@ -521,26 +473,31 @@ tlb_huge_update_modify: addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 + ertn + nopage_tlb_modify: - dbar 0 - csrrd ra, EXCEPTION_KS2 - la.abs t0, tlb_do_page_fault_1 - jr t0 + dbar 0 + csrrd ra, EXCEPTION_KS2 + la.abs t0, tlb_do_page_fault_1 + jr t0 SYM_FUNC_END(handle_tlb_modify) SYM_FUNC_START(handle_tlb_refill) - csrwr t0, LOONGARCH_CSR_TLBRSAVE - csrrd t0, LOONGARCH_CSR_PGD - lddir t0, t0, 3 + csrwr t0, LOONGARCH_CSR_TLBRSAVE + csrrd t0, LOONGARCH_CSR_PGD + lddir t0, t0, 3 #if CONFIG_PGTABLE_LEVELS > 3 - lddir t0, t0, 2 + lddir t0, t0, 2 #endif #if CONFIG_PGTABLE_LEVELS > 2 - lddir t0, t0, 1 + lddir t0, t0, 1 #endif - ldpte t0, 0 - ldpte t0, 1 + ldpte t0, 0 + ldpte t0, 1 tlbfill - csrrd t0, LOONGARCH_CSR_TLBRSAVE + csrrd t0, LOONGARCH_CSR_TLBRSAVE ertn SYM_FUNC_END(handle_tlb_refill) diff --git a/arch/loongarch/net/Makefile b/arch/loongarch/net/Makefile new file mode 100644 index 000000000000..1ec12a0c324a --- /dev/null +++ b/arch/loongarch/net/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for arch/loongarch/net +# +# Copyright (C) 2022 Loongson Technology Corporation Limited +# +obj-$(CONFIG_BPF_JIT) += bpf_jit.o diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c new file mode 100644 index 000000000000..43f0a98efe38 --- /dev/null +++ b/arch/loongarch/net/bpf_jit.c @@ -0,0 +1,1179 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * BPF JIT compiler for LoongArch + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ +#include "bpf_jit.h" + +#define REG_TCC LOONGARCH_GPR_A6 +#define TCC_SAVED LOONGARCH_GPR_S5 + +#define SAVE_RA BIT(0) +#define SAVE_TCC BIT(1) + +static const int regmap[] = { + /* return value from in-kernel function, and exit value for eBPF program */ + [BPF_REG_0] = LOONGARCH_GPR_A5, + /* arguments from eBPF program to in-kernel function */ + [BPF_REG_1] = LOONGARCH_GPR_A0, + [BPF_REG_2] = LOONGARCH_GPR_A1, + [BPF_REG_3] = LOONGARCH_GPR_A2, + [BPF_REG_4] = LOONGARCH_GPR_A3, + [BPF_REG_5] = LOONGARCH_GPR_A4, + /* callee saved registers that in-kernel function will preserve */ + [BPF_REG_6] = LOONGARCH_GPR_S0, + [BPF_REG_7] = LOONGARCH_GPR_S1, + [BPF_REG_8] = LOONGARCH_GPR_S2, + [BPF_REG_9] = LOONGARCH_GPR_S3, + /* read-only frame pointer to access stack */ + [BPF_REG_FP] = LOONGARCH_GPR_S4, + /* temporary register for blinding constants */ + [BPF_REG_AX] = LOONGARCH_GPR_T0, +}; + +static void mark_call(struct jit_ctx *ctx) +{ + ctx->flags |= SAVE_RA; +} + +static void mark_tail_call(struct jit_ctx *ctx) +{ + ctx->flags |= SAVE_TCC; +} + +static bool seen_call(struct jit_ctx *ctx) +{ + return (ctx->flags & SAVE_RA); +} + +static bool seen_tail_call(struct jit_ctx *ctx) +{ + return (ctx->flags & SAVE_TCC); +} + +static u8 tail_call_reg(struct jit_ctx *ctx) +{ + if (seen_call(ctx)) + return TCC_SAVED; + + return REG_TCC; +} + +/* + * eBPF prog stack layout: + * + * high + * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP + * | $ra | + * +-------------------------+ + * | $fp | + * +-------------------------+ + * | $s0 | + * +-------------------------+ + * | $s1 | + * +-------------------------+ + * | $s2 | + * +-------------------------+ + * | $s3 | + * +-------------------------+ + * | $s4 | + * +-------------------------+ + * | $s5 | + * +-------------------------+ <--BPF_REG_FP + * | prog->aux->stack_depth | + * | (optional) | + * current $sp -------------> +-------------------------+ + * low + */ +static void build_prologue(struct jit_ctx *ctx) +{ + int stack_adjust = 0, store_offset, bpf_stack_adjust; + + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + + /* To store ra, fp, s0, s1, s2, s3, s4 and s5. */ + stack_adjust += sizeof(long) * 8; + + stack_adjust = round_up(stack_adjust, 16); + stack_adjust += bpf_stack_adjust; + + /* + * First instruction initializes the tail call count (TCC). + * On tail call we skip this instruction, and the TCC is + * passed in REG_TCC from the caller. + */ + emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT); + + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust); + + store_offset = stack_adjust - sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset); + + emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust); + + if (bpf_stack_adjust) + emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust); + + /* + * Program contains calls and tail calls, so REG_TCC need + * to be saved across calls. + */ + if (seen_tail_call(ctx) && seen_call(ctx)) + move_reg(ctx, TCC_SAVED, REG_TCC); + + ctx->stack_size = stack_adjust; +} + +static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) +{ + int stack_adjust = ctx->stack_size; + int load_offset; + + load_offset = stack_adjust - sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset); + + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust); + + if (!is_tail_call) { + /* Set return value */ + move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]); + /* Return to the caller */ + emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0); + } else { + /* + * Call the next bpf prog and skip the first instruction + * of TCC initialization. + */ + emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1); + } +} + +static void build_epilogue(struct jit_ctx *ctx) +{ + __build_epilogue(ctx, false); +} + +bool bpf_jit_supports_kfunc_call(void) +{ + return true; +} + +/* initialized on the first pass of build_body() */ +static int out_offset = -1; +static int emit_bpf_tail_call(struct jit_ctx *ctx) +{ + int off; + u8 tcc = tail_call_reg(ctx); + u8 a1 = LOONGARCH_GPR_A1; + u8 a2 = LOONGARCH_GPR_A2; + u8 t1 = LOONGARCH_GPR_T1; + u8 t2 = LOONGARCH_GPR_T2; + u8 t3 = LOONGARCH_GPR_T3; + const int idx0 = ctx->idx; + +#define cur_offset (ctx->idx - idx0) +#define jmp_offset (out_offset - (cur_offset)) + + /* + * a0: &ctx + * a1: &array + * a2: index + * + * if (index >= array->map.max_entries) + * goto out; + */ + off = offsetof(struct bpf_array, map.max_entries); + emit_insn(ctx, ldwu, t1, a1, off); + /* bgeu $a2, $t1, jmp_offset */ + if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0) + goto toofar; + + /* + * if (--TCC < 0) + * goto out; + */ + emit_insn(ctx, addid, REG_TCC, tcc, -1); + if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0) + goto toofar; + + /* + * prog = array->ptrs[index]; + * if (!prog) + * goto out; + */ + emit_insn(ctx, alsld, t2, a2, a1, 2); + off = offsetof(struct bpf_array, ptrs); + emit_insn(ctx, ldd, t2, t2, off); + /* beq $t2, $zero, jmp_offset */ + if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0) + goto toofar; + + /* goto *(prog->bpf_func + 4); */ + off = offsetof(struct bpf_prog, bpf_func); + emit_insn(ctx, ldd, t3, t2, off); + __build_epilogue(ctx, true); + + /* out: */ + if (out_offset == -1) + out_offset = cur_offset; + if (cur_offset != out_offset) { + pr_err_once("tail_call out_offset = %d, expected %d!\n", + cur_offset, out_offset); + return -1; + } + + return 0; + +toofar: + pr_info_once("tail_call: jump too far\n"); + return -1; +#undef cur_offset +#undef jmp_offset +} + +static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) +{ + const u8 t1 = LOONGARCH_GPR_T1; + const u8 t2 = LOONGARCH_GPR_T2; + const u8 t3 = LOONGARCH_GPR_T3; + const u8 src = regmap[insn->src_reg]; + const u8 dst = regmap[insn->dst_reg]; + const s16 off = insn->off; + const s32 imm = insn->imm; + const bool isdw = BPF_SIZE(insn->code) == BPF_DW; + + move_imm(ctx, t1, off, false); + emit_insn(ctx, addd, t1, dst, t1); + move_reg(ctx, t3, src); + + switch (imm) { + /* lock *(size *)(dst + off) <op>= src */ + case BPF_ADD: + if (isdw) + emit_insn(ctx, amaddd, t2, t1, src); + else + emit_insn(ctx, amaddw, t2, t1, src); + break; + case BPF_AND: + if (isdw) + emit_insn(ctx, amandd, t2, t1, src); + else + emit_insn(ctx, amandw, t2, t1, src); + break; + case BPF_OR: + if (isdw) + emit_insn(ctx, amord, t2, t1, src); + else + emit_insn(ctx, amorw, t2, t1, src); + break; + case BPF_XOR: + if (isdw) + emit_insn(ctx, amxord, t2, t1, src); + else + emit_insn(ctx, amxorw, t2, t1, src); + break; + /* src = atomic_fetch_<op>(dst + off, src) */ + case BPF_ADD | BPF_FETCH: + if (isdw) { + emit_insn(ctx, amaddd, src, t1, t3); + } else { + emit_insn(ctx, amaddw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + case BPF_AND | BPF_FETCH: + if (isdw) { + emit_insn(ctx, amandd, src, t1, t3); + } else { + emit_insn(ctx, amandw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + case BPF_OR | BPF_FETCH: + if (isdw) { + emit_insn(ctx, amord, src, t1, t3); + } else { + emit_insn(ctx, amorw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + case BPF_XOR | BPF_FETCH: + if (isdw) { + emit_insn(ctx, amxord, src, t1, t3); + } else { + emit_insn(ctx, amxorw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + /* src = atomic_xchg(dst + off, src); */ + case BPF_XCHG: + if (isdw) { + emit_insn(ctx, amswapd, src, t1, t3); + } else { + emit_insn(ctx, amswapw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + /* r0 = atomic_cmpxchg(dst + off, r0, src); */ + case BPF_CMPXCHG: + u8 r0 = regmap[BPF_REG_0]; + + move_reg(ctx, t2, r0); + if (isdw) { + emit_insn(ctx, lld, r0, t1, 0); + emit_insn(ctx, bne, t2, r0, 4); + move_reg(ctx, t3, src); + emit_insn(ctx, scd, t3, t1, 0); + emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4); + } else { + emit_insn(ctx, llw, r0, t1, 0); + emit_zext_32(ctx, t2, true); + emit_zext_32(ctx, r0, true); + emit_insn(ctx, bne, t2, r0, 4); + move_reg(ctx, t3, src); + emit_insn(ctx, scw, t3, t1, 0); + emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6); + emit_zext_32(ctx, r0, true); + } + break; + } +} + +static bool is_signed_bpf_cond(u8 cond) +{ + return cond == BPF_JSGT || cond == BPF_JSLT || + cond == BPF_JSGE || cond == BPF_JSLE; +} + +static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass) +{ + const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || + BPF_CLASS(insn->code) == BPF_JMP32; + const u8 code = insn->code; + const u8 cond = BPF_OP(code); + const u8 t1 = LOONGARCH_GPR_T1; + const u8 t2 = LOONGARCH_GPR_T2; + const u8 src = regmap[insn->src_reg]; + const u8 dst = regmap[insn->dst_reg]; + const s16 off = insn->off; + const s32 imm = insn->imm; + int jmp_offset; + int i = insn - ctx->prog->insnsi; + + switch (code) { + /* dst = src */ + case BPF_ALU | BPF_MOV | BPF_X: + case BPF_ALU64 | BPF_MOV | BPF_X: + move_reg(ctx, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = imm */ + case BPF_ALU | BPF_MOV | BPF_K: + case BPF_ALU64 | BPF_MOV | BPF_K: + move_imm(ctx, dst, imm, is32); + break; + + /* dst = dst + src */ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + emit_insn(ctx, addd, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst + imm */ + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + if (is_signed_imm12(imm)) { + emit_insn(ctx, addid, dst, dst, imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, addd, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst - src */ + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + emit_insn(ctx, subd, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst - imm */ + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + if (is_signed_imm12(-imm)) { + emit_insn(ctx, addid, dst, dst, -imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, subd, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst * src */ + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_X: + emit_insn(ctx, muld, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst * imm */ + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_K: + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, muld, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst / src */ + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_X: + emit_zext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_zext_32(ctx, t1, is32); + emit_insn(ctx, divdu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst / imm */ + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + move_imm(ctx, t1, imm, is32); + emit_zext_32(ctx, dst, is32); + emit_insn(ctx, divdu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst % src */ + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: + emit_zext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_zext_32(ctx, t1, is32); + emit_insn(ctx, moddu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst % imm */ + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: + move_imm(ctx, t1, imm, is32); + emit_zext_32(ctx, dst, is32); + emit_insn(ctx, moddu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = -dst */ + case BPF_ALU | BPF_NEG: + case BPF_ALU64 | BPF_NEG: + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst & src */ + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_AND | BPF_X: + emit_insn(ctx, and, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst & imm */ + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, andi, dst, dst, imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, and, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst | src */ + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + emit_insn(ctx, or, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst | imm */ + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, ori, dst, dst, imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, or, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst ^ src */ + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + emit_insn(ctx, xor, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst ^ imm */ + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, xori, dst, dst, imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, xor, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst << src (logical) */ + case BPF_ALU | BPF_LSH | BPF_X: + emit_insn(ctx, sllw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + case BPF_ALU64 | BPF_LSH | BPF_X: + emit_insn(ctx, slld, dst, dst, src); + break; + + /* dst = dst << imm (logical) */ + case BPF_ALU | BPF_LSH | BPF_K: + emit_insn(ctx, slliw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + + case BPF_ALU64 | BPF_LSH | BPF_K: + emit_insn(ctx, sllid, dst, dst, imm); + break; + + /* dst = dst >> src (logical) */ + case BPF_ALU | BPF_RSH | BPF_X: + emit_insn(ctx, srlw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + case BPF_ALU64 | BPF_RSH | BPF_X: + emit_insn(ctx, srld, dst, dst, src); + break; + + /* dst = dst >> imm (logical) */ + case BPF_ALU | BPF_RSH | BPF_K: + emit_insn(ctx, srliw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + + case BPF_ALU64 | BPF_RSH | BPF_K: + emit_insn(ctx, srlid, dst, dst, imm); + break; + + /* dst = dst >> src (arithmetic) */ + case BPF_ALU | BPF_ARSH | BPF_X: + emit_insn(ctx, sraw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + case BPF_ALU64 | BPF_ARSH | BPF_X: + emit_insn(ctx, srad, dst, dst, src); + break; + + /* dst = dst >> imm (arithmetic) */ + case BPF_ALU | BPF_ARSH | BPF_K: + emit_insn(ctx, sraiw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + + case BPF_ALU64 | BPF_ARSH | BPF_K: + emit_insn(ctx, sraid, dst, dst, imm); + break; + + /* dst = BSWAP##imm(dst) */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm) { + case 16: + /* zero-extend 16 bits into 64 bits */ + emit_insn(ctx, bstrpickd, dst, dst, 15, 0); + break; + case 32: + /* zero-extend 32 bits into 64 bits */ + emit_zext_32(ctx, dst, is32); + break; + case 64: + /* do nothing */ + break; + } + break; + + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (imm) { + case 16: + emit_insn(ctx, revb2h, dst, dst); + /* zero-extend 16 bits into 64 bits */ + emit_insn(ctx, bstrpickd, dst, dst, 15, 0); + break; + case 32: + emit_insn(ctx, revb2w, dst, dst); + /* zero-extend 32 bits into 64 bits */ + emit_zext_32(ctx, dst, is32); + break; + case 64: + emit_insn(ctx, revbd, dst, dst); + break; + } + break; + + /* PC += off if dst cond src */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + jmp_offset = bpf2la_offset(i, off, ctx); + move_reg(ctx, t1, dst); + move_reg(ctx, t2, src); + if (is_signed_bpf_cond(BPF_OP(code))) { + emit_sext_32(ctx, t1, is32); + emit_sext_32(ctx, t2, is32); + } else { + emit_zext_32(ctx, t1, is32); + emit_zext_32(ctx, t2, is32); + } + if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0) + goto toofar; + break; + + /* PC += off if dst cond imm */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + u8 t7 = -1; + jmp_offset = bpf2la_offset(i, off, ctx); + if (imm) { + move_imm(ctx, t1, imm, false); + t7 = t1; + } else { + /* If imm is 0, simply use zero register. */ + t7 = LOONGARCH_GPR_ZERO; + } + move_reg(ctx, t2, dst); + if (is_signed_bpf_cond(BPF_OP(code))) { + emit_sext_32(ctx, t7, is32); + emit_sext_32(ctx, t2, is32); + } else { + emit_zext_32(ctx, t7, is32); + emit_zext_32(ctx, t2, is32); + } + if (emit_cond_jmp(ctx, cond, t2, t7, jmp_offset) < 0) + goto toofar; + break; + + /* PC += off if dst & src */ + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: + jmp_offset = bpf2la_offset(i, off, ctx); + emit_insn(ctx, and, t1, dst, src); + emit_zext_32(ctx, t1, is32); + if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0) + goto toofar; + break; + + /* PC += off if dst & imm */ + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + jmp_offset = bpf2la_offset(i, off, ctx); + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, and, t1, dst, t1); + emit_zext_32(ctx, t1, is32); + if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0) + goto toofar; + break; + + /* PC += off */ + case BPF_JMP | BPF_JA: + jmp_offset = bpf2la_offset(i, off, ctx); + if (emit_uncond_jmp(ctx, jmp_offset) < 0) + goto toofar; + break; + + /* function call */ + case BPF_JMP | BPF_CALL: + int ret; + u64 func_addr; + bool func_addr_fixed; + + mark_call(ctx); + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + + move_imm(ctx, t1, func_addr, is32); + emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0); + move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); + break; + + /* tail call */ + case BPF_JMP | BPF_TAIL_CALL: + mark_tail_call(ctx); + if (emit_bpf_tail_call(ctx) < 0) + return -EINVAL; + break; + + /* function return */ + case BPF_JMP | BPF_EXIT: + emit_sext_32(ctx, regmap[BPF_REG_0], true); + + if (i == ctx->prog->len - 1) + break; + + jmp_offset = epilogue_offset(ctx); + if (emit_uncond_jmp(ctx, jmp_offset) < 0) + goto toofar; + break; + + /* dst = imm64 */ + case BPF_LD | BPF_IMM | BPF_DW: + u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm; + + move_imm(ctx, dst, imm64, is32); + return 1; + + /* dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_DW: + switch (BPF_SIZE(code)) { + case BPF_B: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldbu, dst, src, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, ldxbu, dst, src, t1); + } + break; + case BPF_H: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldhu, dst, src, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, ldxhu, dst, src, t1); + } + break; + case BPF_W: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldwu, dst, src, off); + } else if (is_signed_imm14(off)) { + emit_insn(ctx, ldptrw, dst, src, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, ldxwu, dst, src, t1); + } + break; + case BPF_DW: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldd, dst, src, off); + } else if (is_signed_imm14(off)) { + emit_insn(ctx, ldptrd, dst, src, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, ldxd, dst, src, t1); + } + break; + } + break; + + /* *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_DW: + switch (BPF_SIZE(code)) { + case BPF_B: + move_imm(ctx, t1, imm, is32); + if (is_signed_imm12(off)) { + emit_insn(ctx, stb, t1, dst, off); + } else { + move_imm(ctx, t2, off, is32); + emit_insn(ctx, stxb, t1, dst, t2); + } + break; + case BPF_H: + move_imm(ctx, t1, imm, is32); + if (is_signed_imm12(off)) { + emit_insn(ctx, sth, t1, dst, off); + } else { + move_imm(ctx, t2, off, is32); + emit_insn(ctx, stxh, t1, dst, t2); + } + break; + case BPF_W: + move_imm(ctx, t1, imm, is32); + if (is_signed_imm12(off)) { + emit_insn(ctx, stw, t1, dst, off); + } else if (is_signed_imm14(off)) { + emit_insn(ctx, stptrw, t1, dst, off); + } else { + move_imm(ctx, t2, off, is32); + emit_insn(ctx, stxw, t1, dst, t2); + } + break; + case BPF_DW: + move_imm(ctx, t1, imm, is32); + if (is_signed_imm12(off)) { + emit_insn(ctx, std, t1, dst, off); + } else if (is_signed_imm14(off)) { + emit_insn(ctx, stptrd, t1, dst, off); + } else { + move_imm(ctx, t2, off, is32); + emit_insn(ctx, stxd, t1, dst, t2); + } + break; + } + break; + + /* *(size *)(dst + off) = src */ + case BPF_STX | BPF_MEM | BPF_B: + case BPF_STX | BPF_MEM | BPF_H: + case BPF_STX | BPF_MEM | BPF_W: + case BPF_STX | BPF_MEM | BPF_DW: + switch (BPF_SIZE(code)) { + case BPF_B: + if (is_signed_imm12(off)) { + emit_insn(ctx, stb, src, dst, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, stxb, src, dst, t1); + } + break; + case BPF_H: + if (is_signed_imm12(off)) { + emit_insn(ctx, sth, src, dst, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, stxh, src, dst, t1); + } + break; + case BPF_W: + if (is_signed_imm12(off)) { + emit_insn(ctx, stw, src, dst, off); + } else if (is_signed_imm14(off)) { + emit_insn(ctx, stptrw, src, dst, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, stxw, src, dst, t1); + } + break; + case BPF_DW: + if (is_signed_imm12(off)) { + emit_insn(ctx, std, src, dst, off); + } else if (is_signed_imm14(off)) { + emit_insn(ctx, stptrd, src, dst, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, stxd, src, dst, t1); + } + break; + } + break; + + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + emit_atomic(insn, ctx); + break; + + default: + pr_err("bpf_jit: unknown opcode %02x\n", code); + return -EINVAL; + } + + return 0; + +toofar: + pr_info_once("bpf_jit: opcode %02x, jump too far\n", code); + return -E2BIG; +} + +static int build_body(struct jit_ctx *ctx, bool extra_pass) +{ + int i; + const struct bpf_prog *prog = ctx->prog; + + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + int ret; + + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; + + ret = build_insn(insn, ctx, extra_pass); + if (ret > 0) { + i++; + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; + continue; + } + if (ret) + return ret; + } + + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; + + return 0; +} + +/* Fill space with break instructions */ +static void jit_fill_hole(void *area, unsigned int size) +{ + u32 *ptr; + + /* We are guaranteed to have aligned memory */ + for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) + *ptr++ = INSN_BREAK; +} + +static int validate_code(struct jit_ctx *ctx) +{ + int i; + union loongarch_instruction insn; + + for (i = 0; i < ctx->idx; i++) { + insn = ctx->image[i]; + /* Check INSN_BREAK */ + if (insn.word == INSN_BREAK) + return -1; + } + + return 0; +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + bool tmp_blinded = false, extra_pass = false; + u8 *image_ptr; + int image_size; + struct jit_ctx ctx; + struct jit_data *jit_data; + struct bpf_binary_header *header; + struct bpf_prog *tmp, *orig_prog = prog; + + /* + * If BPF JIT was not enabled then we must fall back to + * the interpreter. + */ + if (!prog->jit_requested) + return orig_prog; + + tmp = bpf_jit_blind_constants(prog); + /* + * If blinding was requested and we failed during blinding, + * we must fall back to the interpreter. Otherwise, we save + * the new JITed code. + */ + if (IS_ERR(tmp)) + return orig_prog; + + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } + + jit_data = prog->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + prog = orig_prog; + goto out; + } + prog->aux->jit_data = jit_data; + } + if (jit_data->ctx.offset) { + ctx = jit_data->ctx; + image_ptr = jit_data->image; + header = jit_data->header; + extra_pass = true; + image_size = sizeof(u32) * ctx.idx; + goto skip_init_ctx; + } + + memset(&ctx, 0, sizeof(ctx)); + ctx.prog = prog; + + ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL); + if (ctx.offset == NULL) { + prog = orig_prog; + goto out_offset; + } + + /* 1. Initial fake pass to compute ctx->idx and set ctx->flags */ + build_prologue(&ctx); + if (build_body(&ctx, extra_pass)) { + prog = orig_prog; + goto out_offset; + } + ctx.epilogue_offset = ctx.idx; + build_epilogue(&ctx); + + /* Now we know the actual image size. + * As each LoongArch instruction is of length 32bit, + * we are translating number of JITed intructions into + * the size required to store these JITed code. + */ + image_size = sizeof(u32) * ctx.idx; + /* Now we know the size of the structure to make */ + header = bpf_jit_binary_alloc(image_size, &image_ptr, + sizeof(u32), jit_fill_hole); + if (header == NULL) { + prog = orig_prog; + goto out_offset; + } + + /* 2. Now, the actual pass to generate final JIT code */ + ctx.image = (union loongarch_instruction *)image_ptr; + +skip_init_ctx: + ctx.idx = 0; + + build_prologue(&ctx); + if (build_body(&ctx, extra_pass)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_offset; + } + build_epilogue(&ctx); + + /* 3. Extra pass to validate JITed code */ + if (validate_code(&ctx)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_offset; + } + + /* And we're done */ + if (bpf_jit_enable > 1) + bpf_jit_dump(prog->len, image_size, 2, ctx.image); + + /* Update the icache */ + flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx)); + + if (!prog->is_func || extra_pass) { + if (extra_pass && ctx.idx != jit_data->ctx.idx) { + pr_err_once("multi-func JIT bug %d != %d\n", + ctx.idx, jit_data->ctx.idx); + bpf_jit_binary_free(header); + prog->bpf_func = NULL; + prog->jited = 0; + prog->jited_len = 0; + goto out_offset; + } + bpf_jit_binary_lock_ro(header); + } else { + jit_data->ctx = ctx; + jit_data->image = image_ptr; + jit_data->header = header; + } + prog->jited = 1; + prog->jited_len = image_size; + prog->bpf_func = (void *)ctx.image; + + if (!prog->is_func || extra_pass) { + int i; + + /* offset[prog->len] is the size of program */ + for (i = 0; i <= prog->len; i++) + ctx.offset[i] *= LOONGARCH_INSN_SIZE; + bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); + +out_offset: + kvfree(ctx.offset); + kfree(jit_data); + prog->aux->jit_data = NULL; + } + +out: + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog); + + out_offset = -1; + + return prog; +} diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h new file mode 100644 index 000000000000..e665ddb0aeb8 --- /dev/null +++ b/arch/loongarch/net/bpf_jit.h @@ -0,0 +1,282 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * BPF JIT compiler for LoongArch + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ +#include <linux/bpf.h> +#include <linux/filter.h> +#include <asm/cacheflush.h> +#include <asm/inst.h> + +struct jit_ctx { + const struct bpf_prog *prog; + unsigned int idx; + unsigned int flags; + unsigned int epilogue_offset; + u32 *offset; + union loongarch_instruction *image; + u32 stack_size; +}; + +struct jit_data { + struct bpf_binary_header *header; + u8 *image; + struct jit_ctx ctx; +}; + +#define emit_insn(ctx, func, ...) \ +do { \ + if (ctx->image != NULL) { \ + union loongarch_instruction *insn = &ctx->image[ctx->idx]; \ + emit_##func(insn, ##__VA_ARGS__); \ + } \ + ctx->idx++; \ +} while (0) + +#define is_signed_imm12(val) signed_imm_check(val, 12) +#define is_signed_imm14(val) signed_imm_check(val, 14) +#define is_signed_imm16(val) signed_imm_check(val, 16) +#define is_signed_imm26(val) signed_imm_check(val, 26) +#define is_signed_imm32(val) signed_imm_check(val, 32) +#define is_signed_imm52(val) signed_imm_check(val, 52) +#define is_unsigned_imm12(val) unsigned_imm_check(val, 12) + +static inline int bpf2la_offset(int bpf_insn, int off, const struct jit_ctx *ctx) +{ + /* BPF JMP offset is relative to the next instruction */ + bpf_insn++; + /* + * Whereas LoongArch branch instructions encode the offset + * from the branch itself, so we must subtract 1 from the + * instruction offset. + */ + return (ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1)); +} + +static inline int epilogue_offset(const struct jit_ctx *ctx) +{ + int from = ctx->idx; + int to = ctx->epilogue_offset; + + return (to - from); +} + +/* Zero-extend 32 bits into 64 bits */ +static inline void emit_zext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, bool is32) +{ + if (!is32) + return; + + emit_insn(ctx, lu32id, reg, 0); +} + +/* Signed-extend 32 bits into 64 bits */ +static inline void emit_sext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, bool is32) +{ + if (!is32) + return; + + emit_insn(ctx, addiw, reg, reg, 0); +} + +static inline void move_imm(struct jit_ctx *ctx, enum loongarch_gpr rd, long imm, bool is32) +{ + long imm_11_0, imm_31_12, imm_51_32, imm_63_52, imm_51_0, imm_51_31; + + /* or rd, $zero, $zero */ + if (imm == 0) { + emit_insn(ctx, or, rd, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_ZERO); + return; + } + + /* addiw rd, $zero, imm_11_0 */ + if (is_signed_imm12(imm)) { + emit_insn(ctx, addiw, rd, LOONGARCH_GPR_ZERO, imm); + goto zext; + } + + /* ori rd, $zero, imm_11_0 */ + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, ori, rd, LOONGARCH_GPR_ZERO, imm); + goto zext; + } + + /* lu52id rd, $zero, imm_63_52 */ + imm_63_52 = (imm >> 52) & 0xfff; + imm_51_0 = imm & 0xfffffffffffff; + if (imm_63_52 != 0 && imm_51_0 == 0) { + emit_insn(ctx, lu52id, rd, LOONGARCH_GPR_ZERO, imm_63_52); + return; + } + + /* lu12iw rd, imm_31_12 */ + imm_31_12 = (imm >> 12) & 0xfffff; + emit_insn(ctx, lu12iw, rd, imm_31_12); + + /* ori rd, rd, imm_11_0 */ + imm_11_0 = imm & 0xfff; + if (imm_11_0 != 0) + emit_insn(ctx, ori, rd, rd, imm_11_0); + + if (!is_signed_imm32(imm)) { + if (imm_51_0 != 0) { + /* + * If bit[51:31] is all 0 or all 1, + * it means bit[51:32] is sign extended by lu12iw, + * no need to call lu32id to do a new filled operation. + */ + imm_51_31 = (imm >> 31) & 0x1fffff; + if (imm_51_31 != 0 || imm_51_31 != 0x1fffff) { + /* lu32id rd, imm_51_32 */ + imm_51_32 = (imm >> 32) & 0xfffff; + emit_insn(ctx, lu32id, rd, imm_51_32); + } + } + + /* lu52id rd, rd, imm_63_52 */ + if (!is_signed_imm52(imm)) + emit_insn(ctx, lu52id, rd, rd, imm_63_52); + } + +zext: + emit_zext_32(ctx, rd, is32); +} + +static inline void move_reg(struct jit_ctx *ctx, enum loongarch_gpr rd, + enum loongarch_gpr rj) +{ + emit_insn(ctx, or, rd, rj, LOONGARCH_GPR_ZERO); +} + +static inline int invert_jmp_cond(u8 cond) +{ + switch (cond) { + case BPF_JEQ: + return BPF_JNE; + case BPF_JNE: + case BPF_JSET: + return BPF_JEQ; + case BPF_JGT: + return BPF_JLE; + case BPF_JGE: + return BPF_JLT; + case BPF_JLT: + return BPF_JGE; + case BPF_JLE: + return BPF_JGT; + case BPF_JSGT: + return BPF_JSLE; + case BPF_JSGE: + return BPF_JSLT; + case BPF_JSLT: + return BPF_JSGE; + case BPF_JSLE: + return BPF_JSGT; + } + return -1; +} + +static inline void cond_jmp_offset(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + switch (cond) { + case BPF_JEQ: + /* PC += jmp_offset if rj == rd */ + emit_insn(ctx, beq, rj, rd, jmp_offset); + return; + case BPF_JNE: + case BPF_JSET: + /* PC += jmp_offset if rj != rd */ + emit_insn(ctx, bne, rj, rd, jmp_offset); + return; + case BPF_JGT: + /* PC += jmp_offset if rj > rd (unsigned) */ + emit_insn(ctx, bltu, rd, rj, jmp_offset); + return; + case BPF_JLT: + /* PC += jmp_offset if rj < rd (unsigned) */ + emit_insn(ctx, bltu, rj, rd, jmp_offset); + return; + case BPF_JGE: + /* PC += jmp_offset if rj >= rd (unsigned) */ + emit_insn(ctx, bgeu, rj, rd, jmp_offset); + return; + case BPF_JLE: + /* PC += jmp_offset if rj <= rd (unsigned) */ + emit_insn(ctx, bgeu, rd, rj, jmp_offset); + return; + case BPF_JSGT: + /* PC += jmp_offset if rj > rd (signed) */ + emit_insn(ctx, blt, rd, rj, jmp_offset); + return; + case BPF_JSLT: + /* PC += jmp_offset if rj < rd (signed) */ + emit_insn(ctx, blt, rj, rd, jmp_offset); + return; + case BPF_JSGE: + /* PC += jmp_offset if rj >= rd (signed) */ + emit_insn(ctx, bge, rj, rd, jmp_offset); + return; + case BPF_JSLE: + /* PC += jmp_offset if rj <= rd (signed) */ + emit_insn(ctx, bge, rd, rj, jmp_offset); + return; + } +} + +static inline void cond_jmp_offs26(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + cond = invert_jmp_cond(cond); + cond_jmp_offset(ctx, cond, rj, rd, 2); + emit_insn(ctx, b, jmp_offset); +} + +static inline void uncond_jmp_offs26(struct jit_ctx *ctx, int jmp_offset) +{ + emit_insn(ctx, b, jmp_offset); +} + +static inline int emit_cond_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + /* + * A large PC-relative jump offset may overflow the immediate field of + * the native conditional branch instruction, triggering a conversion + * to use an absolute jump instead, this jump sequence is particularly + * nasty. For now, use cond_jmp_offs26() directly to keep it simple. + * In the future, maybe we can add support for far branching, the branch + * relaxation requires more than two passes to converge, the code seems + * too complex to understand, not quite sure whether it is necessary and + * worth the extra pain. Anyway, just leave it as it is to enhance code + * readability now. + */ + if (is_signed_imm26(jmp_offset)) { + cond_jmp_offs26(ctx, cond, rj, rd, jmp_offset); + return 0; + } + + return -EINVAL; +} + +static inline int emit_uncond_jmp(struct jit_ctx *ctx, int jmp_offset) +{ + if (is_signed_imm26(jmp_offset)) { + uncond_jmp_offs26(ctx, jmp_offset); + return 0; + } + + return -EINVAL; +} + +static inline int emit_tailcall_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + if (is_signed_imm16(jmp_offset)) { + cond_jmp_offset(ctx, cond, rj, rd, jmp_offset); + return 0; + } + + return -EINVAL; +} diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index bf921487333c..8235ec92b41f 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -83,6 +83,69 @@ static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci) } /* + * Create a PCI config space window + * - reserve mem region + * - alloc struct pci_config_window with space for all mappings + * - ioremap the config space + */ +static struct pci_config_window *arch_pci_ecam_create(struct device *dev, + struct resource *cfgres, struct resource *busr, const struct pci_ecam_ops *ops) +{ + int bsz, bus_range, err; + struct resource *conflict; + struct pci_config_window *cfg; + + if (busr->start > busr->end) + return ERR_PTR(-EINVAL); + + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return ERR_PTR(-ENOMEM); + + cfg->parent = dev; + cfg->ops = ops; + cfg->busr.start = busr->start; + cfg->busr.end = busr->end; + cfg->busr.flags = IORESOURCE_BUS; + bus_range = resource_size(cfgres) >> ops->bus_shift; + + bsz = 1 << ops->bus_shift; + + cfg->res.start = cfgres->start; + cfg->res.end = cfgres->end; + cfg->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + cfg->res.name = "PCI ECAM"; + + conflict = request_resource_conflict(&iomem_resource, &cfg->res); + if (conflict) { + err = -EBUSY; + dev_err(dev, "can't claim ECAM area %pR: address conflict with %s %pR\n", + &cfg->res, conflict->name, conflict); + goto err_exit; + } + + cfg->win = pci_remap_cfgspace(cfgres->start, bus_range * bsz); + if (!cfg->win) + goto err_exit_iomap; + + if (ops->init) { + err = ops->init(cfg); + if (err) + goto err_exit; + } + dev_info(dev, "ECAM at %pR for %pR\n", &cfg->res, &cfg->busr); + + return cfg; + +err_exit_iomap: + err = -ENOMEM; + dev_err(dev, "ECAM ioremap failed\n"); +err_exit: + pci_ecam_free(cfg); + return ERR_PTR(err); +} + +/* * Lookup the bus range for the domain in MCFG, and set up config space * mapping. */ @@ -106,11 +169,16 @@ pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root) bus_shift = ecam_ops->bus_shift ? : 20; - cfgres.start = root->mcfg_addr + (bus_res->start << bus_shift); - cfgres.end = cfgres.start + (resource_size(bus_res) << bus_shift) - 1; - cfgres.flags = IORESOURCE_MEM; + if (bus_shift == 20) + cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); + else { + cfgres.start = root->mcfg_addr + (bus_res->start << bus_shift); + cfgres.end = cfgres.start + (resource_size(bus_res) << bus_shift) - 1; + cfgres.end |= BIT(28) + (((PCI_CFG_SPACE_EXP_SIZE - 1) & 0xf00) << 16); + cfgres.flags = IORESOURCE_MEM; + cfg = arch_pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); + } - cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); if (IS_ERR(cfg)) { dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res, PTR_ERR(cfg)); return NULL; diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index e9b7c34d9b6d..2726639150bc 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -9,6 +9,7 @@ #include <linux/types.h> #include <linux/pci.h> #include <linux/vgaarb.h> +#include <asm/cacheflush.h> #include <asm/loongson.h> #define PCI_DEVICE_ID_LOONGSON_HOST 0x7a00 @@ -45,12 +46,10 @@ static int __init pcibios_init(void) unsigned int lsize; /* - * Set PCI cacheline size to that of the highest level in the + * Set PCI cacheline size to that of the last level in the * cache hierarchy. */ - lsize = cpu_dcache_line_size(); - lsize = cpu_vcache_line_size() ? : lsize; - lsize = cpu_scache_line_size() ? : lsize; + lsize = cpu_last_level_cache_line_size(); BUG_ON(!lsize); diff --git a/arch/m68k/68000/Makefile b/arch/m68k/68000/Makefile index 674541fdf5b8..279560add577 100644 --- a/arch/m68k/68000/Makefile +++ b/arch/m68k/68000/Makefile @@ -17,4 +17,4 @@ obj-$(CONFIG_DRAGEN2) += dragen2.o obj-$(CONFIG_UCSIMM) += ucsimm.o obj-$(CONFIG_UCDIMM) += ucsimm.o -extra-y := head.o +obj-y += head.o diff --git a/arch/m68k/68000/ints.c b/arch/m68k/68000/ints.c index cda49b12d7be..f9a5ec781408 100644 --- a/arch/m68k/68000/ints.c +++ b/arch/m68k/68000/ints.c @@ -18,12 +18,12 @@ #include <asm/io.h> #include <asm/machdep.h> -#if defined(CONFIG_M68328) -#include <asm/MC68328.h> -#elif defined(CONFIG_M68EZ328) +#if defined(CONFIG_M68EZ328) #include <asm/MC68EZ328.h> #elif defined(CONFIG_M68VZ328) #include <asm/MC68VZ328.h> +#else +#include <asm/MC68328.h> #endif /* assembler routines */ diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index e0e9e31339c1..9380f6e3bb66 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -46,6 +46,7 @@ config M68000 select GENERIC_CSUM select CPU_NO_EFFICIENT_FFS select HAVE_ARCH_HASH + select LEGACY_TIMER_TICK help The Freescale (was Motorola) 68000 CPU is the first generation of the well known M68K family of processors. The CPU core as well as @@ -97,7 +98,6 @@ config M68060 config M68328 bool depends on !MMU - select LEGACY_TIMER_TICK select M68000 help Motorola 68328 processor support. @@ -105,7 +105,6 @@ config M68328 config M68EZ328 bool depends on !MMU - select LEGACY_TIMER_TICK select M68000 help Motorola 68EX328 processor support. @@ -113,7 +112,6 @@ config M68EZ328 config M68VZ328 bool depends on !MMU - select LEGACY_TIMER_TICK select M68000 help Motorola 68VZ328 processor support. @@ -399,7 +397,7 @@ config SINGLE_MEMORY_CHUNK order" to save memory that could be wasted for unused memory map. Say N if not sure. -config FORCE_MAX_ZONEORDER +config ARCH_FORCE_MAX_ORDER int "Maximum zone order" if ADVANCED depends on !SINGLE_MEMORY_CHUNK default "11" diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index e358605b70ba..43e39040d3ac 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -86,15 +86,6 @@ ifdef CONFIG_KGDB KBUILD_CFLAGS := $(subst -fomit-frame-pointer,,$(KBUILD_CFLAGS)) -g endif -# -# Select the assembler head startup code. Order is important. The default -# head code is first, processor specific selections can override it after. -# -head-y := arch/m68k/kernel/head.o -head-$(CONFIG_SUN3) := arch/m68k/kernel/sun3-head.o -head-$(CONFIG_M68000) := arch/m68k/68000/head.o -head-$(CONFIG_COLDFIRE) := arch/m68k/coldfire/head.o - libs-y += arch/m68k/lib/ diff --git a/arch/m68k/coldfire/Makefile b/arch/m68k/coldfire/Makefile index 9419a6c1f036..c56bc0dc7f2e 100644 --- a/arch/m68k/coldfire/Makefile +++ b/arch/m68k/coldfire/Makefile @@ -45,4 +45,4 @@ obj-$(CONFIG_STMARK2) += stmark2.o obj-$(CONFIG_PCI) += pci.o obj-y += gpio.o -extra-y := head.o +obj-y += head.o diff --git a/arch/m68k/configs/amcore_defconfig b/arch/m68k/configs/amcore_defconfig index 6d9ed2198170..041adcf6ecfc 100644 --- a/arch/m68k/configs/amcore_defconfig +++ b/arch/m68k/configs/amcore_defconfig @@ -27,9 +27,6 @@ CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set @@ -85,7 +82,6 @@ CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_BOTH=y # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_PRINTK_TIME=y -# CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/m68k/configs/m5208evb_defconfig b/arch/m68k/configs/m5208evb_defconfig index 0ee3079f6ca9..31035a0b9247 100644 --- a/arch/m68k/configs/m5208evb_defconfig +++ b/arch/m68k/configs/m5208evb_defconfig @@ -21,9 +21,6 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set diff --git a/arch/m68k/configs/m5249evb_defconfig b/arch/m68k/configs/m5249evb_defconfig index f84f68c04065..5706d7a1daba 100644 --- a/arch/m68k/configs/m5249evb_defconfig +++ b/arch/m68k/configs/m5249evb_defconfig @@ -22,9 +22,6 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set diff --git a/arch/m68k/configs/m5272c3_defconfig b/arch/m68k/configs/m5272c3_defconfig index eca65020aae3..f02fe144f4ad 100644 --- a/arch/m68k/configs/m5272c3_defconfig +++ b/arch/m68k/configs/m5272c3_defconfig @@ -22,9 +22,6 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set diff --git a/arch/m68k/configs/m5275evb_defconfig b/arch/m68k/configs/m5275evb_defconfig index 9402c7a3e9c7..781f307ff330 100644 --- a/arch/m68k/configs/m5275evb_defconfig +++ b/arch/m68k/configs/m5275evb_defconfig @@ -22,9 +22,6 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set diff --git a/arch/m68k/configs/m5307c3_defconfig b/arch/m68k/configs/m5307c3_defconfig index bb8b0eb4bdfc..6eac482356ca 100644 --- a/arch/m68k/configs/m5307c3_defconfig +++ b/arch/m68k/configs/m5307c3_defconfig @@ -22,9 +22,6 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set diff --git a/arch/m68k/configs/m5407c3_defconfig b/arch/m68k/configs/m5407c3_defconfig index ce9ccf13c7c0..496dcccb1c18 100644 --- a/arch/m68k/configs/m5407c3_defconfig +++ b/arch/m68k/configs/m5407c3_defconfig @@ -23,9 +23,6 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index d86b4009880b..7a2da780830b 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -145,11 +145,6 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc, /* Forward declaration, a strange C thing */ struct task_struct; -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *dead_task) -{ -} - unsigned long __get_wchan(struct task_struct *p); void show_registers(struct pt_regs *regs); diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile index c0833da6a2ca..af015447dfb4 100644 --- a/arch/m68k/kernel/Makefile +++ b/arch/m68k/kernel/Makefile @@ -3,19 +3,20 @@ # Makefile for the linux kernel. # -extra-$(CONFIG_AMIGA) := head.o -extra-$(CONFIG_ATARI) := head.o -extra-$(CONFIG_MAC) := head.o -extra-$(CONFIG_APOLLO) := head.o -extra-$(CONFIG_VME) := head.o -extra-$(CONFIG_HP300) := head.o -extra-$(CONFIG_Q40) := head.o -extra-$(CONFIG_SUN3X) := head.o -extra-$(CONFIG_VIRT) := head.o -extra-$(CONFIG_SUN3) := sun3-head.o extra-y += vmlinux.lds -obj-y := entry.o irq.o module.o process.o ptrace.o +obj-$(CONFIG_AMIGA) := head.o +obj-$(CONFIG_ATARI) := head.o +obj-$(CONFIG_MAC) := head.o +obj-$(CONFIG_APOLLO) := head.o +obj-$(CONFIG_VME) := head.o +obj-$(CONFIG_HP300) := head.o +obj-$(CONFIG_Q40) := head.o +obj-$(CONFIG_SUN3X) := head.o +obj-$(CONFIG_VIRT) := head.o +obj-$(CONFIG_SUN3) := sun3-head.o + +obj-y += entry.o irq.o module.o process.o ptrace.o obj-y += setup.o signal.o sys_m68k.o syscalltable.o time.o traps.o obj-$(CONFIG_MMU_MOTOROLA) += ints.o vectors.o diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 996132a5ef35..4ebb56d6d959 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -204,6 +204,16 @@ config TASK_SIZE hex "Size of user task space" if TASK_SIZE_BOOL default "0x80000000" +config MB_MANAGER + bool "Support for Microblaze Manager" + depends on ADVANCED_OPTIONS + help + This option enables API for configuring the MicroBlaze manager + control register, which is consumed by the break handler to + block the break. + + Say N here unless you know what you are doing. + endmenu menu "Bus Options" diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 1826d9ce4459..3f8a86c4336a 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -48,7 +48,6 @@ CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER)) # r31 holds current when in kernel mode KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-y) $(CPUFLAGS-1) $(CPUFLAGS-2) -head-y := arch/microblaze/kernel/head.o libs-y += arch/microblaze/lib/ boot := arch/microblaze/boot diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index 51337fffb947..8150daf04a76 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -83,7 +83,7 @@ CONFIG_CIFS=y CONFIG_CIFS_STATS2=y CONFIG_ENCRYPTED_KEYS=y CONFIG_DMA_CMA=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KGDB=y CONFIG_KGDB_TESTS=y CONFIG_KGDB_KDB=y diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index 7e9e92670df3..4e193c7550df 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -63,11 +63,6 @@ struct thread_struct { .pgdir = swapper_pg_dir, \ } -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *dead_task) -{ -} - unsigned long __get_wchan(struct task_struct *p); /* The size allocated for kernel stacks. This _must_ be a power of two! */ diff --git a/arch/microblaze/include/asm/xilinx_mb_manager.h b/arch/microblaze/include/asm/xilinx_mb_manager.h new file mode 100644 index 000000000000..7b6995722b0c --- /dev/null +++ b/arch/microblaze/include/asm/xilinx_mb_manager.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Xilinx, Inc. + */ +#ifndef _XILINX_MB_MANAGER_H +#define _XILINX_MB_MANAGER_H + +# ifndef __ASSEMBLY__ + +#include <linux/of_address.h> + +/* + * When the break vector gets asserted because of error injection, the break + * signal must be blocked before exiting from the break handler, Below api + * updates the manager address and control register and error counter callback + * arguments, which will be used by the break handler to block the break and + * call the callback function. + */ +void xmb_manager_register(uintptr_t phys_baseaddr, u32 cr_val, + void (*callback)(void *data), + void *priv, void (*reset_callback)(void *data)); +asmlinkage void xmb_inject_err(void); + +# endif /* __ASSEMBLY__ */ + +/* Error injection offset */ +#define XMB_INJECT_ERR_OFFSET 0x200 + +#endif /* _XILINX_MB_MANAGER_H */ diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile index 15a20eb814ce..4393bee64eaf 100644 --- a/arch/microblaze/kernel/Makefile +++ b/arch/microblaze/kernel/Makefile @@ -12,9 +12,9 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_process.o = -pg endif -extra-y := head.o vmlinux.lds +extra-y := vmlinux.lds -obj-y += dma.o exceptions.o \ +obj-y += head.o dma.o exceptions.o \ hw_exception_handler.o irq.o \ process.o prom.o ptrace.o \ reset.o setup.o signal.o sys_microblaze.o timer.o traps.o unwind.o diff --git a/arch/microblaze/kernel/asm-offsets.c b/arch/microblaze/kernel/asm-offsets.c index 47ee409508b1..104c3ac5f30c 100644 --- a/arch/microblaze/kernel/asm-offsets.c +++ b/arch/microblaze/kernel/asm-offsets.c @@ -120,5 +120,12 @@ int main(int argc, char *argv[]) DEFINE(CC_FSR, offsetof(struct cpu_context, fsr)); BLANK(); + /* struct cpuinfo */ + DEFINE(CI_DCS, offsetof(struct cpuinfo, dcache_size)); + DEFINE(CI_DCL, offsetof(struct cpuinfo, dcache_line_length)); + DEFINE(CI_ICS, offsetof(struct cpuinfo, icache_size)); + DEFINE(CI_ICL, offsetof(struct cpuinfo, icache_line_length)); + BLANK(); + return 0; } diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S index d875a0c01032..582d7256d815 100644 --- a/arch/microblaze/kernel/entry.S +++ b/arch/microblaze/kernel/entry.S @@ -27,9 +27,11 @@ #include <asm/page.h> #include <asm/unistd.h> +#include <asm/xilinx_mb_manager.h> #include <linux/errno.h> #include <asm/signal.h> +#include <asm/mmu.h> #undef DEBUG @@ -287,6 +289,44 @@ syscall_debug_table: .text +.extern cpuinfo + +C_ENTRY(mb_flush_dcache): + addik r1, r1, -PT_SIZE + SAVE_REGS + + addik r3, r0, cpuinfo + lwi r7, r3, CI_DCS + lwi r8, r3, CI_DCL + sub r9, r7, r8 +1: + wdc.flush r9, r0 + bgtid r9, 1b + addk r9, r9, r8 + + RESTORE_REGS + addik r1, r1, PT_SIZE + rtsd r15, 8 + nop + +C_ENTRY(mb_invalidate_icache): + addik r1, r1, -PT_SIZE + SAVE_REGS + + addik r3, r0, cpuinfo + lwi r7, r3, CI_ICS + lwi r8, r3, CI_ICL + sub r9, r7, r8 +1: + wic r9, r0 + bgtid r9, 1b + addk r9, r9, r8 + + RESTORE_REGS + addik r1, r1, PT_SIZE + rtsd r15, 8 + nop + /* * User trap. * @@ -753,6 +793,160 @@ IRQ_return: /* MS: Make global symbol for debugging */ rtid r14, 0 nop +#ifdef CONFIG_MB_MANAGER + +#define PT_PID PT_SIZE +#define PT_TLBI PT_SIZE + 4 +#define PT_ZPR PT_SIZE + 8 +#define PT_TLBL0 PT_SIZE + 12 +#define PT_TLBH0 PT_SIZE + 16 + +C_ENTRY(_xtmr_manager_reset): + lwi r1, r0, xmb_manager_stackpointer + + /* Restore MSR */ + lwi r2, r1, PT_MSR + mts rmsr, r2 + bri 4 + + /* restore Special purpose registers */ + lwi r2, r1, PT_PID + mts rpid, r2 + + lwi r2, r1, PT_TLBI + mts rtlbx, r2 + + lwi r2, r1, PT_ZPR + mts rzpr, r2 + +#if CONFIG_XILINX_MICROBLAZE0_USE_FPU + lwi r2, r1, PT_FSR + mts rfsr, r2 +#endif + + /* restore all the tlb's */ + addik r3, r0, TOPHYS(tlb_skip) + addik r6, r0, PT_TLBL0 + addik r7, r0, PT_TLBH0 +restore_tlb: + add r6, r6, r1 + add r7, r7, r1 + lwi r2, r6, 0 + mts rtlblo, r2 + lwi r2, r7, 0 + mts rtlbhi, r2 + addik r6, r6, 4 + addik r7, r7, 4 + bgtid r3, restore_tlb + addik r3, r3, -1 + + lwi r5, r0, TOPHYS(xmb_manager_dev) + lwi r8, r0, TOPHYS(xmb_manager_reset_callback) + set_vms + /* return from reset need -8 to adjust for rtsd r15, 8 */ + addik r15, r0, ret_from_reset - 8 + rtbd r8, 0 + nop + +ret_from_reset: + set_bip /* Ints masked for state restore */ + VM_OFF + /* MS: Restore all regs */ + RESTORE_REGS + lwi r14, r1, PT_R14 + lwi r16, r1, PT_PC + addik r1, r1, PT_SIZE + 36 + rtbd r16, 0 + nop + +/* + * Break handler for MB Manager. Enter to _xmb_manager_break by + * injecting fault in one of the TMR Microblaze core. + * FIXME: This break handler supports getting + * called from kernel space only. + */ +C_ENTRY(_xmb_manager_break): + /* + * Reserve memory in the stack for context store/restore + * (which includes memory for storing tlbs (max two tlbs)) + */ + addik r1, r1, -PT_SIZE - 36 + swi r1, r0, xmb_manager_stackpointer + SAVE_REGS + swi r14, r1, PT_R14 /* rewrite saved R14 value */ + swi r16, r1, PT_PC; /* PC and r16 are the same */ + + lwi r6, r0, TOPHYS(xmb_manager_baseaddr) + lwi r7, r0, TOPHYS(xmb_manager_crval) + /* + * When the break vector gets asserted because of error injection, + * the break signal must be blocked before exiting from the + * break handler, below code configures the tmr manager + * control register to block break signal. + */ + swi r7, r6, 0 + + /* Save the special purpose registers */ + mfs r2, rpid + swi r2, r1, PT_PID + + mfs r2, rtlbx + swi r2, r1, PT_TLBI + + mfs r2, rzpr + swi r2, r1, PT_ZPR + +#if CONFIG_XILINX_MICROBLAZE0_USE_FPU + mfs r2, rfsr + swi r2, r1, PT_FSR +#endif + mfs r2, rmsr + swi r2, r1, PT_MSR + + /* Save all the tlb's */ + addik r3, r0, TOPHYS(tlb_skip) + addik r6, r0, PT_TLBL0 + addik r7, r0, PT_TLBH0 +save_tlb: + add r6, r6, r1 + add r7, r7, r1 + mfs r2, rtlblo + swi r2, r6, 0 + mfs r2, rtlbhi + swi r2, r7, 0 + addik r6, r6, 4 + addik r7, r7, 4 + bgtid r3, save_tlb + addik r3, r3, -1 + + lwi r5, r0, TOPHYS(xmb_manager_dev) + lwi r8, r0, TOPHYS(xmb_manager_callback) + /* return from break need -8 to adjust for rtsd r15, 8 */ + addik r15, r0, ret_from_break - 8 + rtbd r8, 0 + nop + +ret_from_break: + /* flush the d-cache */ + bralid r15, mb_flush_dcache + nop + + /* + * To make sure microblaze i-cache is in a proper state + * invalidate the i-cache. + */ + bralid r15, mb_invalidate_icache + nop + + set_bip; /* Ints masked for state restore */ + VM_OFF; + mbar 1 + mbar 2 + bri 4 + suspend + nop +#endif + /* * Debug trap for KGDB. Enter to _debug_exception by brki r16, 0x18 * and call handling function with saved pt_regs @@ -957,6 +1151,88 @@ ENTRY(_switch_to) rtsd r15, 8 nop +#ifdef CONFIG_MB_MANAGER +.global xmb_inject_err +.section .text +.align 2 +.ent xmb_inject_err +.type xmb_inject_err, @function +xmb_inject_err: + addik r1, r1, -PT_SIZE + SAVE_REGS + + /* Switch to real mode */ + VM_OFF; + set_bip; + mbar 1 + mbar 2 + bralid r15, XMB_INJECT_ERR_OFFSET + nop; + + /* enable virtual mode */ + set_vms; + /* barrier for instructions and data accesses */ + mbar 1 + mbar 2 + /* + * Enable Interrupts, Virtual Protected Mode, equalize + * initial state for all possible entries. + */ + rtbd r0, 1f + nop; +1: + RESTORE_REGS + addik r1, r1, PT_SIZE + rtsd r15, 8; + nop; +.end xmb_inject_err + +.section .data +.global xmb_manager_dev +.global xmb_manager_baseaddr +.global xmb_manager_crval +.global xmb_manager_callback +.global xmb_manager_reset_callback +.global xmb_manager_stackpointer +.align 4 +xmb_manager_dev: + .long 0 +xmb_manager_baseaddr: + .long 0 +xmb_manager_crval: + .long 0 +xmb_manager_callback: + .long 0 +xmb_manager_reset_callback: + .long 0 +xmb_manager_stackpointer: + .long 0 + +/* + * When the break vector gets asserted because of error injection, + * the break signal must be blocked before exiting from the + * break handler, Below api updates the manager address and + * control register and error count callback arguments, + * which will be used by the break handler to block the + * break and call the callback function. + */ +.global xmb_manager_register +.section .text +.align 2 +.ent xmb_manager_register +.type xmb_manager_register, @function +xmb_manager_register: + swi r5, r0, xmb_manager_baseaddr + swi r6, r0, xmb_manager_crval + swi r7, r0, xmb_manager_callback + swi r8, r0, xmb_manager_dev + swi r9, r0, xmb_manager_reset_callback + + rtsd r15, 8; + nop; +.end xmb_manager_register +#endif + ENTRY(_reset) VM_OFF brai 0; /* Jump to reset vector */ @@ -964,19 +1240,43 @@ ENTRY(_reset) /* These are compiled and loaded into high memory, then * copied into place in mach_early_setup */ .section .init.ivt, "ax" -#if CONFIG_MANUAL_RESET_VECTOR +#if CONFIG_MANUAL_RESET_VECTOR && !defined(CONFIG_MB_MANAGER) .org 0x0 brai CONFIG_MANUAL_RESET_VECTOR +#elif defined(CONFIG_MB_MANAGER) + .org 0x0 + brai TOPHYS(_xtmr_manager_reset); #endif .org 0x8 brai TOPHYS(_user_exception); /* syscall handler */ .org 0x10 brai TOPHYS(_interrupt); /* Interrupt handler */ +#ifdef CONFIG_MB_MANAGER + .org 0x18 + brai TOPHYS(_xmb_manager_break); /* microblaze manager break handler */ +#else .org 0x18 brai TOPHYS(_debug_exception); /* debug trap handler */ +#endif .org 0x20 brai TOPHYS(_hw_exception_handler); /* HW exception handler */ +#ifdef CONFIG_MB_MANAGER + /* + * For TMR Inject API which injects the error should + * be executed from LMB. + * TMR Inject is programmed with address of 0x200 so that + * when program counter matches with this address error will + * be injected. 0x200 is expected to be next available bram + * offset, hence used for this api. + */ + .org XMB_INJECT_ERR_OFFSET +xmb_inject_error: + nop + rtsd r15, 8 + nop +#endif + .section .rodata,"a" #include "syscall_table.S" diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 25dd4c5a8ef5..b26b77673c2c 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2140,7 +2140,7 @@ config PAGE_SIZE_64KB endchoice -config FORCE_MAX_ZONEORDER +config ARCH_FORCE_MAX_ORDER int "Maximum zone order" range 14 64 if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_64KB default "14" if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_64KB diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 4d2a3e73fc45..b296e33f8e33 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -324,8 +324,6 @@ endif OBJCOPYFLAGS += --remove-section=.reginfo -head-y := arch/mips/kernel/head.o - libs-y += arch/mips/lib/ libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/mips/math-emu/ diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig index 91ce75edbfb4..22ffde722bb9 100644 --- a/arch/mips/configs/bcm47xx_defconfig +++ b/arch/mips/configs/bcm47xx_defconfig @@ -72,7 +72,7 @@ CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_CRC32_SARWATE=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_INFO_REDUCED=y CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig index a2311495af79..0bc2e3cc573b 100644 --- a/arch/mips/configs/cavium_octeon_defconfig +++ b/arch/mips/configs/cavium_octeon_defconfig @@ -161,7 +161,7 @@ CONFIG_CRYPTO_SHA1_OCTEON=m CONFIG_CRYPTO_SHA256_OCTEON=m CONFIG_CRYPTO_SHA512_OCTEON=m CONFIG_CRYPTO_DES=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig index cc69b215854e..11f08b6a3013 100644 --- a/arch/mips/configs/ci20_defconfig +++ b/arch/mips/configs/ci20_defconfig @@ -143,7 +143,7 @@ CONFIG_MEMORY=y CONFIG_JZ4780_NEMC=y CONFIG_PWM=y CONFIG_PWM_JZ4740=m -CONFIG_JZ4780_EFUSE=y +CONFIG_NVMEM_JZ4780_EFUSE=y CONFIG_JZ4770_PHY=y CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set @@ -199,7 +199,7 @@ CONFIG_NLS_UTF8=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=32 CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_STRIP_ASM_SYMS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y diff --git a/arch/mips/configs/cu1000-neo_defconfig b/arch/mips/configs/cu1000-neo_defconfig index 5bd55eb32fe5..1cbc9302e1d1 100644 --- a/arch/mips/configs/cu1000-neo_defconfig +++ b/arch/mips/configs/cu1000-neo_defconfig @@ -113,7 +113,7 @@ CONFIG_PRINTK_TIME=y CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 CONFIG_CONSOLE_LOGLEVEL_QUIET=15 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=7 -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_STRIP_ASM_SYMS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y diff --git a/arch/mips/configs/cu1830-neo_defconfig b/arch/mips/configs/cu1830-neo_defconfig index cc69688962e8..a0f73f3cd6ce 100644 --- a/arch/mips/configs/cu1830-neo_defconfig +++ b/arch/mips/configs/cu1830-neo_defconfig @@ -116,7 +116,7 @@ CONFIG_PRINTK_TIME=y CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 CONFIG_CONSOLE_LOGLEVEL_QUIET=15 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=7 -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_STRIP_ASM_SYMS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig index a8b62df3c021..af070be1b583 100644 --- a/arch/mips/configs/db1xxx_defconfig +++ b/arch/mips/configs/db1xxx_defconfig @@ -9,7 +9,6 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_CGROUPS=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y diff --git a/arch/mips/configs/generic_defconfig b/arch/mips/configs/generic_defconfig index 714169e411cf..c2cd2b181ef3 100644 --- a/arch/mips/configs/generic_defconfig +++ b/arch/mips/configs/generic_defconfig @@ -3,7 +3,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y @@ -83,7 +82,7 @@ CONFIG_ROOT_NFS=y # CONFIG_XZ_DEC_ARMTHUMB is not set # CONFIG_XZ_DEC_SPARC is not set CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_INFO_REDUCED=y CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/mips/configs/omega2p_defconfig b/arch/mips/configs/omega2p_defconfig index 9c34daf83563..91fe2822f897 100644 --- a/arch/mips/configs/omega2p_defconfig +++ b/arch/mips/configs/omega2p_defconfig @@ -113,7 +113,7 @@ CONFIG_CRYPTO_LZO=y CONFIG_CRC16=y CONFIG_XZ_DEC=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/mips/configs/qi_lb60_defconfig b/arch/mips/configs/qi_lb60_defconfig index b4448d0876d5..7e5d9741bd5d 100644 --- a/arch/mips/configs/qi_lb60_defconfig +++ b/arch/mips/configs/qi_lb60_defconfig @@ -166,7 +166,7 @@ CONFIG_NLS_UTF8=y CONFIG_FONTS=y CONFIG_FONT_SUN8x16=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_STRIP_ASM_SYMS=y CONFIG_READABLE_ASM=y CONFIG_KGDB=y diff --git a/arch/mips/configs/vocore2_defconfig b/arch/mips/configs/vocore2_defconfig index 0722a3bf03c0..e47d4cc3353b 100644 --- a/arch/mips/configs/vocore2_defconfig +++ b/arch/mips/configs/vocore2_defconfig @@ -113,7 +113,7 @@ CONFIG_CRYPTO_LZO=y CONFIG_CRC16=y CONFIG_XZ_DEC=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/mips/crypto/Kconfig b/arch/mips/crypto/Kconfig new file mode 100644 index 000000000000..9003a5c1e879 --- /dev/null +++ b/arch/mips/crypto/Kconfig @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: GPL-2.0 + +menu "Accelerated Cryptographic Algorithms for CPU (mips)" + +config CRYPTO_CRC32_MIPS + tristate "CRC32c and CRC32" + depends on MIPS_CRC_SUPPORT + select CRYPTO_HASH + help + CRC32c and CRC32 CRC algorithms + + Architecture: mips + +config CRYPTO_POLY1305_MIPS + tristate "Hash functions: Poly1305" + depends on MIPS + select CRYPTO_ARCH_HAVE_LIB_POLY1305 + help + Poly1305 authenticator algorithm (RFC7539) + + Architecture: mips + +config CRYPTO_MD5_OCTEON + tristate "Digests: MD5 (OCTEON)" + depends on CPU_CAVIUM_OCTEON + select CRYPTO_MD5 + select CRYPTO_HASH + help + MD5 message digest algorithm (RFC1321) + + Architecture: mips OCTEON using crypto instructions, when available + +config CRYPTO_SHA1_OCTEON + tristate "Hash functions: SHA-1 (OCTEON)" + depends on CPU_CAVIUM_OCTEON + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash algorithm (FIPS 180) + + Architecture: mips OCTEON + +config CRYPTO_SHA256_OCTEON + tristate "Hash functions: SHA-224 and SHA-256 (OCTEON)" + depends on CPU_CAVIUM_OCTEON + select CRYPTO_SHA256 + select CRYPTO_HASH + help + SHA-224 and SHA-256 secure hash algorithms (FIPS 180) + + Architecture: mips OCTEON using crypto instructions, when available + +config CRYPTO_SHA512_OCTEON + tristate "Hash functions: SHA-384 and SHA-512 (OCTEON)" + depends on CPU_CAVIUM_OCTEON + select CRYPTO_SHA512 + select CRYPTO_HASH + help + SHA-384 and SHA-512 secure hash algorithms (FIPS 180) + + Architecture: mips OCTEON using crypto instructions, when available + +config CRYPTO_CHACHA_MIPS + tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (MIPS32r2)" + depends on CPU_MIPS32_R2 + select CRYPTO_SKCIPHER + select CRYPTO_ARCH_HAVE_LIB_CHACHA + help + Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12 + stream cipher algorithms + + Architecture: MIPS32r2 + +endmenu diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 4bb24579d12e..3fde1ff72bd1 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -344,9 +344,6 @@ struct thread_struct { struct task_struct; -/* Free all resources held by a thread. */ -#define release_thread(thread) do { } while(0) - /* * Do necessary setup to start up a newly executed thread. */ diff --git a/arch/mips/include/asm/sn/gda.h b/arch/mips/include/asm/sn/gda.h index d52f81620661..5b8c96d5b587 100644 --- a/arch/mips/include/asm/sn/gda.h +++ b/arch/mips/include/asm/sn/gda.h @@ -16,8 +16,6 @@ #include <asm/sn/addrs.h> -#define GDA_MAGIC 0x58464552 - /* * GDA Version History * diff --git a/arch/mips/include/asm/termios.h b/arch/mips/include/asm/termios.h deleted file mode 100644 index bc29eeacc55a..000000000000 --- a/arch/mips/include/asm/termios.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1995, 1996, 2000, 2001 by Ralf Baechle - * Copyright (C) 2000, 2001 Silicon Graphics, Inc. - */ -#ifndef _ASM_TERMIOS_H -#define _ASM_TERMIOS_H - -#include <linux/uaccess.h> -#include <uapi/asm/termios.h> - -/* - * intr=^C quit=^\ erase=del kill=^U - * vmin=\1 vtime=\0 eol2=\0 swtc=\0 - * start=^Q stop=^S susp=^Z vdsusp= - * reprint=^R discard=^U werase=^W lnext=^V - * eof=^D eol=\0 - */ -#define INIT_C_CC "\003\034\177\025\1\0\0\0\021\023\032\0\022\017\027\026\004\0" - -#include <linux/string.h> - -/* - * Translate a "termio" structure into a "termios". Ugh. - */ -static inline int user_termio_to_kernel_termios(struct ktermios *termios, - struct termio __user *termio) -{ - unsigned short iflag, oflag, cflag, lflag; - unsigned int err; - - if (!access_ok(termio, sizeof(struct termio))) - return -EFAULT; - - err = __get_user(iflag, &termio->c_iflag); - termios->c_iflag = (termios->c_iflag & 0xffff0000) | iflag; - err |=__get_user(oflag, &termio->c_oflag); - termios->c_oflag = (termios->c_oflag & 0xffff0000) | oflag; - err |=__get_user(cflag, &termio->c_cflag); - termios->c_cflag = (termios->c_cflag & 0xffff0000) | cflag; - err |=__get_user(lflag, &termio->c_lflag); - termios->c_lflag = (termios->c_lflag & 0xffff0000) | lflag; - err |=__get_user(termios->c_line, &termio->c_line); - if (err) - return -EFAULT; - - if (__copy_from_user(termios->c_cc, termio->c_cc, NCC)) - return -EFAULT; - - return 0; -} - -/* - * Translate a "termios" structure into a "termio". Ugh. - */ -static inline int kernel_termios_to_user_termio(struct termio __user *termio, - struct ktermios *termios) -{ - int err; - - if (!access_ok(termio, sizeof(struct termio))) - return -EFAULT; - - err = __put_user(termios->c_iflag, &termio->c_iflag); - err |= __put_user(termios->c_oflag, &termio->c_oflag); - err |= __put_user(termios->c_cflag, &termio->c_cflag); - err |= __put_user(termios->c_lflag, &termio->c_lflag); - err |= __put_user(termios->c_line, &termio->c_line); - if (err) - return -EFAULT; - - if (__copy_to_user(termio->c_cc, termios->c_cc, NCC)) - return -EFAULT; - - return 0; -} - -static inline int user_termios_to_kernel_termios(struct ktermios __user *k, - struct termios2 *u) -{ - return copy_from_user(k, u, sizeof(struct termios2)) ? -EFAULT : 0; -} - -static inline int kernel_termios_to_user_termios(struct termios2 __user *u, - struct ktermios *k) -{ - return copy_to_user(u, k, sizeof(struct termios2)) ? -EFAULT : 0; -} - -static inline int user_termios_to_kernel_termios_1(struct ktermios *k, - struct termios __user *u) -{ - return copy_from_user(k, u, sizeof(struct termios)) ? -EFAULT : 0; -} - -static inline int kernel_termios_to_user_termios_1(struct termios __user *u, - struct ktermios *k) -{ - return copy_to_user(u, k, sizeof(struct termios)) ? -EFAULT : 0; -} - -#endif /* _ASM_TERMIOS_H */ diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index 1be428663c10..c6e1fc77c996 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -103,6 +103,8 @@ #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ +#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 7c96282bff2e..5d1addac5e28 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -3,9 +3,9 @@ # Makefile for the Linux/MIPS kernel. # -extra-y := head.o vmlinux.lds +extra-y := vmlinux.lds -obj-y += branch.o cmpxchg.o elf.o entry.o genex.o idle.o irq.o \ +obj-y += head.o branch.o cmpxchg.o elf.o entry.o genex.o idle.o irq.o \ process.o prom.o ptrace.o reset.o setup.o signal.o \ syscall.o time.o topology.o traps.o unaligned.o watch.o \ vdso.o cacheinfo.o diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 35b912bce429..bbe9ce471791 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -711,7 +711,7 @@ unsigned long mips_stack_top(void) unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() & ~PAGE_MASK; + sp -= prandom_u32_max(PAGE_SIZE); return sp & ALMASK; } diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 39c79f67c7a3..f1c88f8a1dc5 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -751,7 +751,7 @@ static void __init prefill_possible_map(void) for (; i < NR_CPUS; i++) set_cpu_possible(i, false); - nr_cpu_ids = possible; + set_nr_cpu_ids(possible); } #else static inline void prefill_possible_map(void) {} diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index b2cc2c2dd4bf..5fd9bf1d596c 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -79,7 +79,7 @@ static unsigned long vdso_base(void) } if (current->flags & PF_RANDOMIZE) { - base += get_random_int() & (VDSO_RANDOMIZE_SIZE - 1); + base += prandom_u32_max(VDSO_RANDOMIZE_SIZE); base = PAGE_ALIGN(base); } diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index b494d8d39290..edaec93a1a1f 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -955,13 +955,11 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) kvm_vcpu_halt(vcpu); /* - * We we are runnable, then definitely go off to user space to + * We are runnable, then definitely go off to user space to * check if any I/O interrupts are pending. */ - if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { - kvm_clear_request(KVM_REQ_UNHALT, vcpu); + if (kvm_arch_vcpu_runnable(vcpu)) vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; - } } return EMULATE_DONE; diff --git a/arch/nios2/Kbuild b/arch/nios2/Kbuild index 4e39f7abdeb6..fc2952edd2de 100644 --- a/arch/nios2/Kbuild +++ b/arch/nios2/Kbuild @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-y += kernel/ mm/ platform/ boot/dts/ + # for cleaning subdir- += boot diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index 4167f1eb4cd8..a582f72104f3 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -44,7 +44,7 @@ menu "Kernel features" source "kernel/Kconfig.hz" -config FORCE_MAX_ZONEORDER +config ARCH_FORCE_MAX_ORDER int "Maximum zone order" range 9 20 default "11" diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile index d6a7499b814c..f1ff4ce0f1a2 100644 --- a/arch/nios2/Makefile +++ b/arch/nios2/Makefile @@ -37,10 +37,7 @@ KBUILD_CFLAGS += -DUTS_SYSNAME=\"$(UTS_SYSNAME)\" KBUILD_CFLAGS += -fno-builtin KBUILD_CFLAGS += -G 0 -head-y := arch/nios2/kernel/head.o libs-y += arch/nios2/lib/ $(LIBGCC) -core-y += arch/nios2/kernel/ arch/nios2/mm/ -core-y += arch/nios2/platform/ INSTALL_PATH ?= /tftpboot nios2-boot := arch/$(ARCH)/boot @@ -48,8 +45,6 @@ BOOT_TARGETS = vmImage zImage PHONY += $(BOOT_TARGETS) install KBUILD_IMAGE := $(nios2-boot)/vmImage -core-y += $(nios2-boot)/dts/ - all: vmImage $(BOOT_TARGETS): vmlinux diff --git a/arch/nios2/configs/10m50_defconfig b/arch/nios2/configs/10m50_defconfig index a7967b4cfb6e..91c3fce4dc7f 100644 --- a/arch/nios2/configs/10m50_defconfig +++ b/arch/nios2/configs/10m50_defconfig @@ -74,4 +74,4 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_ROOT_NFS=y CONFIG_SUNRPC_DEBUG=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/nios2/configs/3c120_defconfig b/arch/nios2/configs/3c120_defconfig index 423a0c40a162..c42ad7e162a3 100644 --- a/arch/nios2/configs/3c120_defconfig +++ b/arch/nios2/configs/3c120_defconfig @@ -71,4 +71,4 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_ROOT_NFS=y CONFIG_SUNRPC_DEBUG=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h index b8125dfbcad2..8916d93d5c2d 100644 --- a/arch/nios2/include/asm/processor.h +++ b/arch/nios2/include/asm/processor.h @@ -64,11 +64,6 @@ extern void start_thread(struct pt_regs *regs, unsigned long pc, struct task_struct; -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *dead_task) -{ -} - extern unsigned long __get_wchan(struct task_struct *p); #define task_pt_regs(p) \ diff --git a/arch/nios2/kernel/Makefile b/arch/nios2/kernel/Makefile index 0b645e1e3158..78a913181fa1 100644 --- a/arch/nios2/kernel/Makefile +++ b/arch/nios2/kernel/Makefile @@ -3,9 +3,9 @@ # Makefile for the nios2 linux kernel. # -extra-y += head.o extra-y += vmlinux.lds +obj-y += head.o obj-y += cpuinfo.o obj-y += entry.o obj-y += insnemu.o diff --git a/arch/openrisc/Makefile b/arch/openrisc/Makefile index b446510173cd..68249521db5a 100644 --- a/arch/openrisc/Makefile +++ b/arch/openrisc/Makefile @@ -55,8 +55,6 @@ ifeq ($(CONFIG_OPENRISC_HAVE_INST_SEXT),y) KBUILD_CFLAGS += $(call cc-option,-msext) endif -head-y := arch/openrisc/kernel/head.o - libs-y += $(LIBGCC) PHONY += vmlinux.bin diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index aa1699c18add..ed9efb430afa 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -72,7 +72,6 @@ struct thread_struct { void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp); -void release_thread(struct task_struct *); unsigned long __get_wchan(struct task_struct *p); #define cpu_relax() barrier() diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile index 2d172e79f58d..79129161f3e0 100644 --- a/arch/openrisc/kernel/Makefile +++ b/arch/openrisc/kernel/Makefile @@ -3,9 +3,9 @@ # Makefile for the linux kernel. # -extra-y := head.o vmlinux.lds +extra-y := vmlinux.lds -obj-y := setup.o or32_ksyms.o process.o dma.o \ +obj-y := head.o setup.o or32_ksyms.o process.o dma.o \ traps.o time.o irq.o entry.o ptrace.o signal.o \ sys_call_table.o unwinder.o diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index a82b2caaa560..b3edbb33b621 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -74,10 +74,10 @@ void *arch_dma_set_uncached(void *cpu_addr, size_t size) * We need to iterate through the pages, clearing the dcache for * them and setting the cache-inhibit bit. */ - mmap_read_lock(&init_mm); - error = walk_page_range(&init_mm, va, va + size, &set_nocache_walk_ops, - NULL); - mmap_read_unlock(&init_mm); + mmap_write_lock(&init_mm); + error = walk_page_range_novma(&init_mm, va, va + size, + &set_nocache_walk_ops, NULL, NULL); + mmap_write_unlock(&init_mm); if (error) return ERR_PTR(error); @@ -88,11 +88,11 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size) { unsigned long va = (unsigned long)cpu_addr; - mmap_read_lock(&init_mm); + mmap_write_lock(&init_mm); /* walk_page_range shouldn't be able to fail here */ - WARN_ON(walk_page_range(&init_mm, va, va + size, - &clear_nocache_walk_ops, NULL)); - mmap_read_unlock(&init_mm); + WARN_ON(walk_page_range_novma(&init_mm, va, va + size, + &clear_nocache_walk_ops, NULL, NULL)); + mmap_write_unlock(&init_mm); } void arch_sync_dma_for_device(phys_addr_t addr, size_t size, diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 52dc983ddeba..f94b5ec06786 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -125,10 +125,6 @@ void show_regs(struct pt_regs *regs) show_registers(regs); } -void release_thread(struct task_struct *dead_task) -{ -} - /* * Copy the thread-specific (arch specific) info from the current * process to the new one p diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index e38d993d87f2..a2d8600521f9 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -113,8 +113,6 @@ cflags-$(CONFIG_PA7100LC) += -march=1.1 -mschedule=7100LC cflags-$(CONFIG_PA7300LC) += -march=1.1 -mschedule=7300 cflags-$(CONFIG_PA8X00) += -march=2.0 -mschedule=8000 -head-y := arch/parisc/kernel/head.o - KBUILD_CFLAGS += $(cflags-y) LIBGCC := $(shell $(CC) -print-libgcc-file-name) export LIBGCC diff --git a/arch/parisc/include/asm/alternative.h b/arch/parisc/include/asm/alternative.h index 0ec54f43d6d2..1ed45fd085d3 100644 --- a/arch/parisc/include/asm/alternative.h +++ b/arch/parisc/include/asm/alternative.h @@ -22,10 +22,10 @@ struct alt_instr { s32 orig_offset; /* offset to original instructions */ - s32 len; /* end of original instructions */ - u32 cond; /* see ALT_COND_XXX */ + s16 len; /* end of original instructions */ + u16 cond; /* see ALT_COND_XXX */ u32 replacement; /* replacement instruction or code */ -}; +} __packed; void set_kernel_text_rw(int enable_read_write); void apply_alternatives_all(void); @@ -35,8 +35,9 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* Alternative SMP implementation. */ #define ALTERNATIVE(cond, replacement) "!0:" \ ".section .altinstructions, \"aw\" !" \ - ".word (0b-4-.), 1, " __stringify(cond) "," \ - __stringify(replacement) " !" \ + ".word (0b-4-.) !" \ + ".hword 1, " __stringify(cond) " !" \ + ".word " __stringify(replacement) " !" \ ".previous" #else @@ -44,15 +45,17 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* to replace one single instructions by a new instruction */ #define ALTERNATIVE(from, to, cond, replacement)\ .section .altinstructions, "aw" ! \ - .word (from - .), (to - from)/4 ! \ - .word cond, replacement ! \ + .word (from - .) ! \ + .hword (to - from)/4, cond ! \ + .word replacement ! \ .previous /* to replace multiple instructions by new code */ #define ALTERNATIVE_CODE(from, num_instructions, cond, new_instr_ptr)\ .section .altinstructions, "aw" ! \ - .word (from - .), -num_instructions ! \ - .word cond, (new_instr_ptr - .) ! \ + .word (from - .) ! \ + .hword -num_instructions, cond ! \ + .word (new_instr_ptr - .) ! \ .previous #endif /* __ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h index b643092d4b98..fcbcf9a96c11 100644 --- a/arch/parisc/include/asm/pdc.h +++ b/arch/parisc/include/asm/pdc.h @@ -19,9 +19,6 @@ extern unsigned long parisc_pat_pdc_cap; /* PDC capabilities (PAT) */ #define PDC_TYPE_SYSTEM_MAP 1 /* 32-bit, but supports PDC_SYSTEM_MAP */ #define PDC_TYPE_SNAKE 2 /* Doesn't support SYSTEM_MAP */ -void pdc_console_init(void); /* in pdc_console.c */ -void pdc_console_restart(void); - void setup_pdc(void); /* in inventory.c */ /* wrapper-functions from pdc.c */ diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index df7b931865d2..ecd028854469 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -192,6 +192,11 @@ extern void __update_cache(pte_t pte); #define _PAGE_PRESENT_BIT 22 /* (0x200) Software: translation valid */ #define _PAGE_HPAGE_BIT 21 /* (0x400) Software: Huge Page */ #define _PAGE_USER_BIT 20 /* (0x800) Software: User accessible page */ +#ifdef CONFIG_HUGETLB_PAGE +#define _PAGE_SPECIAL_BIT _PAGE_DMB_BIT /* DMB feature is currently unused */ +#else +#define _PAGE_SPECIAL_BIT _PAGE_HPAGE_BIT /* use unused HUGE PAGE bit */ +#endif /* N.B. The bits are defined in terms of a 32 bit word above, so the */ /* following macro is ok for both 32 and 64 bit. */ @@ -219,7 +224,7 @@ extern void __update_cache(pte_t pte); #define _PAGE_PRESENT (1 << xlate_pabit(_PAGE_PRESENT_BIT)) #define _PAGE_HUGE (1 << xlate_pabit(_PAGE_HPAGE_BIT)) #define _PAGE_USER (1 << xlate_pabit(_PAGE_USER_BIT)) -#define _PAGE_SPECIAL (_PAGE_DMB) +#define _PAGE_SPECIAL (1 << xlate_pabit(_PAGE_SPECIAL_BIT)) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL) diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 4621ceb51314..a608970b249a 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -266,9 +266,6 @@ on downward growing arches, it looks like this: struct mm_struct; -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - extern unsigned long __get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) ((tsk)->thread.regs.iaoq[0]) diff --git a/arch/parisc/include/asm/termios.h b/arch/parisc/include/asm/termios.h deleted file mode 100644 index cded9dc90c1b..000000000000 --- a/arch/parisc/include/asm/termios.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PARISC_TERMIOS_H -#define _PARISC_TERMIOS_H - -#include <uapi/asm/termios.h> - - -/* intr=^C quit=^\ erase=del kill=^U - eof=^D vtime=\0 vmin=\1 sxtc=\0 - start=^Q stop=^S susp=^Z eol=\0 - reprint=^R discard=^U werase=^W lnext=^V - eol2=\0 -*/ -#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" - -/* - * Translate a "termio" structure into a "termios". Ugh. - */ -#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \ - unsigned short __tmp; \ - get_user(__tmp,&(termio)->x); \ - *(unsigned short *) &(termios)->x = __tmp; \ -} - -#define user_termio_to_kernel_termios(termios, termio) \ -({ \ - SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \ - SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \ - SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \ - SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \ - copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \ -}) - -/* - * Translate a "termios" structure into a "termio". Ugh. - */ -#define kernel_termios_to_user_termio(termio, termios) \ -({ \ - put_user((termios)->c_iflag, &(termio)->c_iflag); \ - put_user((termios)->c_oflag, &(termio)->c_oflag); \ - put_user((termios)->c_cflag, &(termio)->c_cflag); \ - put_user((termios)->c_lflag, &(termio)->c_lflag); \ - put_user((termios)->c_line, &(termio)->c_line); \ - copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \ -}) - -#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2)) -#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2)) -#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) -#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) - -#endif /* _PARISC_TERMIOS_H */ diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index a7ea3204a5fa..22133a6a506e 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -70,6 +70,8 @@ #define MADV_WIPEONFORK 71 /* Zero memory on fork, child only */ #define MADV_KEEPONFORK 72 /* Undo MADV_WIPEONFORK */ +#define MADV_COLLAPSE 73 /* Synchronous hugepage collapse */ + #define MADV_HWPOISON 100 /* poison a page for testing */ #define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */ diff --git a/arch/parisc/include/uapi/asm/termios.h b/arch/parisc/include/uapi/asm/termios.h deleted file mode 100644 index aba174f23ef0..000000000000 --- a/arch/parisc/include/uapi/asm/termios.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_PARISC_TERMIOS_H -#define _UAPI_PARISC_TERMIOS_H - -#include <asm/termbits.h> -#include <asm/ioctls.h> - -struct winsize { - unsigned short ws_row; - unsigned short ws_col; - unsigned short ws_xpixel; - unsigned short ws_ypixel; -}; - -#define NCC 8 -struct termio { - unsigned short c_iflag; /* input mode flags */ - unsigned short c_oflag; /* output mode flags */ - unsigned short c_cflag; /* control mode flags */ - unsigned short c_lflag; /* local mode flags */ - unsigned char c_line; /* line discipline */ - unsigned char c_cc[NCC]; /* control characters */ -}; - -/* modem lines */ -#define TIOCM_LE 0x001 -#define TIOCM_DTR 0x002 -#define TIOCM_RTS 0x004 -#define TIOCM_ST 0x008 -#define TIOCM_SR 0x010 -#define TIOCM_CTS 0x020 -#define TIOCM_CAR 0x040 -#define TIOCM_RNG 0x080 -#define TIOCM_DSR 0x100 -#define TIOCM_CD TIOCM_CAR -#define TIOCM_RI TIOCM_RNG -#define TIOCM_OUT1 0x2000 -#define TIOCM_OUT2 0x4000 -#define TIOCM_LOOP 0x8000 - -/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ - - -#endif /* _UAPI_PARISC_TERMIOS_H */ diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index d0bfac89a842..3d138c9cf9ce 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -3,9 +3,9 @@ # Makefile for arch/parisc/kernel # -extra-y := head.o vmlinux.lds +extra-y := vmlinux.lds -obj-y := cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \ +obj-y := head.o cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \ pa7300lc.o syscall.o entry.o sys_parisc.o firmware.o \ ptrace.o hardware.o inventory.o drivers.o alternative.o \ signal.o hpmc.o real2.o parisc_ksyms.o unaligned.o \ diff --git a/arch/parisc/kernel/alternative.c b/arch/parisc/kernel/alternative.c index daa1e9047275..66f5672c70bd 100644 --- a/arch/parisc/kernel/alternative.c +++ b/arch/parisc/kernel/alternative.c @@ -26,7 +26,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start, struct alt_instr *entry; int index = 0, applied = 0; int num_cpus = num_online_cpus(); - u32 cond_check; + u16 cond_check; cond_check = ALT_COND_ALWAYS | ((num_cpus == 1) ? ALT_COND_NO_SMP : 0) | @@ -45,8 +45,9 @@ void __init_or_module apply_alternatives(struct alt_instr *start, for (entry = start; entry < end; entry++, index++) { - u32 *from, cond, replacement; - s32 len; + u32 *from, replacement; + u16 cond; + s16 len; from = (u32 *)((ulong)&entry->orig_offset + entry->orig_offset); len = entry->len; diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 3feb7694e0ca..1d3b8bc8a623 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -657,15 +657,20 @@ static inline unsigned long mm_total_size(struct mm_struct *mm) { struct vm_area_struct *vma; unsigned long usize = 0; + VMA_ITERATOR(vmi, mm, 0); - for (vma = mm->mmap; vma && usize < parisc_cache_flush_threshold; vma = vma->vm_next) + for_each_vma(vmi, vma) { + if (usize >= parisc_cache_flush_threshold) + break; usize += vma->vm_end - vma->vm_start; + } return usize; } void flush_cache_mm(struct mm_struct *mm) { struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); /* * Flushing the whole cache on each cpu takes forever on @@ -685,7 +690,7 @@ void flush_cache_mm(struct mm_struct *mm) } /* Flush mm */ - for (vma = mm->mmap; vma; vma = vma->vm_next) + for_each_vma(vmi, vma) flush_cache_pages(vma, vma->vm_start, vma->vm_end); } diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index df8102fb435f..0e5ebfe8d9d2 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -499,6 +499,10 @@ * Finally, _PAGE_READ goes in the top bit of PL1 (so we * trigger an access rights trap in user space if the user * tries to read an unreadable page */ +#if _PAGE_SPECIAL_BIT == _PAGE_DMB_BIT + /* need to drop DMB bit, as it's used as SPECIAL flag */ + depi 0,_PAGE_SPECIAL_BIT,1,\pte +#endif depd \pte,8,7,\prot /* PAGE_USER indicates the page can be read with user privileges, @@ -529,6 +533,10 @@ * makes the tlb entry for the differently formatted pa11 * insertion instructions */ .macro make_insert_tlb_11 spc,pte,prot +#if _PAGE_SPECIAL_BIT == _PAGE_DMB_BIT + /* need to drop DMB bit, as it's used as SPECIAL flag */ + depi 0,_PAGE_SPECIAL_BIT,1,\pte +#endif zdep \spc,30,15,\prot dep \pte,8,7,\prot extru,= \pte,_PAGE_NO_CACHE_BIT,1,%r0 diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c index 2661cdd256ae..7d0989f523d0 100644 --- a/arch/parisc/kernel/pdc_cons.c +++ b/arch/parisc/kernel/pdc_cons.c @@ -1,46 +1,18 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * PDC Console support - ie use firmware to dump text via boot console + * PDC early console support - use PDC firmware to dump text via boot console * - * Copyright (C) 1999-2003 Matthew Wilcox <willy at parisc-linux.org> - * Copyright (C) 2000 Martin K Petersen <mkp at mkp.net> - * Copyright (C) 2000 John Marvin <jsm at parisc-linux.org> - * Copyright (C) 2000-2003 Paul Bame <bame at parisc-linux.org> - * Copyright (C) 2000 Philipp Rumpf <prumpf with tux.org> - * Copyright (C) 2000 Michael Ang <mang with subcarrier.org> - * Copyright (C) 2000 Grant Grundler <grundler with parisc-linux.org> - * Copyright (C) 2001-2002 Ryan Bradetich <rbrad at parisc-linux.org> - * Copyright (C) 2001 Helge Deller <deller at parisc-linux.org> - * Copyright (C) 2001 Thomas Bogendoerfer <tsbogend at parisc-linux.org> - * Copyright (C) 2002 Randolph Chung <tausq with parisc-linux.org> - * Copyright (C) 2010 Guy Martin <gmsoft at tuxicoman.be> + * Copyright (C) 2001-2022 Helge Deller <deller@gmx.de> */ -/* - * The PDC console is a simple console, which can be used for debugging - * boot related problems on HP PA-RISC machines. It is also useful when no - * other console works. - * - * This code uses the ROM (=PDC) based functions to read and write characters - * from and to PDC's boot path. - */ - -/* Define EARLY_BOOTUP_DEBUG to debug kernel related boot problems. - * On production kernels EARLY_BOOTUP_DEBUG should be undefined. */ -#define EARLY_BOOTUP_DEBUG - - -#include <linux/kernel.h> #include <linux/console.h> -#include <linux/string.h> #include <linux/init.h> -#include <linux/major.h> -#include <linux/tty.h> +#include <linux/serial_core.h> +#include <linux/kgdb.h> #include <asm/page.h> /* for PAGE0 */ #include <asm/pdc.h> /* for iodc_call() proto and friends */ static DEFINE_SPINLOCK(pdc_console_lock); -static struct console pdc_cons; static void pdc_console_write(struct console *co, const char *s, unsigned count) { @@ -54,7 +26,8 @@ static void pdc_console_write(struct console *co, const char *s, unsigned count) spin_unlock_irqrestore(&pdc_console_lock, flags); } -int pdc_console_poll_key(struct console *co) +#ifdef CONFIG_KGDB +static int kgdb_pdc_read_char(void) { int c; unsigned long flags; @@ -63,201 +36,40 @@ int pdc_console_poll_key(struct console *co) c = pdc_iodc_getc(); spin_unlock_irqrestore(&pdc_console_lock, flags); - return c; -} - -static int pdc_console_setup(struct console *co, char *options) -{ - return 0; -} - -#if defined(CONFIG_PDC_CONSOLE) -#include <linux/vt_kern.h> -#include <linux/tty_flip.h> - -#define PDC_CONS_POLL_DELAY (30 * HZ / 1000) - -static void pdc_console_poll(struct timer_list *unused); -static DEFINE_TIMER(pdc_console_timer, pdc_console_poll); -static struct tty_port tty_port; - -static int pdc_console_tty_open(struct tty_struct *tty, struct file *filp) -{ - tty_port_tty_set(&tty_port, tty); - mod_timer(&pdc_console_timer, jiffies + PDC_CONS_POLL_DELAY); - - return 0; + return (c <= 0) ? NO_POLL_CHAR : c; } -static void pdc_console_tty_close(struct tty_struct *tty, struct file *filp) +static void kgdb_pdc_write_char(u8 chr) { - if (tty->count == 1) { - del_timer_sync(&pdc_console_timer); - tty_port_tty_set(&tty_port, NULL); - } + if (PAGE0->mem_cons.cl_class != CL_DUPLEX) + pdc_console_write(NULL, &chr, 1); } -static int pdc_console_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) -{ - pdc_console_write(NULL, buf, count); - return count; -} - -static unsigned int pdc_console_tty_write_room(struct tty_struct *tty) -{ - return 32768; /* no limit, no buffer used */ -} - -static const struct tty_operations pdc_console_tty_ops = { - .open = pdc_console_tty_open, - .close = pdc_console_tty_close, - .write = pdc_console_tty_write, - .write_room = pdc_console_tty_write_room, +static struct kgdb_io kgdb_pdc_io_ops = { + .name = "kgdb_pdc", + .read_char = kgdb_pdc_read_char, + .write_char = kgdb_pdc_write_char, }; - -static void pdc_console_poll(struct timer_list *unused) -{ - int data, count = 0; - - while (1) { - data = pdc_console_poll_key(NULL); - if (data == -1) - break; - tty_insert_flip_char(&tty_port, data & 0xFF, TTY_NORMAL); - count ++; - } - - if (count) - tty_flip_buffer_push(&tty_port); - - if (pdc_cons.flags & CON_ENABLED) - mod_timer(&pdc_console_timer, jiffies + PDC_CONS_POLL_DELAY); -} - -static struct tty_driver *pdc_console_tty_driver; - -static int __init pdc_console_tty_driver_init(void) -{ - struct tty_driver *driver; - int err; - - /* Check if the console driver is still registered. - * It is unregistered if the pdc console was not selected as the - * primary console. */ - - struct console *tmp; - - console_lock(); - for_each_console(tmp) - if (tmp == &pdc_cons) - break; - console_unlock(); - - if (!tmp) { - printk(KERN_INFO "PDC console driver not registered anymore, not creating %s\n", pdc_cons.name); - return -ENODEV; - } - - printk(KERN_INFO "The PDC console driver is still registered, removing CON_BOOT flag\n"); - pdc_cons.flags &= ~CON_BOOT; - - driver = tty_alloc_driver(1, TTY_DRIVER_REAL_RAW | - TTY_DRIVER_RESET_TERMIOS); - if (IS_ERR(driver)) - return PTR_ERR(driver); - - tty_port_init(&tty_port); - - driver->driver_name = "pdc_cons"; - driver->name = "ttyB"; - driver->major = MUX_MAJOR; - driver->minor_start = 0; - driver->type = TTY_DRIVER_TYPE_SYSTEM; - driver->init_termios = tty_std_termios; - tty_set_operations(driver, &pdc_console_tty_ops); - tty_port_link_device(&tty_port, driver, 0); - - err = tty_register_driver(driver); - if (err) { - printk(KERN_ERR "Unable to register the PDC console TTY driver\n"); - tty_port_destroy(&tty_port); - tty_driver_kref_put(driver); - return err; - } - - pdc_console_tty_driver = driver; - - return 0; -} -device_initcall(pdc_console_tty_driver_init); - -static struct tty_driver * pdc_console_device (struct console *c, int *index) -{ - *index = c->index; - return pdc_console_tty_driver; -} -#else -#define pdc_console_device NULL #endif -static struct console pdc_cons = { - .name = "ttyB", - .write = pdc_console_write, - .device = pdc_console_device, - .setup = pdc_console_setup, - .flags = CON_BOOT | CON_PRINTBUFFER, - .index = -1, -}; - -static int pdc_console_initialized; - -static void pdc_console_init_force(void) +static int __init pdc_earlycon_setup(struct earlycon_device *device, + const char *opt) { - if (pdc_console_initialized) - return; - ++pdc_console_initialized; - + struct console *earlycon_console; + /* If the console is duplex then copy the COUT parameters to CIN. */ if (PAGE0->mem_cons.cl_class == CL_DUPLEX) memcpy(&PAGE0->mem_kbd, &PAGE0->mem_cons, sizeof(PAGE0->mem_cons)); - /* register the pdc console */ - register_console(&pdc_cons); -} + earlycon_console = device->con; + earlycon_console->write = pdc_console_write; + device->port.iotype = UPIO_MEM32BE; -void __init pdc_console_init(void) -{ -#if defined(EARLY_BOOTUP_DEBUG) || defined(CONFIG_PDC_CONSOLE) - pdc_console_init_force(); +#ifdef CONFIG_KGDB + kgdb_register_io_module(&kgdb_pdc_io_ops); #endif -#ifdef EARLY_BOOTUP_DEBUG - printk(KERN_INFO "Initialized PDC Console for debugging.\n"); -#endif -} - - -/* - * Used for emergencies. Currently only used if an HPMC occurs. If an - * HPMC occurs, it is possible that the current console may not be - * properly initialised after the PDC IO reset. This routine unregisters - * all of the current consoles, reinitializes the pdc console and - * registers it. - */ - -void pdc_console_restart(void) -{ - struct console *console; - - if (pdc_console_initialized) - return; - /* If we've already seen the output, don't bother to print it again */ - if (console_drivers != NULL) - pdc_cons.flags &= ~CON_PRINTBUFFER; - - while ((console = console_drivers) != NULL) - unregister_console(console_drivers); - - /* force registering the pdc console */ - pdc_console_init_force(); + return 0; } + +EARLYCON_DECLARE(pdc, pdc_earlycon_setup); diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 7c37e09c92da..c4f8374c7018 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -146,10 +146,6 @@ void flush_thread(void) */ } -void release_thread(struct task_struct *dead_task) -{ -} - /* * Idle thread support * @@ -288,7 +284,7 @@ __get_wchan(struct task_struct *p) static inline unsigned long brk_rnd(void) { - return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT; + return (get_random_u32() & BRK_RND_MASK) << PAGE_SHIFT; } unsigned long arch_randomize_brk(struct mm_struct *mm) diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index f005ddedb50e..375f38d6e1a4 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -70,6 +70,10 @@ void __init setup_cmdline(char **cmdline_p) strlcat(p, "tty0", COMMAND_LINE_SIZE); } + /* default to use early console */ + if (!strstr(p, "earlycon")) + strlcat(p, " earlycon=pdc", COMMAND_LINE_SIZE); + #ifdef CONFIG_BLK_DEV_INITRD if (boot_args[2] != 0) /* did palo pass us a ramdisk? */ { @@ -139,8 +143,6 @@ void __init setup_arch(char **cmdline_p) if (__pa((unsigned long) &_end) >= KERNEL_INITIAL_SIZE) panic("KERNEL_INITIAL_ORDER too small!"); - pdc_console_init(); - #ifdef CONFIG_64BIT if(parisc_narrow_firmware) { printk(KERN_INFO "Kernel is using PDC in 32-bit mode.\n"); diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 2b34294517a1..848b0702005d 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -239,14 +239,14 @@ static unsigned long mmap_rnd(void) unsigned long rnd = 0; if (current->flags & PF_RANDOMIZE) - rnd = get_random_int() & MMAP_RND_MASK; + rnd = get_random_u32() & MMAP_RND_MASK; return rnd << PAGE_SHIFT; } unsigned long arch_mmap_rnd(void) { - return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT; + return (get_random_u32() & MMAP_RND_MASK) << PAGE_SHIFT; } static unsigned long mmap_legacy_base(void) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index b78f1b9d45c1..f9696fbf646c 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -239,13 +239,6 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) /* unlock the pdc lock if necessary */ pdc_emergency_unlock(); - /* maybe the kernel hasn't booted very far yet and hasn't been able - * to initialize the serial or STI console. In that case we should - * re-enable the pdc console, so that the user will be able to - * identify the problem. */ - if (!console_drivers) - pdc_console_restart(); - if (err) printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n", current->comm, task_pid_nr(current), str, err); @@ -429,10 +422,6 @@ void parisc_terminate(char *msg, struct pt_regs *regs, int code, unsigned long o /* unlock the pdc lock if necessary */ pdc_emergency_unlock(); - /* restart pdc console if necessary */ - if (!console_drivers) - pdc_console_restart(); - /* Not all paths will gutter the processor... */ switch(code){ @@ -482,9 +471,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) unsigned long fault_space = 0; int si_code; - if (code == 1) - pdc_console_restart(); /* switch back to pdc if HPMC */ - else if (!irqs_disabled_flags(regs->gr[0])) + if (!irqs_disabled_flags(regs->gr[0])) local_irq_enable(); /* Security check: diff --git a/arch/parisc/kernel/vdso.c b/arch/parisc/kernel/vdso.c index 63dc44c4c246..47e5960a2f96 100644 --- a/arch/parisc/kernel/vdso.c +++ b/arch/parisc/kernel/vdso.c @@ -75,7 +75,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, map_base = mm->mmap_base; if (current->flags & PF_RANDOMIZE) - map_base -= (get_random_int() & 0x1f) * PAGE_SIZE; + map_base -= prandom_u32_max(0x20) * PAGE_SIZE; vdso_text_start = get_unmapped_area(NULL, map_base, vdso_text_len, 0, 0); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4c466acdc70d..699df27b0e2f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -135,8 +135,9 @@ config PPC select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION - select ARCH_HAS_STRICT_KERNEL_RWX if FSL_BOOKE && !HIBERNATION && !RANDOMIZE_BASE + select ARCH_HAS_STRICT_KERNEL_RWX if PPC_85xx && !HIBERNATION && !RANDOMIZE_BASE select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_HAS_SYSCALL_WRAPPER if !SPU_BASE && !COMPAT select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UBSAN_SANITIZE_ALL @@ -194,7 +195,7 @@ config PPC select HAVE_ARCH_KASAN if PPC_RADIX_MMU select HAVE_ARCH_KASAN if PPC_BOOK3E_64 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN - select HAVE_ARCH_KFENCE if PPC_BOOK3S_32 || PPC_8xx || 40x + select HAVE_ARCH_KFENCE if ARCH_SUPPORTS_DEBUG_PAGEALLOC select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS @@ -211,7 +212,7 @@ config PPC select HAVE_DYNAMIC_FTRACE_WITH_ARGS if MPROFILE_KERNEL || PPC32 select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT - select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_DESCRIPTORS if PPC64_ELF_ABI_V1 @@ -290,7 +291,7 @@ config PPC_LONG_DOUBLE_128 config PPC_BARRIER_NOSPEC bool default y - depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E + depends on PPC_BOOK3S_64 || PPC_E500 config EARLY_PRINTK bool @@ -548,7 +549,7 @@ config PPC64_SUPPORTS_MEMORY_FAILURE config KEXEC bool "kexec system call" - depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP)) || PPC_BOOK3E + depends on PPC_BOOK3S || PPC_E500 || (44x && !SMP) select KEXEC_CORE help kexec is a system call that implements the ability to shutdown your @@ -583,7 +584,7 @@ config ARCH_HAS_KEXEC_PURGATORY config RELOCATABLE bool "Build a relocatable kernel" - depends on PPC64 || (FLATMEM && (44x || FSL_BOOKE)) + depends on PPC64 || (FLATMEM && (44x || PPC_85xx)) select NONSTATIC_KERNEL help This builds a kernel image that is capable of running at the @@ -606,7 +607,7 @@ config RELOCATABLE config RANDOMIZE_BASE bool "Randomize the address of the kernel image" - depends on (FSL_BOOKE && FLATMEM && PPC32) + depends on PPC_85xx && FLATMEM depends on RELOCATABLE help Randomizes the virtual address at which the kernel image is @@ -625,8 +626,8 @@ config RELOCATABLE_TEST config CRASH_DUMP bool "Build a dump capture kernel" - depends on PPC64 || PPC_BOOK3S_32 || FSL_BOOKE || (44x && !SMP) - select RELOCATABLE if PPC64 || 44x || FSL_BOOKE + depends on PPC64 || PPC_BOOK3S_32 || PPC_85xx || (44x && !SMP) + select RELOCATABLE if PPC64 || 44x || PPC_85xx help Build a kernel suitable for use as a dump capture kernel. The same kernel binary can be used as production kernel and dump @@ -815,7 +816,7 @@ config DATA_SHIFT_BOOL depends on ADVANCED_OPTIONS depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) || \ - FSL_BOOKE + PPC_85xx help This option allows you to set the kernel data alignment. When RAM is mapped by blocks, the alignment needs to fit the size and @@ -828,13 +829,13 @@ config DATA_SHIFT default 24 if STRICT_KERNEL_RWX && PPC64 range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx - range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_FSL_BOOKE + range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_85xx default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 default 23 if STRICT_KERNEL_RWX && PPC_8xx default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx - default 24 if STRICT_KERNEL_RWX && FSL_BOOKE + default 24 if STRICT_KERNEL_RWX && PPC_85xx default PPC_PAGE_SHIFT help On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO. @@ -845,7 +846,7 @@ config DATA_SHIFT in that case. If PIN_TLB is selected, it must be aligned to 8M as 8M pages will be pinned. -config FORCE_MAX_ZONEORDER +config ARCH_FORCE_MAX_ORDER int "Maximum zone order" range 8 9 if PPC64 && PPC_64K_PAGES default "9" if PPC64 && PPC_64K_PAGES @@ -1150,7 +1151,7 @@ config LOWMEM_SIZE config LOWMEM_CAM_NUM_BOOL bool "Set number of CAMs to use to map low memory" - depends on ADVANCED_OPTIONS && FSL_BOOKE + depends on ADVANCED_OPTIONS && PPC_85xx help This option allows you to set the maximum number of CAM slots that will be used to map low memory. There are a limited number of slots @@ -1161,7 +1162,7 @@ config LOWMEM_CAM_NUM_BOOL Say N here unless you know what you are doing. config LOWMEM_CAM_NUM - depends on FSL_BOOKE + depends on PPC_85xx int "Number of CAMs to use to map low memory" if LOWMEM_CAM_NUM_BOOL default 3 if !STRICT_KERNEL_RWX default 9 if DATA_SHIFT >= 24 @@ -1170,7 +1171,7 @@ config LOWMEM_CAM_NUM config DYNAMIC_MEMSTART bool "Enable page aligned dynamic load address for kernel" - depends on ADVANCED_OPTIONS && FLATMEM && (FSL_BOOKE || 44x) + depends on ADVANCED_OPTIONS && FLATMEM && (PPC_85xx || 44x) select NONSTATIC_KERNEL help This option enables the kernel to be loaded at any page aligned @@ -1219,7 +1220,7 @@ config KERNEL_START config PHYSICAL_START_BOOL bool "Set physical address where the kernel is loaded" - depends on ADVANCED_OPTIONS && FLATMEM && FSL_BOOKE + depends on ADVANCED_OPTIONS && FLATMEM && PPC_85xx help This gives the physical address where the kernel is loaded. @@ -1232,7 +1233,7 @@ config PHYSICAL_START config PHYSICAL_ALIGN hex - default "0x04000000" if FSL_BOOKE + default "0x04000000" if PPC_85xx help This value puts the alignment restrictions on physical address where kernel is loaded and run from. Kernel is compiled for an diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index ae727d4218b9..6aaf8dc60610 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -283,6 +283,12 @@ config PPC_EARLY_DEBUG_MEMCONS This console provides input and output buffers stored within the kernel BSS and should be safe to select on any system. A debugger can then be used to read kernel output or send input to the console. + +config PPC_EARLY_DEBUG_16550 + bool "Serial 16550" + depends on PPC_UDBG_16550 + help + Select this to enable early debugging via Serial 16550 console endchoice config PPC_MEMCONS_OUTPUT_SIZE @@ -354,6 +360,15 @@ config PPC_EARLY_DEBUG_CPM_ADDR platform probing is done, all platforms selected must share the same address. +config PPC_EARLY_DEBUG_16550_PHYSADDR + hex "Early debug Serial 16550 physical address" + depends on PPC_EARLY_DEBUG_16550 + +config PPC_EARLY_DEBUG_16550_STRIDE + int "Early debug Serial 16550 stride" + depends on PPC_EARLY_DEBUG_16550 + default 1 + config FAIL_IOMMU bool "Fault-injection capability for IOMMU" depends on FAULT_INJECTION diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 02742facf895..dc4cbf0a5ca9 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -149,11 +149,12 @@ CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata) ifdef CONFIG_PPC_BOOK3S_64 ifdef CONFIG_CPU_LITTLE_ENDIAN CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power8 -CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power9,-mtune=power8) else -CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5)) -CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4) +CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power4 endif +CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power10, \ + $(call cc-option,-mtune=power9, \ + $(call cc-option,-mtune=power8))) else ifdef CONFIG_PPC_BOOK3E_64 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif @@ -191,9 +192,14 @@ ifdef CONFIG_476FPE_ERR46 -T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds endif -# No AltiVec or VSX instructions when building kernel +# No prefix or pcrel +KBUILD_CFLAGS += $(call cc-option,-mno-prefixed) +KBUILD_CFLAGS += $(call cc-option,-mno-pcrel) + +# No AltiVec or VSX or MMA instructions when building kernel KBUILD_CFLAGS += $(call cc-option,-mno-altivec) KBUILD_CFLAGS += $(call cc-option,-mno-vsx) +KBUILD_CFLAGS += $(call cc-option,-mno-mma) # No SPE instruction when building kernel # (We use all available options to help semi-broken compilers) @@ -210,7 +216,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-string) cpu-as-$(CONFIG_40x) += -Wa,-m405 cpu-as-$(CONFIG_44x) += -Wa,-m440 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) -cpu-as-$(CONFIG_E500) += -Wa,-me500 +cpu-as-$(CONFIG_PPC_E500) += -Wa,-me500 # When using '-many -mpower4' gas will first try and find a matching power4 # mnemonic and failing that it will allow any valid mnemonic that GAS knows @@ -226,18 +232,6 @@ KBUILD_CFLAGS += $(cpu-as-y) KBUILD_AFLAGS += $(aflags-y) KBUILD_CFLAGS += $(cflags-y) -head-$(CONFIG_PPC64) := arch/powerpc/kernel/head_64.o -head-$(CONFIG_PPC_BOOK3S_32) := arch/powerpc/kernel/head_book3s_32.o -head-$(CONFIG_PPC_8xx) := arch/powerpc/kernel/head_8xx.o -head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o -head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o -head-$(CONFIG_FSL_BOOKE) := arch/powerpc/kernel/head_fsl_booke.o - -head-$(CONFIG_PPC64) += arch/powerpc/kernel/entry_64.o -head-$(CONFIG_PPC_FPU) += arch/powerpc/kernel/fpu.o -head-$(CONFIG_ALTIVEC) += arch/powerpc/kernel/vector.o -head-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += arch/powerpc/kernel/prom_init.o - # Default to zImage, override when needed all: zImage diff --git a/arch/powerpc/boot/44x.h b/arch/powerpc/boot/44x.h index 02563443788a..9b15e59522d6 100644 --- a/arch/powerpc/boot/44x.h +++ b/arch/powerpc/boot/44x.h @@ -1,11 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * PowerPC 44x related functions * * Copyright 2007 David Gibson, IBM Corporation. - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. */ #ifndef _PPC_BOOT_44X_H_ #define _PPC_BOOT_44X_H_ diff --git a/arch/powerpc/boot/4xx.h b/arch/powerpc/boot/4xx.h index 7dc5d45361bc..77f15d124c81 100644 --- a/arch/powerpc/boot/4xx.h +++ b/arch/powerpc/boot/4xx.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * PowerPC 4xx related functions * * Copyright 2007 IBM Corporation. * Josh Boyer <jwboyer@linux.vnet.ibm.com> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. */ #ifndef _POWERPC_BOOT_4XX_H_ #define _POWERPC_BOOT_4XX_H_ diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index a9cd2ea4a861..d32d95aea5d6 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -34,6 +34,7 @@ endif BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \ + $(call cc-option,-mno-spe) $(call cc-option,-mspe=no) \ -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ $(LINUXINCLUDE) diff --git a/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi b/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi new file mode 100644 index 000000000000..7e2a90cde72e --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi @@ -0,0 +1,51 @@ +/* + * e500v1 Power ISA Device Tree Source (include) + * + * Copyright 2012 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/ { + cpus { + power-isa-version = "2.03"; + power-isa-b; // Base + power-isa-e; // Embedded + power-isa-atb; // Alternate Time Base + power-isa-cs; // Cache Specification + power-isa-e.le; // Embedded.Little-Endian + power-isa-e.pm; // Embedded.Performance Monitor + power-isa-ecl; // Embedded Cache Locking + power-isa-mmc; // Memory Coherence + power-isa-sp; // Signal Processing Engine + power-isa-sp.fs; // SPE.Embedded Float Scalar Single + power-isa-sp.fv; // SPE.Embedded Float Vector + mmu-type = "power-embedded"; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/mpc8540ads.dts b/arch/powerpc/boot/dts/fsl/mpc8540ads.dts index 18a885130538..e03ae130162b 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8540ads.dts +++ b/arch/powerpc/boot/dts/fsl/mpc8540ads.dts @@ -7,7 +7,7 @@ /dts-v1/; -/include/ "e500v2_power_isa.dtsi" +/include/ "e500v1_power_isa.dtsi" / { model = "MPC8540ADS"; diff --git a/arch/powerpc/boot/dts/fsl/mpc8541cds.dts b/arch/powerpc/boot/dts/fsl/mpc8541cds.dts index ac381e7b1c60..a2a6c5cf852e 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8541cds.dts +++ b/arch/powerpc/boot/dts/fsl/mpc8541cds.dts @@ -7,7 +7,7 @@ /dts-v1/; -/include/ "e500v2_power_isa.dtsi" +/include/ "e500v1_power_isa.dtsi" / { model = "MPC8541CDS"; diff --git a/arch/powerpc/boot/dts/fsl/mpc8555cds.dts b/arch/powerpc/boot/dts/fsl/mpc8555cds.dts index 9f58db2a7e66..901b6ff06dfb 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8555cds.dts +++ b/arch/powerpc/boot/dts/fsl/mpc8555cds.dts @@ -7,7 +7,7 @@ /dts-v1/; -/include/ "e500v2_power_isa.dtsi" +/include/ "e500v1_power_isa.dtsi" / { model = "MPC8555CDS"; diff --git a/arch/powerpc/boot/dts/fsl/mpc8560ads.dts b/arch/powerpc/boot/dts/fsl/mpc8560ads.dts index a24722ccaebf..c2f9aea78b29 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8560ads.dts +++ b/arch/powerpc/boot/dts/fsl/mpc8560ads.dts @@ -7,7 +7,7 @@ /dts-v1/; -/include/ "e500v2_power_isa.dtsi" +/include/ "e500v1_power_isa.dtsi" / { model = "MPC8560ADS"; diff --git a/arch/powerpc/boot/dts/ksi8560.dts b/arch/powerpc/boot/dts/ksi8560.dts index fe6c17c8812a..37a7eb576d02 100644 --- a/arch/powerpc/boot/dts/ksi8560.dts +++ b/arch/powerpc/boot/dts/ksi8560.dts @@ -14,6 +14,8 @@ /dts-v1/; +/include/ "fsl/e500v1_power_isa.dtsi" + / { model = "KSI8560"; compatible = "emerson,KSI8560"; diff --git a/arch/powerpc/boot/dts/mgcoge.dts b/arch/powerpc/boot/dts/mgcoge.dts index 7de068991bde..9cefed207234 100644 --- a/arch/powerpc/boot/dts/mgcoge.dts +++ b/arch/powerpc/boot/dts/mgcoge.dts @@ -225,13 +225,6 @@ interrupts = <2 8>; interrupt-parent = <&PIC>; cs-gpios = < &cpm2_pio_d 19 0>; - #address-cells = <1>; - #size-cells = <0>; - ds3106@1 { - compatible = "gen,spidev"; - reg = <0>; - spi-max-frequency = <8000000>; - }; }; }; diff --git a/arch/powerpc/boot/dts/stx_gp3_8560.dts b/arch/powerpc/boot/dts/stx_gp3_8560.dts index d1ab698eef36..e73f7e75b0b4 100644 --- a/arch/powerpc/boot/dts/stx_gp3_8560.dts +++ b/arch/powerpc/boot/dts/stx_gp3_8560.dts @@ -7,6 +7,8 @@ /dts-v1/; +/include/ "fsl/e500v1_power_isa.dtsi" + / { model = "stx,gp3"; compatible = "stx,gp3-8560", "stx,gp3"; diff --git a/arch/powerpc/boot/dts/stxssa8555.dts b/arch/powerpc/boot/dts/stxssa8555.dts index 5dca2a91c41f..96add25c904b 100644 --- a/arch/powerpc/boot/dts/stxssa8555.dts +++ b/arch/powerpc/boot/dts/stxssa8555.dts @@ -9,6 +9,8 @@ /dts-v1/; +/include/ "fsl/e500v1_power_isa.dtsi" + / { model = "stx,gp3"; compatible = "stx,gp3-8560", "stx,gp3"; diff --git a/arch/powerpc/boot/dts/tqm8540.dts b/arch/powerpc/boot/dts/tqm8540.dts index 9c1eb9779108..eb4d8fd3f7aa 100644 --- a/arch/powerpc/boot/dts/tqm8540.dts +++ b/arch/powerpc/boot/dts/tqm8540.dts @@ -7,6 +7,8 @@ /dts-v1/; +/include/ "fsl/e500v1_power_isa.dtsi" + / { model = "tqc,tqm8540"; compatible = "tqc,tqm8540"; diff --git a/arch/powerpc/boot/dts/tqm8541.dts b/arch/powerpc/boot/dts/tqm8541.dts index 44595cf675d0..fe5d3d873ec9 100644 --- a/arch/powerpc/boot/dts/tqm8541.dts +++ b/arch/powerpc/boot/dts/tqm8541.dts @@ -7,6 +7,8 @@ /dts-v1/; +/include/ "fsl/e500v1_power_isa.dtsi" + / { model = "tqc,tqm8541"; compatible = "tqc,tqm8541"; diff --git a/arch/powerpc/boot/dts/tqm8555.dts b/arch/powerpc/boot/dts/tqm8555.dts index 54f3e82907d6..4be05b7d225d 100644 --- a/arch/powerpc/boot/dts/tqm8555.dts +++ b/arch/powerpc/boot/dts/tqm8555.dts @@ -7,6 +7,8 @@ /dts-v1/; +/include/ "fsl/e500v1_power_isa.dtsi" + / { model = "tqc,tqm8555"; compatible = "tqc,tqm8555"; diff --git a/arch/powerpc/boot/dts/tqm8560.dts b/arch/powerpc/boot/dts/tqm8560.dts index 7415cb69f60d..8ea48502420b 100644 --- a/arch/powerpc/boot/dts/tqm8560.dts +++ b/arch/powerpc/boot/dts/tqm8560.dts @@ -8,6 +8,8 @@ /dts-v1/; +/include/ "fsl/e500v1_power_isa.dtsi" + / { model = "tqc,tqm8560"; compatible = "tqc,tqm8560"; diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts index 12e08271e61f..045af668e928 100644 --- a/arch/powerpc/boot/dts/turris1x.dts +++ b/arch/powerpc/boot/dts/turris1x.dts @@ -147,7 +147,7 @@ port@0 { reg = <0>; - label = "cpu1"; + label = "cpu"; ethernet = <&enet1>; phy-mode = "rgmii-id"; @@ -184,7 +184,7 @@ port@6 { reg = <6>; - label = "cpu0"; + label = "cpu"; ethernet = <&enet0>; phy-mode = "rgmii-id"; @@ -263,21 +263,21 @@ }; partition@20000 { - /* 1.7 MB for Rescue Linux Kernel Image */ + /* 1.7 MB for Linux Kernel Image */ reg = <0x00020000 0x001a0000>; - label = "rescue-kernel"; + label = "kernel"; }; partition@1c0000 { /* 1.5 MB for Rescue JFFS2 Root File System */ reg = <0x001c0000 0x00180000>; - label = "rescue-rootfs"; + label = "rescue"; }; partition@340000 { - /* 11 MB for TAR.XZ Backup with content of NAND Root File System */ + /* 11 MB for TAR.XZ Archive with Factory content of NAND Root File System */ reg = <0x00340000 0x00b00000>; - label = "backup-rootfs"; + label = "factory"; }; partition@e40000 { diff --git a/arch/powerpc/boot/dummy.c b/arch/powerpc/boot/dummy.c deleted file mode 100644 index 31dbf45bf99c..000000000000 --- a/arch/powerpc/boot/dummy.c +++ /dev/null @@ -1,4 +0,0 @@ -int main(void) -{ - return 0; -} diff --git a/arch/powerpc/boot/opal-calls.S b/arch/powerpc/boot/opal-calls.S index ad0e15d930c4..1f2f330a459e 100644 --- a/arch/powerpc/boot/opal-calls.S +++ b/arch/powerpc/boot/opal-calls.S @@ -16,7 +16,7 @@ opal_kentry: li r5, 0 li r6, 0 li r7, 0 - ld r11,opal@got(r2) + LOAD_REG_ADDR(r11, opal) ld r8,0(r11) ld r9,8(r11) bctr @@ -35,7 +35,7 @@ opal_call: mr r13,r2 /* Set opal return address */ - ld r11,opal_return@got(r2) + LOAD_REG_ADDR(r11, opal_return) mtlr r11 mfmsr r12 @@ -45,7 +45,7 @@ opal_call: mtspr SPRN_HSRR1,r12 /* load the opal call entry point and base */ - ld r11,opal@got(r2) + LOAD_REG_ADDR(r11, opal) ld r12,8(r11) ld r2,0(r11) mtspr SPRN_HSRR0,r12 diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h index 8334bc3cbe49..a40c2162a4e9 100644 --- a/arch/powerpc/boot/ops.h +++ b/arch/powerpc/boot/ops.h @@ -1,12 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Global definition of all the bootwrapper operations. * * Author: Mark A. Greer <mgreer@mvista.com> * - * 2006 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. + * 2006 (c) MontaVista Software, Inc. */ #ifndef _PPC_BOOT_OPS_H_ #define _PPC_BOOT_OPS_H_ diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h index 192b97523b05..a66cfd76fa4d 100644 --- a/arch/powerpc/boot/ppc_asm.h +++ b/arch/powerpc/boot/ppc_asm.h @@ -84,4 +84,14 @@ #define MFTBU(dest) mfspr dest, SPRN_TBRU #endif +#ifdef CONFIG_PPC64_BOOT_WRAPPER +#define LOAD_REG_ADDR(reg,name) \ + addis reg,r2,name@toc@ha; \ + addi reg,reg,name@toc@l +#else +#define LOAD_REG_ADDR(reg,name) \ + lis reg,name@ha; \ + addi reg,reg,name@l +#endif + #endif /* _PPC64_PPC_ASM_H */ diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index 54d2522be485..c6d32a8c3612 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Generic serial console support * @@ -6,10 +7,7 @@ * Code in serial_edit_cmdline() copied from <file:arch/ppc/boot/simple/misc.c> * and was written by Matt Porter <mporter@kernel.crashing.org>. * - * 2001,2006 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. + * 2001,2006 (c) MontaVista Software, Inc. */ #include <stdarg.h> #include <stddef.h> diff --git a/arch/powerpc/boot/simple_alloc.c b/arch/powerpc/boot/simple_alloc.c index 65ec135d0157..267d6524caac 100644 --- a/arch/powerpc/boot/simple_alloc.c +++ b/arch/powerpc/boot/simple_alloc.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Implement primitive realloc(3) functionality. * * Author: Mark A. Greer <mgreer@mvista.com> * - * 2006 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. + * 2006 (c) MontaVista, Software, Inc. */ #include <stddef.h> diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 55978f32fa77..5bdd4dd20bbb 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -433,7 +433,7 @@ fi # Extract kernel version information, some platforms want to include # it in the image header version=`${CROSS}strings "$kernel" | grep '^Linux version [-0-9.]' | \ - cut -d' ' -f3` + head -n1 | cut -d' ' -f3` if [ -n "$version" ]; then uboot_version="-n Linux-$version" fi diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig index cbcae2a927e9..4e3373381ab6 100644 --- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig @@ -77,6 +77,5 @@ CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_CRC_T10DIF=y -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig index f29c166998af..ea719898b581 100644 --- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig +++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig @@ -30,7 +30,7 @@ CONFIG_PREEMPT=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_MATH_EMULATION=y -CONFIG_FORCE_MAX_ZONEORDER=17 +CONFIG_ARCH_FORCE_MAX_ORDER=17 CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_MSI=y @@ -74,7 +74,6 @@ CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 diff --git a/arch/powerpc/configs/85xx/ppa8548_defconfig b/arch/powerpc/configs/85xx/ppa8548_defconfig index 190978a5b7d5..4bd5f993d26a 100644 --- a/arch/powerpc/configs/85xx/ppa8548_defconfig +++ b/arch/powerpc/configs/85xx/ppa8548_defconfig @@ -7,9 +7,7 @@ CONFIG_RAPIDIO=y CONFIG_FSL_RIO=y CONFIG_RAPIDIO_DMA_ENGINE=y CONFIG_RAPIDIO_ENUM_BASIC=y -CONFIG_RAPIDIO_TSI57X=y CONFIG_RAPIDIO_CPS_XX=y -CONFIG_RAPIDIO_TSI568=y CONFIG_RAPIDIO_CPS_GEN2=y CONFIG_ADVANCED_OPTIONS=y CONFIG_LOWMEM_SIZE_BOOL=y diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 7fd9e596ea33..06391cc2af3a 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -195,7 +195,6 @@ CONFIG_NLS_ISO8859_9=m CONFIG_NLS_ISO8859_13=m CONFIG_NLS_ISO8859_14=m CONFIG_NLS_ISO8859_15=m -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index f14c6dbd7346..ab8a8c4530d9 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -41,7 +41,7 @@ CONFIG_FIXED_PHY=y CONFIG_FONT_8x16=y CONFIG_FONT_8x8=y CONFIG_FONTS=y -CONFIG_FORCE_MAX_ZONEORDER=13 +CONFIG_ARCH_FORCE_MAX_ORDER=13 CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAME_WARN=1024 CONFIG_FTL=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 9d6212a8b195..71d9d112c0b6 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -119,7 +119,6 @@ CONFIG_INPUT_EVDEV=y # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set # CONFIG_HW_RANDOM is not set -CONFIG_RAW_DRIVER=y CONFIG_I2C_CHARDEV=y CONFIG_AGP=m CONFIG_AGP_UNINORTH=m diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig index eff933ebbb9e..ea2dbd778aad 100644 --- a/arch/powerpc/configs/microwatt_defconfig +++ b/arch/powerpc/configs/microwatt_defconfig @@ -75,7 +75,12 @@ CONFIG_SPI_BITBANG=y CONFIG_SPI_SPIDEV=y # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set +CONFIG_MMC=y +# CONFIG_PWRSEQ_EMMC is not set +# CONFIG_PWRSEQ_SIMPLE is not set +CONFIG_MMC_LITEX=y # CONFIG_VIRTIO_MENU is not set +CONFIG_COMMON_CLK=y # CONFIG_IOMMU_SUPPORT is not set # CONFIG_NVMEM is not set CONFIG_EXT4_FS=y diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index e75d3f3060c9..10fe061c5e6d 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -114,5 +114,4 @@ CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_CRYPTO_HW is not set diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 700115d85d6f..56b876e418e9 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -78,4 +78,4 @@ CONFIG_DEBUG_VM_PGTABLE=y CONFIG_DETECT_HUNG_TASK=y CONFIG_BDI_SWITCH=y CONFIG_PPC_EARLY_DEBUG=y -CONFIG_PPC_PTDUMP=y +CONFIG_GENERIC_PTDUMP=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index e00a703581c3..96aa5355911f 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -92,7 +92,6 @@ CONFIG_LEGACY_PTY_COUNT=4 CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_HW_RANDOM=y -CONFIG_RAW_DRIVER=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_PASEMI=y CONFIG_SENSORS_LM85=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 13885ec563d1..019163c2571e 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -284,7 +284,6 @@ CONFIG_BOOTX_TEXT=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 49f49c263935..c92652575064 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -17,7 +17,6 @@ CONFIG_LOG_CPU_MAX_BUF_SHIFT=13 CONFIG_NUMA_BALANCING=y CONFIG_CGROUPS=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y @@ -50,6 +49,7 @@ CONFIG_CPU_IDLE=y CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y +CONFIG_PPC_UV=y CONFIG_HOTPLUG_CPU=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y @@ -65,6 +65,8 @@ CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_SCHED_SMT=y CONFIG_PM=y CONFIG_HOTPLUG_PCI=y +CONFIG_ZONE_DEVICE=y +CONFIG_DEVICE_PRIVATE=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -252,7 +254,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=y # CONFIG_VIRTIO_MENU is not set CONFIG_LIBNVDIMM=y -# CONFIG_ND_BLK is not set CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y @@ -325,13 +326,11 @@ CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA1_PPC=m CONFIG_CRYPTO_SHA256=y -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index c8b0e80d613b..d6949a6c5b2b 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -1,7 +1,9 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_VIRT_CPU_ACCOUNTING_GEN=y CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_IKCONFIG=y @@ -213,7 +215,6 @@ CONFIG_HVC_RTAS=y CONFIG_HVCS=m CONFIG_VIRTIO_CONSOLE=m CONFIG_IBM_BSR=m -CONFIG_RAW_DRIVER=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_AMD8111=y CONFIG_I2C_PASEMI=y @@ -342,13 +343,11 @@ CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA1_PPC=m CONFIG_CRYPTO_SHA256=y -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 5cf49a515f8e..f97a2d31bbf7 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -118,7 +118,6 @@ CONFIG_INPUT_MISC=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_HW_RANDOM is not set -CONFIG_RAW_DRIVER=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_AMD8111=y CONFIG_FB=y @@ -234,13 +233,11 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index a24f484bfbd2..d23deb94b36e 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -321,7 +321,6 @@ CONFIG_PNP=y CONFIG_ISAPNP=y CONFIG_MAC_FLOPPY=m CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=16384 @@ -590,7 +589,6 @@ CONFIG_GAMEPORT_EMU10K1=m CONFIG_GAMEPORT_FM801=m # CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_NONSTANDARD=y -CONFIG_ROCKETPORT=m CONFIG_SYNCLINK_GT=m CONFIG_NOZOMI=m CONFIG_N_HDLC=m @@ -1107,13 +1105,9 @@ CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m @@ -1121,7 +1115,6 @@ CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 2d9ac233da68..0a1b42c4f26a 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -165,6 +165,5 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_FTRACE is not set CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_LZO=m CONFIG_PRINTK_TIME=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index b571d084c148..7497e17ea657 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -3,8 +3,10 @@ CONFIG_NR_CPUS=2048 CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y +# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_VIRT_CPU_ACCOUNTING_GEN=y CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y @@ -16,7 +18,6 @@ CONFIG_LOG_CPU_MAX_BUF_SHIFT=13 CONFIG_NUMA_BALANCING=y CONFIG_CGROUPS=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y @@ -40,6 +41,7 @@ CONFIG_PPC_SPLPAR=y CONFIG_DTL=y CONFIG_PPC_SMLPAR=y CONFIG_IBMEBUS=y +CONFIG_LIBNVDIMM=m CONFIG_PAPR_SCM=m CONFIG_PPC_SVM=y # CONFIG_PPC_PMAC is not set @@ -187,7 +189,6 @@ CONFIG_HVC_RTAS=y CONFIG_HVCS=m CONFIG_VIRTIO_CONSOLE=m CONFIG_IBM_BSR=m -CONFIG_RAW_DRIVER=y CONFIG_I2C_CHARDEV=y CONFIG_FB=y CONFIG_FIRMWARE_EDID=y @@ -302,13 +303,11 @@ CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA1_PPC=m CONFIG_CRYPTO_SHA256=y -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index f491875700e8..e0964210f259 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -133,7 +133,6 @@ CONFIG_ACENIC_OMIT_TIGON_I=y # CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_VENDOR_ATHEROS is not set -# CONFIG_NET_VENDOR_AURORA is not set CONFIG_TIGON3=m CONFIG_BNX2X=m # CONFIG_NET_VENDOR_BROCADE is not set @@ -274,7 +273,6 @@ CONFIG_NLS_UTF8=y CONFIG_ENCRYPTED_KEYS=y CONFIG_SECURITY=y CONFIG_HARDENED_USERCOPY=y -CONFIG_HARDENED_USERCOPY_PAGESPAN=y CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_LOCKDOWN_LSM=y CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index 47dcfaddc1ac..7a978d396991 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -76,4 +76,3 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y CONFIG_CRC_T10DIF=y -# CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig new file mode 100644 index 000000000000..c1b964447401 --- /dev/null +++ b/arch/powerpc/crypto/Kconfig @@ -0,0 +1,97 @@ +# SPDX-License-Identifier: GPL-2.0 + +menu "Accelerated Cryptographic Algorithms for CPU (powerpc)" + +config CRYPTO_CRC32C_VPMSUM + tristate "CRC32c" + depends on PPC64 && ALTIVEC + select CRYPTO_HASH + select CRC32 + help + CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) + + Architecture: powerpc64 using + - AltiVec extensions + + Enable on POWER8 and newer processors for improved performance. + +config CRYPTO_CRCT10DIF_VPMSUM + tristate "CRC32T10DIF" + depends on PPC64 && ALTIVEC && CRC_T10DIF + select CRYPTO_HASH + help + CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF) + + Architecture: powerpc64 using + - AltiVec extensions + + Enable on POWER8 and newer processors for improved performance. + +config CRYPTO_VPMSUM_TESTER + tristate "CRC32c and CRC32T10DIF hardware acceleration tester" + depends on CRYPTO_CRCT10DIF_VPMSUM && CRYPTO_CRC32C_VPMSUM + help + Stress test for CRC32c and CRCT10DIF algorithms implemented with + powerpc64 AltiVec extensions (POWER8 vpmsum instructions). + Unless you are testing these algorithms, you don't need this. + +config CRYPTO_MD5_PPC + tristate "Digests: MD5" + depends on PPC + select CRYPTO_HASH + help + MD5 message digest algorithm (RFC1321) + + Architecture: powerpc + +config CRYPTO_SHA1_PPC + tristate "Hash functions: SHA-1" + depends on PPC + help + SHA-1 secure hash algorithm (FIPS 180) + + Architecture: powerpc + +config CRYPTO_SHA1_PPC_SPE + tristate "Hash functions: SHA-1 (SPE)" + depends on PPC && SPE + help + SHA-1 secure hash algorithm (FIPS 180) + + Architecture: powerpc using + - SPE (Signal Processing Engine) extensions + +config CRYPTO_SHA256_PPC_SPE + tristate "Hash functions: SHA-224 and SHA-256 (SPE)" + depends on PPC && SPE + select CRYPTO_SHA256 + select CRYPTO_HASH + help + SHA-224 and SHA-256 secure hash algorithms (FIPS 180) + + Architecture: powerpc using + - SPE (Signal Processing Engine) extensions + +config CRYPTO_AES_PPC_SPE + tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (SPE)" + depends on PPC && SPE + select CRYPTO_SKCIPHER + help + Block ciphers: AES cipher algorithms (FIPS-197) + Length-preserving ciphers: AES with ECB, CBC, CTR, and XTS modes + + Architecture: powerpc using: + - SPE (Signal Processing Engine) extensions + + SPE is available for: + - Processor Type: Freescale 8500 + - CPU selection: e500 (8540) + + This module should only be used for low power (router) devices + without hardware AES acceleration (e.g. caam crypto). It reduces the + size of the AES tables from 16KB to 8KB + 256 bytes and mitigates + timining attacks. Nevertheless it might be not as secure as other + architecture specific assembler implementations that work on 1KB + tables or 256 bytes S-boxes. + +endmenu diff --git a/arch/powerpc/crypto/crc-vpmsum_test.c b/arch/powerpc/crypto/crc-vpmsum_test.c index c1c1ef9457fb..273c527868db 100644 --- a/arch/powerpc/crypto/crc-vpmsum_test.c +++ b/arch/powerpc/crypto/crc-vpmsum_test.c @@ -82,7 +82,7 @@ static int __init crc_test_init(void) if (len <= offset) continue; - prandom_bytes(data, len); + get_random_bytes(data, len); len -= offset; crypto_shash_update(crct10dif_shash, data+offset, len); diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 81631e64dbeb..274bce76f5da 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -36,6 +36,7 @@ int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, int64_t opcode, uint64_t msr); /* misc runtime */ +void enable_machine_check(void); extern u64 __bswapdi2(u64); extern s64 __lshrdi3(s64, int); extern s64 __ashldi3(s64, int); @@ -55,19 +56,6 @@ struct kvm_vcpu; void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); -/* Patch sites */ -extern s32 patch__call_flush_branch_caches1; -extern s32 patch__call_flush_branch_caches2; -extern s32 patch__call_flush_branch_caches3; -extern s32 patch__flush_count_cache_return; -extern s32 patch__flush_link_stack_return; -extern s32 patch__call_kvm_flush_link_stack; -extern s32 patch__call_kvm_flush_link_stack_p9; -extern s32 patch__memset_nocache, patch__memcpy_nocache; - -extern long flush_branch_caches; -extern long kvm_flush_link_stack; - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv); void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv); diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index ef2d8b15eaab..e80b2c0e9315 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -86,7 +86,7 @@ do { \ #ifdef CONFIG_PPC_BOOK3S_64 #define NOSPEC_BARRIER_SLOT nop -#elif defined(CONFIG_PPC_FSL_BOOK3E) +#elif defined(CONFIG_PPC_E500) #define NOSPEC_BARRIER_SLOT nop; nop #endif diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 40041ac713d9..75823f39e042 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -112,31 +112,11 @@ static inline bool pte_user(pte_t pte) /* Permission masks used for kernel mappings */ #define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) #define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE) -#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ - _PAGE_NO_CACHE | _PAGE_GUARDED) +#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE | _PAGE_GUARDED) #define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX) #define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO) #define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX) -/* - * Protection used for kernel text. We want the debuggers to be able to - * set breakpoints anywhere, so don't write protect the kernel text - * on platforms where such control is possible. - */ -#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\ - defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE) -#define PAGE_KERNEL_TEXT PAGE_KERNEL_X -#else -#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX -#endif - -/* Make modules code happy. We don't set RO yet */ -#define PAGE_KERNEL_EXEC PAGE_KERNEL_X - -/* Advertise special mapping type for AGP */ -#define PAGE_AGP (PAGE_KERNEL_NC) -#define HAVE_PAGE_AGP - #define PTE_INDEX_SIZE PTE_SHIFT #define PMD_INDEX_SIZE 0 #define PUD_INDEX_SIZE 0 diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index e1af0b394ceb..dd2cff53a111 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -113,9 +113,11 @@ static inline void __pud_free(pud_t *pud) /* * Early pud pages allocated via memblock allocator - * can't be directly freed to slab + * can't be directly freed to slab. KFENCE pages have + * both reserved and slab flags set so need to be freed + * kmem_cache_free. */ - if (PageReserved(page)) + if (PageReserved(page) && !PageSlab(page)) free_reserved_page(page); else kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud); diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h index 4e697bc2f4cd..48f21820afe2 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h @@ -26,16 +26,6 @@ static inline int pud_huge(pud_t pud) return 0; } -static inline int pgd_huge(pgd_t pgd) -{ - /* - * leaf pte for huge page - */ - if (radix_enabled()) - return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE)); - return 0; -} -#define pgd_huge pgd_huge /* * With radix , we have hugepage ptes in the pud and pmd entries. We don't * need to setup hugepage directory for them. Our pte and page directory format diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h index 34d1018896b3..2fce3498b000 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h @@ -30,15 +30,6 @@ static inline int pud_huge(pud_t pud) return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); } -static inline int pgd_huge(pgd_t pgd) -{ - /* - * leaf pte for huge page - */ - return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE)); -} -#define pgd_huge pgd_huge - /* * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't * need to setup hugepage directory for them. Our pte and page directory format diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 392ff48f77df..c436d8422654 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -117,8 +117,7 @@ #define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY) #define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_READ) #define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC) -#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \ - _PAGE_RW | _PAGE_EXEC) +#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) /* * _PAGE_CHG_MASK masks of bits that are to be preserved across * pgprot changes @@ -151,33 +150,17 @@ #define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) #define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ) #define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) +/* Radix only, Hash uses PAGE_READONLY_X + execute-only pkey instead */ +#define PAGE_EXECONLY __pgprot(_PAGE_BASE | _PAGE_EXEC) /* Permission masks used for kernel mappings */ #define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) -#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ - _PAGE_TOLERANT) -#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ - _PAGE_NON_IDEMPOTENT) +#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_TOLERANT) +#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NON_IDEMPOTENT) #define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX) #define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO) #define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX) -/* - * Protection used for kernel text. We want the debuggers to be able to - * set breakpoints anywhere, so don't write protect the kernel text - * on platforms where such control is possible. - */ -#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) || \ - defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE) -#define PAGE_KERNEL_TEXT PAGE_KERNEL_X -#else -#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX -#endif - -/* Make modules code happy. We don't set RO yet */ -#define PAGE_KERNEL_EXEC PAGE_KERNEL_X -#define PAGE_AGP (PAGE_KERNEL_NC) - #ifndef __ASSEMBLY__ /* * page table defines @@ -333,9 +316,6 @@ extern unsigned long pci_io_base; #define IOREMAP_END (KERN_IO_END - FIXADDR_SIZE) #define FIXADDR_SIZE SZ_32M -/* Advertise special mapping type for AGP */ -#define HAVE_PAGE_AGP - #ifndef __ASSEMBLY__ /* @@ -411,6 +391,9 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, * event of it not getting flushed for a long time the delay * shouldn't really matter because there's no real memory * pressure for swapout to react to. ] + * + * Note: this optimisation also exists in pte_needs_flush() and + * huge_pmd_needs_flush(). */ #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH #define ptep_clear_flush_young ptep_test_and_clear_young @@ -1123,7 +1106,7 @@ static inline void vmemmap_remove_mapping(unsigned long start, } #endif -#ifdef CONFIG_DEBUG_PAGEALLOC +#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) static inline void __kernel_map_pages(struct page *page, int numpages, int enable) { if (radix_enabled()) @@ -1457,12 +1440,5 @@ static inline bool pud_is_leaf(pud_t pud) return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); } -#define p4d_is_leaf p4d_is_leaf -#define p4d_leaf p4d_is_leaf -static inline bool p4d_is_leaf(p4d_t p4d) -{ - return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PTE)); -} - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index 8b762f282190..fab8332fe1ad 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -112,13 +112,11 @@ static inline void hash__flush_tlb_kernel_range(unsigned long start, struct mmu_gather; extern void hash__tlb_flush(struct mmu_gather *tlb); -void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr); #ifdef CONFIG_PPC_64S_HASH_MMU /* Private function for use by PCI IO mapping code */ extern void __flush_hash_table_range(unsigned long start, unsigned long end); -extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr); +void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr); #else static inline void __flush_hash_table_range(unsigned long start, unsigned long end) { } #endif diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 206f920fe5b9..67655cd60545 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -163,6 +163,62 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, */ } +static inline bool __pte_flags_need_flush(unsigned long oldval, + unsigned long newval) +{ + unsigned long delta = oldval ^ newval; + + /* + * The return value of this function doesn't matter for hash, + * ptep_modify_prot_start() does a pte_update() which does or schedules + * any necessary hash table update and flush. + */ + if (!radix_enabled()) + return true; + + /* + * We do not expect kernel mappings or non-PTEs or not-present PTEs. + */ + VM_WARN_ON_ONCE(oldval & _PAGE_PRIVILEGED); + VM_WARN_ON_ONCE(newval & _PAGE_PRIVILEGED); + VM_WARN_ON_ONCE(!(oldval & _PAGE_PTE)); + VM_WARN_ON_ONCE(!(newval & _PAGE_PTE)); + VM_WARN_ON_ONCE(!(oldval & _PAGE_PRESENT)); + VM_WARN_ON_ONCE(!(newval & _PAGE_PRESENT)); + + /* + * Must flush on any change except READ, WRITE, EXEC, DIRTY, ACCESSED. + * + * In theory, some changed software bits could be tolerated, in + * practice those should rarely if ever matter. + */ + + if (delta & ~(_PAGE_RWX | _PAGE_DIRTY | _PAGE_ACCESSED)) + return true; + + /* + * If any of the above was present in old but cleared in new, flush. + * With the exception of _PAGE_ACCESSED, don't worry about flushing + * if that was cleared (see the comment in ptep_clear_flush_young()). + */ + if ((delta & ~_PAGE_ACCESSED) & oldval) + return true; + + return false; +} + +static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) +{ + return __pte_flags_need_flush(pte_val(oldpte), pte_val(newpte)); +} +#define pte_needs_flush pte_needs_flush + +static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) +{ + return __pte_flags_need_flush(pmd_val(oldpmd), pmd_val(newpmd)); +} +#define huge_pmd_needs_flush huge_pmd_needs_flush + extern bool tlbie_capable; extern bool tlbie_enabled; diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h index e8269434ecbe..d18b748ea3ae 100644 --- a/arch/powerpc/include/asm/book3s/pgtable.h +++ b/arch/powerpc/include/asm/book3s/pgtable.h @@ -25,7 +25,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot); #define __HAVE_PHYS_MEM_ACCESS_PROT -#if defined(CONFIG_PPC32) || defined(CONFIG_PPC_64S_HASH_MMU) +void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); + /* * This gets called at the end of handling a page fault, when * the kernel has put a new PTE into the page table for the process. @@ -35,10 +36,14 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, * corresponding HPTE into the hash table ahead of time, instead of * waiting for the inevitable extra hash-table miss exception. */ -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); -#else -static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) {} -#endif +static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +{ + if (IS_ENABLED(CONFIG_PPC32) && !mmu_has_feature(MMU_FTR_HPTE_TABLE)) + return; + if (radix_enabled()) + return; + __update_mmu_cache(vma, address, ptep); +} #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/cpu_setup.h b/arch/powerpc/include/asm/cpu_setup.h new file mode 100644 index 000000000000..30e2fe389502 --- /dev/null +++ b/arch/powerpc/include/asm/cpu_setup.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 IBM Corporation + */ + +#ifndef _ASM_POWERPC_CPU_SETUP_H +#define _ASM_POWERPC_CPU_SETUP_H +void __setup_cpu_power7(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_power8(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_power9(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_power10(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power7(void); +void __restore_cpu_power8(void); +void __restore_cpu_power9(void); +void __restore_cpu_power10(void); + +void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_440ep(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_440epx(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_440gx(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_440grx(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_440spe(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_440x5(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_460ex(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_460gt(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_603(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_604(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_750(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_750cx(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_750fx(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_7400(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_7410(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_745x(unsigned long offset, struct cpu_spec *spec); + +void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_pa6t(void); +void __restore_cpu_ppc970(void); + +void __setup_cpu_e5500(unsigned long offset, struct cpu_spec *spec); +void __setup_cpu_e6500(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_e5500(void); +void __restore_cpu_e6500(void); +#endif /* _ASM_POWERPC_CPU_SETUP_H */ diff --git a/arch/powerpc/include/asm/cpu_setup_power.h b/arch/powerpc/include/asm/cpu_setup_power.h deleted file mode 100644 index 24be9131f803..000000000000 --- a/arch/powerpc/include/asm/cpu_setup_power.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2020 IBM Corporation - */ -void __setup_cpu_power7(unsigned long offset, struct cpu_spec *spec); -void __restore_cpu_power7(void); -void __setup_cpu_power8(unsigned long offset, struct cpu_spec *spec); -void __restore_cpu_power8(void); -void __setup_cpu_power9(unsigned long offset, struct cpu_spec *spec); -void __restore_cpu_power9(void); -void __setup_cpu_power10(unsigned long offset, struct cpu_spec *spec); -void __restore_cpu_power10(void); diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index ae8c3e13cfce..757dbded11dc 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -463,7 +463,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTRS_COMPATIBLE (CPU_FTR_PPCAS_ARCH_V2) #ifdef CONFIG_PPC64 -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 #define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500) #else #ifdef CONFIG_CPU_LITTLE_ENDIAN @@ -510,7 +510,7 @@ enum { #elif defined(CONFIG_44x) CPU_FTRS_44X | CPU_FTRS_440x6 | #endif -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 CPU_FTRS_E500 | CPU_FTRS_E500_2 | #endif #ifdef CONFIG_PPC_E500MC @@ -521,7 +521,7 @@ enum { #endif /* __powerpc64__ */ #ifdef CONFIG_PPC64 -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 #define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500) #else @@ -584,7 +584,7 @@ enum { #elif defined(CONFIG_44x) CPU_FTRS_44X & CPU_FTRS_440x6 & #endif -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 CPU_FTRS_E500 & CPU_FTRS_E500_2 & #endif #ifdef CONFIG_PPC_E500MC diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 6d2b27997492..431ae2343022 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -95,7 +95,7 @@ static notrace inline void account_stolen_time(void) struct lppaca *lp = local_paca->lppaca_ptr; if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx))) - accumulate_stolen_time(); + pseries_accumulate_stolen_time(); } #endif } diff --git a/arch/powerpc/include/asm/dtl.h b/arch/powerpc/include/asm/dtl.h index 1625888f27ef..4bcb9f9ac764 100644 --- a/arch/powerpc/include/asm/dtl.h +++ b/arch/powerpc/include/asm/dtl.h @@ -37,14 +37,6 @@ struct dtl_entry { extern struct kmem_cache *dtl_cache; extern rwlock_t dtl_access_lock; -/* - * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls - * reading from the dispatch trace log. If other code wants to consume - * DTL entries, it can set this pointer to a function that will get - * called once for each DTL entry that gets processed. - */ -extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index); - extern void register_dtl_buffer(int cpu); extern void alloc_dtl_buffers(unsigned long *time_limit); extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity); diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 32ce0fb7548f..ea71f7245a63 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -7,8 +7,8 @@ #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/hugetlb.h> -#elif defined(CONFIG_PPC_FSL_BOOK3E) -#include <asm/nohash/hugetlb-book3e.h> +#elif defined(CONFIG_PPC_E500) +#include <asm/nohash/hugetlb-e500.h> #elif defined(CONFIG_PPC_8xx) #include <asm/nohash/32/hugetlb-8xx.h> #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 983551859891..77fa88c2aed0 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -157,36 +157,18 @@ static inline notrace void irq_soft_mask_set(unsigned long mask) static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask) { - unsigned long flags; - -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON(mask && !(mask & IRQS_DISABLED)); -#endif + unsigned long flags = irq_soft_mask_return(); - asm volatile( - "lbz %0,%1(13); stb %2,%1(13)" - : "=&r" (flags) - : "i" (offsetof(struct paca_struct, irq_soft_mask)), - "r" (mask) - : "memory"); + irq_soft_mask_set(mask); return flags; } static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask) { - unsigned long flags, tmp; - - asm volatile( - "lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)" - : "=&r" (flags), "=r" (tmp) - : "i" (offsetof(struct paca_struct, irq_soft_mask)), - "r" (mask) - : "memory"); + unsigned long flags = irq_soft_mask_return(); -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED)); -#endif + irq_soft_mask_set(flags | mask); return flags; } @@ -489,6 +471,30 @@ static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned l } #endif /* CONFIG_PPC64 */ +static inline unsigned long mtmsr_isync_irqsafe(unsigned long msr) +{ +#ifdef CONFIG_PPC64 + if (arch_irqs_disabled()) { + /* + * With soft-masking, MSR[EE] can change from 1 to 0 + * asynchronously when irqs are disabled, and we don't want to + * set MSR[EE] back to 1 here if that has happened. A race-free + * way to do this is ensure EE is already 0. Another way it + * could be done is with a RESTART_TABLE handler, but that's + * probably overkill here. + */ + msr &= ~MSR_EE; + mtmsr_isync(msr); + irq_soft_mask_set(IRQS_ALL_DISABLED); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + } else +#endif + mtmsr_isync(msr); + + return msr; +} + + #define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 8069dbc4b8d1..4745bb9998bd 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -74,6 +74,19 @@ #include <asm/kprobes.h> #include <asm/runlatch.h> +#ifdef CONFIG_PPC64 +/* + * WARN/BUG is handled with a program interrupt so minimise checks here to + * avoid recursion and maximise the chance of getting the first oops handled. + */ +#define INT_SOFT_MASK_BUG_ON(regs, cond) \ +do { \ + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && \ + (user_mode(regs) || (TRAP(regs) != INTERRUPT_PROGRAM))) \ + BUG_ON(cond); \ +} while (0) +#endif + #ifdef CONFIG_PPC_BOOK3S_64 extern char __end_soft_masked[]; bool search_kernel_soft_mask_table(unsigned long addr); @@ -170,8 +183,7 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs) * context. */ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) { - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - BUG_ON(!(regs->msr & MSR_EE)); + INT_SOFT_MASK_BUG_ON(regs, !(regs->msr & MSR_EE)); __hard_irq_enable(); } else { __hard_RI_enable(); @@ -194,19 +206,15 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs) * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. */ - if (TRAP(regs) != INTERRUPT_PROGRAM) { - CT_WARN_ON(ct_state() != CONTEXT_KERNEL); - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - BUG_ON(is_implicit_soft_masked(regs)); - } - - /* Move this under a debugging check */ - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && - arch_irq_disabled_regs(regs)) - BUG_ON(search_kernel_restart_table(regs->nip)); + if (TRAP(regs) != INTERRUPT_PROGRAM) + CT_WARN_ON(ct_state() != CONTEXT_KERNEL && + ct_state() != CONTEXT_IDLE); + INT_SOFT_MASK_BUG_ON(regs, is_implicit_soft_masked(regs)); + INT_SOFT_MASK_BUG_ON(regs, arch_irq_disabled_regs(regs) && + search_kernel_restart_table(regs->nip)); } - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE)); + INT_SOFT_MASK_BUG_ON(regs, !arch_irq_disabled_regs(regs) && + !(regs->msr & MSR_EE)); #endif booke_restore_dbcr0(); @@ -281,7 +289,7 @@ static inline bool nmi_disables_ftrace(struct pt_regs *regs) if (TRAP(regs) == INTERRUPT_PERFMON) return false; } - if (IS_ENABLED(CONFIG_PPC_BOOK3E)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) { if (TRAP(regs) == INTERRUPT_PERFMON) return false; } @@ -665,8 +673,7 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs) local_irq_enable(); } -long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, - unsigned long r0, struct pt_regs *regs); +long system_call_exception(struct pt_regs *regs, unsigned long r0); notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs); notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index f8d122d16af4..a1ddba01e7d1 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -3,7 +3,7 @@ #define _ASM_POWERPC_KEXEC_H #ifdef __KERNEL__ -#if defined(CONFIG_FSL_BOOKE) || defined(CONFIG_44x) +#if defined(CONFIG_PPC_85xx) || defined(CONFIG_44x) /* * On FSL-BookE we setup a 1:1 mapping which covers the first 2GiB of memory diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h index a9846b68c6b9..6fd2b4d486c5 100644 --- a/arch/powerpc/include/asm/kfence.h +++ b/arch/powerpc/include/asm/kfence.h @@ -11,11 +11,25 @@ #include <linux/mm.h> #include <asm/pgtable.h> +#ifdef CONFIG_PPC64_ELF_ABI_V1 +#define ARCH_FUNC_PREFIX "." +#endif + static inline bool arch_kfence_init_pool(void) { return true; } +#ifdef CONFIG_PPC64 +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + struct page *page = virt_to_page(addr); + + __kernel_map_pages(page, 1, !protect); + + return true; +} +#else static inline bool kfence_protect_page(unsigned long addr, bool protect) { pte_t *kpte = virt_to_kpte(addr); @@ -29,5 +43,6 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) return true; } +#endif #endif /* __ASM_POWERPC_KFENCE_H */ diff --git a/arch/powerpc/include/asm/kgdb.h b/arch/powerpc/include/asm/kgdb.h index a9e098a3b881..715c18b75334 100644 --- a/arch/powerpc/include/asm/kgdb.h +++ b/arch/powerpc/include/asm/kgdb.h @@ -52,7 +52,7 @@ static inline void arch_kgdb_breakpoint(void) /* On non-E500 family PPC32 we determine the size by picking the last * register we need, but on E500 we skip sections so we list what we * need to store, and add it up. */ -#ifndef CONFIG_E500 +#ifndef CONFIG_PPC_E500 #define MAXREG (PT_FPSCR+1) #else /* 32 GPRs (8 bytes), nip, msr, ccr, link, ctr, xer, acc (8 bytes), spefscr*/ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index c2b003550dc9..caea15dcb91d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -443,7 +443,7 @@ struct kvmppc_passthru_irqmap { }; #endif -# ifdef CONFIG_PPC_FSL_BOOK3E +# ifdef CONFIG_PPC_E500 #define KVMPPC_BOOKE_IAC_NUM 2 #define KVMPPC_BOOKE_DAC_NUM 2 # else diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9f625af3b65b..bfacf12784dd 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -104,7 +104,6 @@ extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, unsigned int gtlb_idx); -extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); @@ -153,7 +152,6 @@ extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu); extern int kvmppc_booke_init(void); extern void kvmppc_booke_exit(void); -extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu); extern void kvmppc_map_magic(struct kvm_vcpu *vcpu); @@ -162,8 +160,6 @@ extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info); extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order); extern void kvmppc_free_hpt(struct kvm_hpt_info *info); extern void kvmppc_rmap_reset(struct kvm *kvm); -extern long kvmppc_prepare_vrma(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, unsigned long porder); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index c390ec377bae..34d44cb17c87 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -104,14 +104,18 @@ struct lppaca { volatile __be32 dispersion_count; /* dispatch changed physical cpu */ volatile __be64 cmo_faults; /* CMO page fault count */ volatile __be64 cmo_fault_time; /* CMO page fault time */ - u8 reserved10[104]; + u8 reserved10[64]; /* [S]PURR expropriated/donated */ + volatile __be64 enqueue_dispatch_tb; /* Total TB enqueue->dispatch */ + volatile __be64 ready_enqueue_tb; /* Total TB ready->enqueue */ + volatile __be64 wait_ready_tb; /* Total TB wait->ready */ + u8 reserved11[16]; /* cacheline 4-5 */ __be32 page_ins; /* CMO Hint - # page ins by OS */ - u8 reserved11[148]; + u8 reserved12[148]; volatile __be64 dtl_idx; /* Dispatch Trace Log head index */ - u8 reserved12[96]; + u8 reserved13[96]; } ____cacheline_aligned; #define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 8cb83600c434..378b8d5836a7 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -204,7 +204,6 @@ struct machdep_calls { extern void e500_idle(void); extern void power4_idle(void); extern void ppc6xx_idle(void); -extern void book3e_idle(void); /* * ppc_md contains a copy of the machine description structure for the diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 860d0290ca4d..94b981152667 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -120,6 +120,9 @@ */ #define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) +// NX paste RMA reject in DSI +#define MMU_FTR_NX_DSI ASM_CONST(0x80000000) + /* MMU feature bit sets for various CPUs */ #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 (MMU_FTR_HPTE_TABLE | MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE) #define MMU_FTRS_POWER MMU_FTRS_DEFAULT_HPTE_ARCH_V2 @@ -141,7 +144,7 @@ typedef pte_t *pgtable_t; -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 #include <asm/percpu.h> DECLARE_PER_CPU(int, next_tlbcam_idx); #endif @@ -162,7 +165,7 @@ enum { #elif defined(CONFIG_44x) MMU_FTR_TYPE_44x | #endif -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX | #endif #ifdef CONFIG_PPC_BOOK3S_32 @@ -181,7 +184,7 @@ enum { #endif #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | - MMU_FTR_GTSE | + MMU_FTR_GTSE | MMU_FTR_NX_DSI | #endif /* CONFIG_PPC_RADIX_MMU */ #endif #ifdef CONFIG_PPC_KUAP @@ -211,7 +214,7 @@ enum { #elif defined(CONFIG_44x) #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_44x #endif -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E #endif diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 3f25bd3e14eb..c1ea270bb848 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -31,7 +31,6 @@ extern long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua, extern long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem); extern void mm_iommu_init(struct mm_struct *mm); -extern void mm_iommu_cleanup(struct mm_struct *mm); extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, unsigned long ua, unsigned long size); extern struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm, @@ -117,7 +116,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea) } #endif -extern void switch_cop(struct mm_struct *next); extern int use_cop(unsigned long acop, struct mm_struct *mm); extern void drop_cop(unsigned long acop, struct mm_struct *mm); diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 9091e4904a6b..0d40b33184eb 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -130,10 +130,10 @@ void unmap_kernel_page(unsigned long va); #include <asm/nohash/32/pte-40x.h> #elif defined(CONFIG_44x) #include <asm/nohash/32/pte-44x.h> -#elif defined(CONFIG_FSL_BOOKE) && defined(CONFIG_PTE_64BIT) -#include <asm/nohash/pte-book3e.h> -#elif defined(CONFIG_FSL_BOOKE) -#include <asm/nohash/32/pte-fsl-booke.h> +#elif defined(CONFIG_PPC_85xx) && defined(CONFIG_PTE_64BIT) +#include <asm/nohash/pte-e500.h> +#elif defined(CONFIG_PPC_85xx) +#include <asm/nohash/32/pte-85xx.h> #elif defined(CONFIG_PPC_8xx) #include <asm/nohash/32/pte-8xx.h> #endif diff --git a/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h b/arch/powerpc/include/asm/nohash/32/pte-85xx.h index 0fc1bd42bb3e..93fb8e11a3f1 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h +++ b/arch/powerpc/include/asm/nohash/32/pte-85xx.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H -#define _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H +#ifndef _ASM_POWERPC_NOHASH_32_PTE_85xx_H +#define _ASM_POWERPC_NOHASH_32_PTE_85xx_H #ifdef __KERNEL__ /* PTE bit definitions for Freescale BookE SW loaded TLB MMU based @@ -71,4 +71,4 @@ #define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) #endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H */ +#endif /* _ASM_POWERPC_NOHASH_32_PTE_FSL_85xx_H */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 599921cc257e..879e9a6e5a87 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -70,7 +70,7 @@ /* * Include the PTE bits definitions */ -#include <asm/nohash/pte-book3e.h> +#include <asm/nohash/pte-e500.h> #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) diff --git a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h b/arch/powerpc/include/asm/nohash/hugetlb-e500.h index ecd8694cb229..8f04ad20e040 100644 --- a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h +++ b/arch/powerpc/include/asm/nohash/hugetlb-e500.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H -#define _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H +#ifndef _ASM_POWERPC_NOHASH_HUGETLB_E500_H +#define _ASM_POWERPC_NOHASH_HUGETLB_E500_H static inline pte_t *hugepd_page(hugepd_t hpd) { @@ -30,7 +30,7 @@ void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) { - /* We use the old format for PPC_FSL_BOOK3E */ + /* We use the old format for PPC_E500 */ *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift); } @@ -42,4 +42,4 @@ static inline int check_and_get_huge_psize(int shift) return shift_to_mmu_psize(shift); } -#endif /* _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H */ +#endif /* _ASM_POWERPC_NOHASH_HUGETLB_E500_H */ diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-e500.h index e43a418d3ccd..e43a418d3ccd 100644 --- a/arch/powerpc/include/asm/nohash/mmu-book3e.h +++ b/arch/powerpc/include/asm/nohash/mmu-e500.h diff --git a/arch/powerpc/include/asm/nohash/mmu.h b/arch/powerpc/include/asm/nohash/mmu.h index edc793e5f08f..e264be219fdb 100644 --- a/arch/powerpc/include/asm/nohash/mmu.h +++ b/arch/powerpc/include/asm/nohash/mmu.h @@ -8,9 +8,9 @@ #elif defined(CONFIG_44x) /* 44x-style software loaded TLB */ #include <asm/nohash/32/mmu-44x.h> -#elif defined(CONFIG_PPC_BOOK3E_MMU) +#elif defined(CONFIG_PPC_E500) /* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */ -#include <asm/nohash/mmu-book3e.h> +#include <asm/nohash/mmu-e500.h> #elif defined (CONFIG_PPC_8xx) /* Motorola/Freescale 8xx software loaded TLB */ #include <asm/nohash/32/mmu-8xx.h> diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h index 29c43665a753..4b62376318e1 100644 --- a/arch/powerpc/include/asm/nohash/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/pgalloc.h @@ -15,7 +15,7 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb, { } -#endif /* !CONFIG_PPC_BOOK3E */ +#endif /* !CONFIG_PPC_BOOK3E_64 */ static inline pgd_t *pgd_alloc(struct mm_struct *mm) { diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index b499da6c1a99..d9067dfc531c 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -11,31 +11,11 @@ /* Permission masks used for kernel mappings */ #define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) #define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE) -#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ - _PAGE_NO_CACHE | _PAGE_GUARDED) +#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE | _PAGE_GUARDED) #define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX) #define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO) #define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX) -/* - * Protection used for kernel text. We want the debuggers to be able to - * set breakpoints anywhere, so don't write protect the kernel text - * on platforms where such control is possible. - */ -#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\ - defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE) -#define PAGE_KERNEL_TEXT PAGE_KERNEL_X -#else -#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX -#endif - -/* Make modules code happy. We don't set RO yet */ -#define PAGE_KERNEL_EXEC PAGE_KERNEL_X - -/* Advertise special mapping type for AGP */ -#define PAGE_AGP (PAGE_KERNEL_NC) -#define HAVE_PAGE_AGP - #ifndef __ASSEMBLY__ /* Generic accessors to PTE bits */ @@ -277,12 +257,6 @@ static inline int pud_huge(pud_t pud) return 0; } -static inline int pgd_huge(pgd_t pgd) -{ - return 0; -} -#define pgd_huge pgd_huge - #define is_hugepd(hpd) (hugepd_ok(hpd)) #endif @@ -292,7 +266,7 @@ static inline int pgd_huge(pgd_t pgd) * We use it to ensure coherency between the i-cache and d-cache * for the page which has just been mapped in. */ -#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_HUGETLB_PAGE) +#if defined(CONFIG_PPC_E500) && defined(CONFIG_HUGETLB_PAGE) void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); #else static inline diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-e500.h index f798640422c2..0934e8965e4e 100644 --- a/arch/powerpc/include/asm/nohash/pte-book3e.h +++ b/arch/powerpc/include/asm/nohash/pte-e500.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_PTE_BOOK3E_H -#define _ASM_POWERPC_NOHASH_PTE_BOOK3E_H +#ifndef _ASM_POWERPC_NOHASH_PTE_E500_H +#define _ASM_POWERPC_NOHASH_PTE_E500_H #ifdef __KERNEL__ /* PTE bit definitions for processors compliant to the Book3E @@ -126,4 +126,4 @@ static inline pte_t pte_mkexec(pte_t pte) #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_NOHASH_PTE_BOOK3E_H */ +#endif /* _ASM_POWERPC_NOHASH_PTE_E500_H */ diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h index 698935d4f72d..bdaf34ad41ea 100644 --- a/arch/powerpc/include/asm/nohash/tlbflush.h +++ b/arch/powerpc/include/asm/nohash/tlbflush.h @@ -18,7 +18,7 @@ /* * TLB flushing for software loaded TLB chips * - * TODO: (CONFIG_FSL_BOOKE) determine if flush_tlb_range & + * TODO: (CONFIG_PPC_85xx) determine if flush_tlb_range & * flush_tlb_kernel_range are best implemented as tlbia vs * specific tlbie's */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index bfd3142cd0ba..726125a534de 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -324,16 +324,10 @@ extern int opal_flush_console(uint32_t vtermno); extern void hvc_opal_init_early(void); -extern int opal_notifier_register(struct notifier_block *nb); -extern int opal_notifier_unregister(struct notifier_block *nb); - extern int opal_message_notifier_register(enum opal_msg_type msg_type, struct notifier_block *nb); extern int opal_message_notifier_unregister(enum opal_msg_type msg_type, struct notifier_block *nb); -extern void opal_notifier_enable(void); -extern void opal_notifier_disable(void); -extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val); extern int opal_async_get_token_interruptible(void); extern int opal_async_release_token(int token); diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 4d7aaab82702..09f1790d0ae1 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -18,7 +18,7 @@ #include <asm/lppaca.h> #include <asm/mmu.h> #include <asm/page.h> -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 #include <asm/exception-64e.h> #else #include <asm/exception-64s.h> @@ -127,7 +127,7 @@ struct paca_struct { #endif #endif /* CONFIG_PPC_BOOK3S_64 */ -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 u64 exgen[8] __aligned(0x40); /* Keep pgd in the same cacheline as the start of extlb */ pgd_t *pgd __aligned(0x40); /* Current PGD */ @@ -151,7 +151,7 @@ struct paca_struct { void *dbg_kstack; struct tlb_core_data tcd; -#endif /* CONFIG_PPC_BOOK3E */ +#endif /* CONFIG_PPC_BOOK3E_64 */ #ifdef CONFIG_PPC_64S_HASH_MMU unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; @@ -168,7 +168,7 @@ struct paca_struct { #ifdef CONFIG_PPC64 u64 exit_save_r1; /* Syscall/interrupt R1 save */ #endif -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 u16 trap_save; /* Used when bad stack is encountered */ #endif #ifdef CONFIG_PPC_BOOK3S_64 @@ -263,7 +263,6 @@ struct paca_struct { u64 l1d_flush_size; #endif #ifdef CONFIG_PPC_PSERIES - struct rtas_args *rtas_args_reentrant; u8 *mce_data_buf; /* buffer to hold per cpu rtas errlog */ #endif /* CONFIG_PPC_PSERIES */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index e5f75c70eda8..edf1dd1b0ca9 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -31,7 +31,7 @@ extern unsigned int hpage_shift; #define HPAGE_SHIFT hpage_shift #elif defined(CONFIG_PPC_8xx) #define HPAGE_SHIFT 19 /* 512k pages */ -#elif defined(CONFIG_PPC_FSL_BOOK3E) +#elif defined(CONFIG_PPC_E500) #define HPAGE_SHIFT 22 /* 4M pages */ #endif #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) @@ -308,12 +308,6 @@ static inline bool pfn_valid(unsigned long pfn) #include <asm/pgtable-types.h> #endif - -#ifndef CONFIG_HUGETLB_PAGE -#define is_hugepd(pdep) (0) -#define pgd_huge(pgd) (0) -#endif /* CONFIG_HUGETLB_PAGE */ - struct page; extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); extern void copy_user_page(void *to, void *from, unsigned long vaddr, diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index eb7df559ae74..f5ba1a3c41f8 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -21,6 +21,18 @@ static inline bool is_shared_processor(void) return static_branch_unlikely(&shared_processor); } +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +u64 pseries_paravirt_steal_clock(int cpu); + +static inline u64 paravirt_steal_clock(int cpu) +{ + return pseries_paravirt_steal_clock(cpu); +} +#endif + /* If bit 0 is set, the cpu has been ceded, conferred, or preempted */ static inline u32 yield_count_of(int cpu) { diff --git a/arch/powerpc/include/asm/paravirt_api_clock.h b/arch/powerpc/include/asm/paravirt_api_clock.h new file mode 100644 index 000000000000..d25ca7ac57c7 --- /dev/null +++ b/arch/powerpc/include/asm/paravirt_api_clock.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <asm/paravirt.h> diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h index b169bbf95fcb..82633200b500 100644 --- a/arch/powerpc/include/asm/pgtable-be-types.h +++ b/arch/powerpc/include/asm/pgtable-be-types.h @@ -101,6 +101,7 @@ static inline bool pmd_xchg(pmd_t *pmdp, pmd_t old, pmd_t new) return pmd_raw(old) == prev; } +#ifdef CONFIG_ARCH_HAS_HUGEPD typedef struct { __be64 pdbe; } hugepd_t; #define __hugepd(x) ((hugepd_t) { cpu_to_be64(x) }) @@ -108,5 +109,6 @@ static inline unsigned long hpd_val(hugepd_t x) { return be64_to_cpu(x.pdbe); } +#endif #endif /* _ASM_POWERPC_PGTABLE_BE_TYPES_H */ diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index efed0db7b1db..082c85cc09b1 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -83,11 +83,13 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) } #endif +#ifdef CONFIG_ARCH_HAS_HUGEPD typedef struct { unsigned long pd; } hugepd_t; #define __hugepd(x) ((hugepd_t) { (x) }) static inline unsigned long hpd_val(hugepd_t x) { return x.pd; } +#endif #endif /* _ASM_POWERPC_PGTABLE_TYPES_H */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 33f4bf8d22b0..283f40d05a4d 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -20,6 +20,25 @@ struct mm_struct; #include <asm/nohash/pgtable.h> #endif /* !CONFIG_PPC_BOOK3S */ +/* + * Protection used for kernel text. We want the debuggers to be able to + * set breakpoints anywhere, so don't write protect the kernel text + * on platforms where such control is possible. + */ +#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) || \ + defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE) +#define PAGE_KERNEL_TEXT PAGE_KERNEL_X +#else +#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX +#endif + +/* Make modules code happy. We don't set RO yet */ +#define PAGE_KERNEL_EXEC PAGE_KERNEL_X + +/* Advertise special mapping type for AGP */ +#define PAGE_AGP (PAGE_KERNEL_NC) +#define HAVE_PAGE_AGP + #ifndef __ASSEMBLY__ #ifndef MAX_PTRS_PER_PGD diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index c6d724104ed1..21e33e46f4b8 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -330,6 +330,7 @@ #define __PPC_XSP(s) ((((s) & 0x1e) | (((s) >> 5) & 0x1)) << 21) #define __PPC_XTP(s) __PPC_XSP(s) #define __PPC_T_TLB(t) (((t) & 0x3) << 21) +#define __PPC_PL(p) (((p) & 0x3) << 16) #define __PPC_WC(w) (((w) & 0x3) << 21) #define __PPC_WS(w) (((w) & 0x1f) << 11) #define __PPC_SH(s) __PPC_WS(s) @@ -388,7 +389,8 @@ #define PPC_RAW_RFDI (0x4c00004e) #define PPC_RAW_RFMCI (0x4c00004c) #define PPC_RAW_TLBILX(t, a, b) (0x7c000024 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_RAW_WAIT(w) (0x7c00007c | __PPC_WC(w)) +#define PPC_RAW_WAIT_v203 (0x7c00007c) +#define PPC_RAW_WAIT(w, p) (0x7c00003c | __PPC_WC(w) | __PPC_PL(p)) #define PPC_RAW_TLBIE(lp, a) (0x7c000264 | ___PPC_RB(a) | ___PPC_RS(lp)) #define PPC_RAW_TLBIE_5(rb, rs, ric, prs, r) \ (0x7c000264 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r)) @@ -606,7 +608,8 @@ #define PPC_TLBILX_ALL(a, b) PPC_TLBILX(0, a, b) #define PPC_TLBILX_PID(a, b) PPC_TLBILX(1, a, b) #define PPC_TLBILX_VA(a, b) PPC_TLBILX(3, a, b) -#define PPC_WAIT(w) stringify_in_c(.long PPC_RAW_WAIT(w)) +#define PPC_WAIT_v203 stringify_in_c(.long PPC_RAW_WAIT_v203) +#define PPC_WAIT(w, p) stringify_in_c(.long PPC_RAW_WAIT(w, p)) #define PPC_TLBIE(lp, a) stringify_in_c(.long PPC_RAW_TLBIE(lp, a)) #define PPC_TLBIE_5(rb, rs, ric, prs, r) \ stringify_in_c(.long PPC_RAW_TLBIE_5(rb, rs, ric, prs, r)) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 83c02f5a7f2a..753a2757bcd4 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -34,6 +34,20 @@ .endm /* + * This expands to a sequence of register clears for regs start to end + * inclusive, of the form: + * + * li rN, 0 + */ +.macro ZEROIZE_REGS start, end + .Lreg=\start + .rept (\end - \start + 1) + li .Lreg, 0 + .Lreg=.Lreg+1 + .endr +.endm + +/* * Macros for storing registers into and loading registers from * exception frames. */ @@ -49,6 +63,14 @@ #define REST_NVGPRS(base) REST_GPRS(13, 31, base) #endif +#define ZEROIZE_GPRS(start, end) ZEROIZE_REGS start, end +#ifdef __powerpc64__ +#define ZEROIZE_NVGPRS() ZEROIZE_GPRS(14, 31) +#else +#define ZEROIZE_NVGPRS() ZEROIZE_GPRS(13, 31) +#endif +#define ZEROIZE_GPR(n) ZEROIZE_GPRS(n, n) + #define SAVE_GPR(n, base) SAVE_GPRS(n, n, base) #define REST_GPR(n, base) REST_GPRS(n, n, base) @@ -305,6 +327,12 @@ n: #ifdef __powerpc64__ +#define __LOAD_PACA_TOC(reg) \ + ld reg,PACATOC(r13) + +#define LOAD_PACA_TOC() \ + __LOAD_PACA_TOC(r2) + #define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr #define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr) \ @@ -315,7 +343,19 @@ n: rldimi reg, tmp, 32, 0 #define LOAD_REG_ADDR(reg,name) \ - ld reg,name@got(r2) + addis reg,r2,name@toc@ha; \ + addi reg,reg,name@toc@l + +#ifdef CONFIG_PPC_BOOK3E_64 +/* + * This is used in register-constrained interrupt handlers. Not to be used + * by BOOK3S. ld complains with "got/toc optimization is not supported" if r2 + * is not used for the TOC offset, so use @got(tocreg). If the interrupt + * handlers saved r2 instead, LOAD_REG_ADDR could be used. + */ +#define LOAD_REG_ADDR_ALTTOC(reg,tocreg,name) \ + ld reg,name@got(tocreg) +#endif #define LOAD_REG_ADDRBASE(reg,name) LOAD_REG_ADDR(reg,name) #define ADDROFF(name) 0 @@ -342,7 +382,7 @@ n: #endif /* various errata or part fixups */ -#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) +#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_E500) #define MFTB(dest) \ 90: mfspr dest, SPRN_TBRL; \ BEGIN_FTR_SECTION_NESTED(96); \ @@ -709,7 +749,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) * kernel is built for. */ -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 #define FIXUP_ENDIAN #else /* @@ -749,7 +789,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) .long 0x2402004c; /* hrfid */ \ 191: -#endif /* !CONFIG_PPC_BOOK3E */ +#endif /* !CONFIG_PPC_BOOK3E_64 */ #endif /* __ASSEMBLY__ */ @@ -768,7 +808,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) stringify_in_c(.llong (_target);) \ stringify_in_c(.previous) -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 #define BTB_FLUSH(reg) \ lis reg,BUCSR_INIT@h; \ ori reg,reg,BUCSR_INIT@l; \ @@ -776,6 +816,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) isync; #else #define BTB_FLUSH(reg) -#endif /* CONFIG_PPC_FSL_BOOK3E */ +#endif /* CONFIG_PPC_E500 */ #endif /* _ASM_POWERPC_PPC_ASM_H */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index fdfaae194ddd..631802999d59 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -75,7 +75,6 @@ extern int _chrp_type; struct task_struct; void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp); -void release_thread(struct task_struct *); #define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET] #define TS_CKFPR(i) ckfp_state.fpr[i][TS_FPROFFSET] @@ -355,11 +354,23 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #ifdef CONFIG_PPC64 -#define spin_begin() HMT_low() +#define spin_begin() \ + asm volatile(ASM_FTR_IFCLR( \ + "or 1,1,1", /* HMT_LOW */ \ + "nop", /* v3.1 uses pause_short in cpu_relax instead */ \ + %0) :: "i" (CPU_FTR_ARCH_31) : "memory") -#define spin_cpu_relax() barrier() +#define spin_cpu_relax() \ + asm volatile(ASM_FTR_IFCLR( \ + "nop", /* Before v3.1 use priority nops in spin_begin/end */ \ + PPC_WAIT(2, 0), /* aka pause_short */ \ + %0) :: "i" (CPU_FTR_ARCH_31) : "memory") -#define spin_end() HMT_medium() +#define spin_end() \ + asm volatile(ASM_FTR_IFCLR( \ + "or 2,2,2", /* HMT_MEDIUM */ \ + "nop", \ + %0) :: "i" (CPU_FTR_ARCH_31) : "memory") #endif @@ -426,6 +437,8 @@ extern int fix_alignment(struct pt_regs *); #endif int do_mathemu(struct pt_regs *regs); +int do_spe_mathemu(struct pt_regs *regs); +int speround_handler(struct pt_regs *regs); /* VMX copying */ int enter_vmx_usercopy(void); diff --git a/arch/powerpc/include/asm/ps3av.h b/arch/powerpc/include/asm/ps3av.h index 82db78fc169d..c8b0f2ffcd35 100644 --- a/arch/powerpc/include/asm/ps3av.h +++ b/arch/powerpc/include/asm/ps3av.h @@ -726,6 +726,4 @@ extern int ps3av_video_mode2res(u32, u32 *, u32 *); extern int ps3av_video_mute(int); extern int ps3av_audio_mute(int); extern int ps3av_audio_mute_analog(int); -extern int ps3av_dev_open(void); -extern int ps3av_dev_close(void); #endif /* _ASM_POWERPC_PS3AV_H_ */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index a03403695cd4..2efec6d87049 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -99,6 +99,13 @@ struct pt_regs #define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)) +// Always displays as "REGS" in memory dumps +#ifdef CONFIG_CPU_BIG_ENDIAN +#define STACK_FRAME_REGS_MARKER ASM_CONST(0x52454753) +#else +#define STACK_FRAME_REGS_MARKER ASM_CONST(0x53474552) +#endif + #ifdef __powerpc64__ /* @@ -115,7 +122,6 @@ struct pt_regs #define STACK_FRAME_OVERHEAD 112 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 2 /* Location of LR in stack frame */ -#define STACK_FRAME_REGS_MARKER ASM_CONST(0x7265677368657265) #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + \ STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) #define STACK_FRAME_MARKER 12 @@ -136,7 +142,6 @@ struct pt_regs #define KERNEL_REDZONE_SIZE 0 #define STACK_FRAME_OVERHEAD 16 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 1 /* Location of LR in stack frame */ -#define STACK_FRAME_REGS_MARKER ASM_CONST(0x72656773) #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_FRAME_MARKER 2 #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 17b8dcd9a40d..af56980b6cdb 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -246,7 +246,7 @@ #define PPC47x_MCSR_FPR 0x00800000 /* FPR parity error */ #define PPC47x_MCSR_IPR 0x00400000 /* Imprecise Machine Check Exception */ -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 /* All e500 */ #define MCSR_MCP 0x80000000UL /* Machine Check Input Pin */ #define MCSR_ICPERR 0x40000000UL /* I-Cache Parity Error */ @@ -282,7 +282,7 @@ #endif /* Bit definitions for the HID1 */ -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 /* e500v1/v2 */ #define HID1_PLL_CFG_MASK 0xfc000000 /* PLL_CFG input pins */ #define HID1_RFXE 0x00020000 /* Read fault exception enable */ @@ -545,7 +545,7 @@ #define TCR_FIE 0x00800000 /* FIT Interrupt Enable */ #define TCR_ARE 0x00400000 /* Auto Reload Enable */ -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 #define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \ (((tcr) & 0x1E0000) >> 15)) #else diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 00531af17ce0..56319aea646e 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -240,7 +240,6 @@ extern struct rtas_t rtas; extern int rtas_token(const char *service); extern int rtas_service_present(const char *service); extern int rtas_call(int token, int, int, int *, ...); -int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...); void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...); extern void __noreturn rtas_restart(char *cmd); diff --git a/arch/powerpc/include/asm/runlatch.h b/arch/powerpc/include/asm/runlatch.h index cfb390edf7d0..ceb66d761fe1 100644 --- a/arch/powerpc/include/asm/runlatch.h +++ b/arch/powerpc/include/asm/runlatch.h @@ -19,10 +19,9 @@ extern void __ppc64_runlatch_off(void); do { \ if (cpu_has_feature(CPU_FTR_CTRL) && \ test_thread_local_flags(_TLF_RUNLATCH)) { \ - unsigned long msr = mfmsr(); \ __hard_irq_disable(); \ __ppc64_runlatch_off(); \ - if (msr & MSR_EE) \ + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) \ __hard_irq_enable(); \ } \ } while (0) @@ -31,10 +30,9 @@ extern void __ppc64_runlatch_off(void); do { \ if (cpu_has_feature(CPU_FTR_CTRL) && \ !test_thread_local_flags(_TLF_RUNLATCH)) { \ - unsigned long msr = mfmsr(); \ __hard_irq_disable(); \ __ppc64_runlatch_on(); \ - if (msr & MSR_EE) \ + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) \ __hard_irq_enable(); \ } \ } while (0) diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 8be2c491c733..9c00c9c0ca8f 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -13,15 +13,26 @@ typedef struct func_desc func_desc_t; #include <asm-generic/sections.h> extern char __head_end[]; +extern char __srwx_boundary[]; + +/* Patch sites */ +extern s32 patch__call_flush_branch_caches1; +extern s32 patch__call_flush_branch_caches2; +extern s32 patch__call_flush_branch_caches3; +extern s32 patch__flush_count_cache_return; +extern s32 patch__flush_link_stack_return; +extern s32 patch__call_kvm_flush_link_stack; +extern s32 patch__call_kvm_flush_link_stack_p9; +extern s32 patch__memset_nocache, patch__memcpy_nocache; + +extern long flush_branch_caches; +extern long kvm_flush_link_stack; #ifdef __powerpc64__ extern char __start_interrupts[]; extern char __end_interrupts[]; -extern char __prom_init_toc_start[]; -extern char __prom_init_toc_end[]; - #ifdef CONFIG_PPC_POWERNV extern char start_real_trampolines[]; extern char end_real_trampolines[]; diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index d8c28902cf59..e29e83f8a89c 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -7,7 +7,6 @@ #ifndef __ASSEMBLY__ extern void ppc_printk_progress(char *s, unsigned short hex); -extern unsigned int rtas_data; extern unsigned long long memory_limit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); @@ -70,7 +69,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *start, void *end); static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { } #endif -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 void __init setup_spectre_v2(void); #else static inline void setup_spectre_v2(void) {} @@ -89,6 +88,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long pp, unsigned long r6, unsigned long r7, unsigned long kbase); +extern struct seq_buf ppc_hw_desc; + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index 7130176d8cb8..b0b4c64870d7 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -44,7 +44,7 @@ static inline void ppc_after_tlbiel_barrier(void) #if defined(__powerpc64__) # define LWSYNC lwsync -#elif defined(CONFIG_E500) +#elif defined(CONFIG_PPC_E500) # define LWSYNC \ START_LWSYNC_SECTION(96); \ sync; \ diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 25fc8ad9a27a..3dd36c5e334a 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -14,9 +14,16 @@ #include <linux/sched.h> #include <linux/thread_info.h> +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER +typedef long (*syscall_fn)(const struct pt_regs *); +#else +typedef long (*syscall_fn)(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); +#endif + /* ftrace syscalls requires exporting the sys_call_table */ -extern const unsigned long sys_call_table[]; -extern const unsigned long compat_sys_call_table[]; +extern const syscall_fn sys_call_table[]; +extern const syscall_fn compat_sys_call_table[]; static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { diff --git a/arch/powerpc/include/asm/syscall_wrapper.h b/arch/powerpc/include/asm/syscall_wrapper.h new file mode 100644 index 000000000000..67486c67e8a2 --- /dev/null +++ b/arch/powerpc/include/asm/syscall_wrapper.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * syscall_wrapper.h - powerpc specific wrappers to syscall definitions + * + * Based on arch/{x86,arm64}/include/asm/syscall_wrapper.h + */ + +#ifndef __ASM_POWERPC_SYSCALL_WRAPPER_H +#define __ASM_POWERPC_SYSCALL_WRAPPER_H + +struct pt_regs; + +#define SC_POWERPC_REGS_TO_ARGS(x, ...) \ + __MAP(x,__SC_ARGS \ + ,,regs->gpr[3],,regs->gpr[4],,regs->gpr[5] \ + ,,regs->gpr[6],,regs->gpr[7],,regs->gpr[8]) + +#define __SYSCALL_DEFINEx(x, name, ...) \ + long sys##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(sys##name, ERRNO); \ + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + long sys##name(const struct pt_regs *regs) \ + { \ + return __se_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__)); \ + } \ + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ + return ret; \ + } \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + long sys_##sname(const struct pt_regs *__unused); \ + ALLOW_ERROR_INJECTION(sys_##sname, ERRNO); \ + long sys_##sname(const struct pt_regs *__unused) + +#define COND_SYSCALL(name) \ + long sys_##name(const struct pt_regs *regs); \ + long __weak sys_##name(const struct pt_regs *regs) \ + { \ + return sys_ni_syscall(); \ + } + +#endif // __ASM_POWERPC_SYSCALL_WRAPPER_H diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h index a2b13e55254f..a1142496cd58 100644 --- a/arch/powerpc/include/asm/syscalls.h +++ b/arch/powerpc/include/asm/syscalls.h @@ -8,49 +8,147 @@ #include <linux/types.h> #include <linux/compat.h> +#include <asm/syscall.h> +#ifdef CONFIG_PPC64 +#include <asm/syscalls_32.h> +#endif +#include <asm/unistd.h> +#include <asm/ucontext.h> + +#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER +long sys_ni_syscall(void); +#else +long sys_ni_syscall(const struct pt_regs *regs); +#endif + struct rtas_args; -asmlinkage long sys_mmap(unsigned long addr, size_t len, - unsigned long prot, unsigned long flags, - unsigned long fd, off_t offset); -asmlinkage long sys_mmap2(unsigned long addr, size_t len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff); -asmlinkage long ppc64_personality(unsigned long personality); -asmlinkage long sys_rtas(struct rtas_args __user *uargs); -int ppc_select(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, struct __kernel_old_timeval __user *tvp); -long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, - u32 len_high, u32 len_low); +/* + * long long munging: + * The 32 bit ABI passes long longs in an odd even register pair. + * High and low parts are swapped depending on endian mode, + * so define a macro (similar to mips linux32) to handle that. + */ +#ifdef __LITTLE_ENDIAN__ +#define merge_64(low, high) (((u64)high << 32) | low) +#else +#define merge_64(high, low) (((u64)high << 32) | low) +#endif + +/* + * PowerPC architecture-specific syscalls + */ + +#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER + +long sys_rtas(struct rtas_args __user *uargs); +#ifdef CONFIG_PPC64 +long sys_ppc64_personality(unsigned long personality); #ifdef CONFIG_COMPAT -unsigned long compat_sys_mmap2(unsigned long addr, size_t len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff); +long compat_sys_ppc64_personality(unsigned long personality); +#endif /* CONFIG_COMPAT */ +#endif /* CONFIG_PPC64 */ -compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, - u32 reg6, u32 pos1, u32 pos2); +long sys_swapcontext(struct ucontext __user *old_ctx, + struct ucontext __user *new_ctx, long ctx_size); +long sys_mmap(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t offset); +long sys_mmap2(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); +long sys_switch_endian(void); -compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, - u32 reg6, u32 pos1, u32 pos2); +#ifdef CONFIG_PPC32 +long sys_sigreturn(void); +long sys_debug_setcontext(struct ucontext __user *ctx, int ndbg, + struct sig_dbg_op __user *dbg); +#endif -compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count); +long sys_rt_sigreturn(void); -int compat_sys_truncate64(const char __user *path, u32 reg4, - unsigned long len1, unsigned long len2); +long sys_subpage_prot(unsigned long addr, + unsigned long len, u32 __user *map); -long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, u32 len1, u32 len2); +#ifdef CONFIG_COMPAT +long compat_sys_swapcontext(struct ucontext32 __user *old_ctx, + struct ucontext32 __user *new_ctx, + int ctx_size); +long compat_sys_old_getrlimit(unsigned int resource, + struct compat_rlimit __user *rlim); +long compat_sys_sigreturn(void); +long compat_sys_rt_sigreturn(void); +#endif /* CONFIG_COMPAT */ -int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1, - unsigned long len2); +/* + * Architecture specific signatures required by long long munging: + * The 32 bit ABI passes long longs in an odd even register pair. + * The following signatures provide a machine long parameter for + * each register that will be supplied. The implementation is + * responsible for combining parameter pairs. + */ -long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2, - size_t len, int advice); +#ifdef CONFIG_PPC32 +long sys_ppc_pread64(unsigned int fd, + char __user *ubuf, compat_size_t count, + u32 reg6, u32 pos1, u32 pos2); +long sys_ppc_pwrite64(unsigned int fd, + const char __user *ubuf, compat_size_t count, + u32 reg6, u32 pos1, u32 pos2); +long sys_ppc_readahead(int fd, u32 r4, + u32 offset1, u32 offset2, u32 count); +long sys_ppc_truncate64(const char __user *path, u32 reg4, + unsigned long len1, unsigned long len2); +long sys_ppc_ftruncate64(unsigned int fd, u32 reg4, + unsigned long len1, unsigned long len2); +long sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2, + size_t len, int advice); +#endif +#ifdef CONFIG_COMPAT +long compat_sys_mmap2(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); +long compat_sys_ppc_pread64(unsigned int fd, + char __user *ubuf, compat_size_t count, + u32 reg6, u32 pos1, u32 pos2); +long compat_sys_ppc_pwrite64(unsigned int fd, + const char __user *ubuf, compat_size_t count, + u32 reg6, u32 pos1, u32 pos2); +long compat_sys_ppc_readahead(int fd, u32 r4, + u32 offset1, u32 offset2, u32 count); +long compat_sys_ppc_truncate64(const char __user *path, u32 reg4, + unsigned long len1, unsigned long len2); +long compat_sys_ppc_ftruncate64(unsigned int fd, u32 reg4, + unsigned long len1, unsigned long len2); +long compat_sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2, + size_t len, int advice); +long compat_sys_ppc_sync_file_range2(int fd, unsigned int flags, + unsigned int offset1, + unsigned int offset2, + unsigned int nbytes1, + unsigned int nbytes2); +#endif /* CONFIG_COMPAT */ -long compat_sys_sync_file_range2(int fd, unsigned int flags, - unsigned int offset1, unsigned int offset2, - unsigned int nbytes1, unsigned int nbytes2); +#if defined(CONFIG_PPC32) || defined(CONFIG_COMPAT) +long sys_ppc_fadvise64_64(int fd, int advice, + u32 offset_high, u32 offset_low, + u32 len_high, u32 len_low); #endif +#else + +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) +#define __SYSCALL(nr, entry) \ + long entry(const struct pt_regs *regs); + +#ifdef CONFIG_PPC64 +#include <asm/syscall_table_64.h> +#else +#include <asm/syscall_table_32.h> +#endif /* CONFIG_PPC64 */ + +#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */ + #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_SYSCALLS_H */ diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/include/asm/syscalls_32.h index 2346f8c7ff2e..749255568be9 100644 --- a/arch/powerpc/kernel/ppc32.h +++ b/arch/powerpc/include/asm/syscalls_32.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _PPC64_PPC32_H -#define _PPC64_PPC32_H +#ifndef _ASM_POWERPC_SYSCALLS_32_H +#define _ASM_POWERPC_SYSCALLS_32_H #include <linux/compat.h> #include <asm/siginfo.h> @@ -57,4 +57,4 @@ struct ucontext32 { struct mcontext32 uc_mcontext; }; -#endif /* _PPC64_PPC32_H */ +#endif // _ASM_POWERPC_SYSCALLS_32_H diff --git a/arch/powerpc/include/asm/termios.h b/arch/powerpc/include/asm/termios.h deleted file mode 100644 index 205de8f8a9d3..000000000000 --- a/arch/powerpc/include/asm/termios.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Liberally adapted from alpha/termios.h. In particular, the c_cc[] - * fields have been reordered so that termio & termios share the - * common subset in the same order (for brain dead programs that don't - * know or care about the differences). - */ -#ifndef _ASM_POWERPC_TERMIOS_H -#define _ASM_POWERPC_TERMIOS_H - -#include <uapi/asm/termios.h> - -/* ^C ^\ del ^U ^D 1 0 0 0 0 ^W ^R ^Z ^Q ^S ^V ^U */ -#define INIT_C_CC "\003\034\177\025\004\001\000\000\000\000\027\022\032\021\023\026\025" - -#include <asm-generic/termios-base.h> - -#endif /* _ASM_POWERPC_TERMIOS_H */ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 1e5643a9b1f2..9f50766c4623 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -116,8 +116,9 @@ unsigned long long tb_to_ns(unsigned long long tb_ticks); void timer_broadcast_interrupt(void); -/* SPLPAR */ -void accumulate_stolen_time(void); +/* SPLPAR and VIRT_CPU_ACCOUNTING_NATIVE */ +void pseries_accumulate_stolen_time(void); +u64 pseries_calculate_stolen_time(u64 stop_tb); #endif /* __KERNEL__ */ #endif /* __POWERPC_TIME_H */ diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index b4aa0d88ce2c..b1f094728b35 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -15,13 +15,13 @@ extern void (*udbg_flush)(void); extern int (*udbg_getc)(void); extern int (*udbg_getc_poll)(void); -extern void udbg_puts(const char *s); -extern int udbg_write(const char *s, int n); +void udbg_puts(const char *s); +int udbg_write(const char *s, int n); -extern void register_early_udbg_console(void); -extern void udbg_printf(const char *fmt, ...) +void register_early_udbg_console(void); +void udbg_printf(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); -extern void udbg_progress(char *s, unsigned short hex); +void udbg_progress(char *s, unsigned short hex); void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride); void __init udbg_uart_init_pio(unsigned long port, unsigned int stride); @@ -31,28 +31,28 @@ unsigned int __init udbg_probe_uart_speed(unsigned int clock); struct device_node; void __init udbg_scc_init(int force_scc); -extern int udbg_adb_init(int force_btext); -extern void udbg_adb_init_early(void); - -extern void __init udbg_early_init(void); -extern void __init udbg_init_debug_lpar(void); -extern void __init udbg_init_debug_lpar_hvsi(void); -extern void __init udbg_init_pmac_realmode(void); -extern void __init udbg_init_maple_realmode(void); -extern void __init udbg_init_pas_realmode(void); -extern void __init udbg_init_rtas_panel(void); -extern void __init udbg_init_rtas_console(void); -extern void __init udbg_init_debug_beat(void); -extern void __init udbg_init_btext(void); -extern void __init udbg_init_44x_as1(void); -extern void __init udbg_init_40x_realmode(void); -extern void __init udbg_init_cpm(void); -extern void __init udbg_init_usbgecko(void); -extern void __init udbg_init_memcons(void); -extern void __init udbg_init_ehv_bc(void); -extern void __init udbg_init_ps3gelic(void); -extern void __init udbg_init_debug_opal_raw(void); -extern void __init udbg_init_debug_opal_hvsi(void); +int udbg_adb_init(int force_btext); +void udbg_adb_init_early(void); + +void __init udbg_early_init(void); +void __init udbg_init_debug_lpar(void); +void __init udbg_init_debug_lpar_hvsi(void); +void __init udbg_init_pmac_realmode(void); +void __init udbg_init_maple_realmode(void); +void __init udbg_init_pas_realmode(void); +void __init udbg_init_rtas_panel(void); +void __init udbg_init_rtas_console(void); +void __init udbg_init_btext(void); +void __init udbg_init_44x_as1(void); +void __init udbg_init_40x_realmode(void); +void __init udbg_init_cpm(void); +void __init udbg_init_usbgecko(void); +void __init udbg_init_memcons(void); +void __init udbg_init_ehv_bc(void); +void __init udbg_init_ps3gelic(void); +void __init udbg_init_debug_opal_raw(void); +void __init udbg_init_debug_opal_hvsi(void); +void __init udbg_init_debug_16550(void); #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_UDBG_H */ diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index b1129b4ef57d..659a996c75aa 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -45,6 +45,7 @@ #define __ARCH_WANT_SYS_UTIME #define __ARCH_WANT_SYS_NEWFSTATAT #define __ARCH_WANT_COMPAT_STAT +#define __ARCH_WANT_COMPAT_FALLOCATE #define __ARCH_WANT_COMPAT_SYS_SENDFILE #endif #define __ARCH_WANT_SYS_FORK diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index 8542e9bbeead..7650b6ce14c8 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h @@ -2,9 +2,6 @@ #ifndef _ASM_POWERPC_VDSO_H #define _ASM_POWERPC_VDSO_H -/* Default map addresses for 32bit vDSO */ -#define VDSO32_MBASE 0x100000 - #define VDSO_VERSION_STRING LINUX_2.6.15 #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/vdso/processor.h b/arch/powerpc/include/asm/vdso/processor.h index 8d79f994b4aa..80d13207c568 100644 --- a/arch/powerpc/include/asm/vdso/processor.h +++ b/arch/powerpc/include/asm/vdso/processor.h @@ -22,7 +22,13 @@ #endif #ifdef CONFIG_PPC64 -#define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0) +#define cpu_relax() \ + asm volatile(ASM_FTR_IFCLR( \ + /* Pre-POWER10 uses low ; medium priority nops */ \ + "or 1,1,1 ; or 2,2,2", \ + /* POWER10 onward uses pause_short (wait 2,0) */ \ + PPC_WAIT(2, 0), \ + %0) :: "i" (CPU_FTR_ARCH_31) : "memory") #else #define cpu_relax() barrier() #endif diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h index 891c9d5eaabe..e9245f86a46c 100644 --- a/arch/powerpc/include/asm/vdso/timebase.h +++ b/arch/powerpc/include/asm/vdso/timebase.h @@ -12,7 +12,7 @@ * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit * version below in the else case of the ifdef. */ -#if defined(__powerpc64__) && (defined(CONFIG_PPC_CELL) || defined(CONFIG_E500)) +#if defined(__powerpc64__) && (defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_E500)) #define mftb() ({unsigned long rval; \ asm volatile( \ "90: mfspr %0, %2;\n" \ diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index e2e704eca5f6..89090485bec1 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -159,7 +159,6 @@ extern void xics_setup_cpu(void); extern void xics_update_irq_servers(void); extern void xics_set_cpu_giq(unsigned int gserver, unsigned int join); extern void xics_mask_unknown_vec(unsigned int vec); -extern irqreturn_t xics_ipi_dispatch(int cpu); extern void xics_smp_probe(void); extern void xics_register_ics(struct ics *ics); extern void xics_teardown_cpu(void); diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/85xx_entry_mapping.S index dedc17fac8f8..dedc17fac8f8 100644 --- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S +++ b/arch/powerpc/kernel/85xx_entry_mapping.S diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 06d2d1f78f71..9b6146056e48 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -73,6 +73,7 @@ obj-y := cputable.o syscalls.o \ obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o irq_64.o\ paca.o nvram_64.o note.o +obj-$(CONFIG_PPC32) += sys_ppc32.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o obj-$(CONFIG_VDSO32) += vdso32_wrapper.o obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o @@ -81,7 +82,7 @@ obj-$(CONFIG_PPC_DAWR) += dawr.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o -obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o +obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_64e.o obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o obj-$(CONFIG_PPC64) += vdso64_wrapper.o obj-$(CONFIG_ALTIVEC) += vecemu.o @@ -100,30 +101,28 @@ obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_FA_DUMP) += fadump.o obj-$(CONFIG_PRESERVE_FA_DUMP) += fadump.o -ifdef CONFIG_PPC32 -obj-$(CONFIG_E500) += idle_e500.o -endif +obj-$(CONFIG_PPC_85xx) += idle_85xx.o obj-$(CONFIG_PPC_BOOK3S_32) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o obj-$(CONFIG_TAU) += tau_6xx.o obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o -ifdef CONFIG_FSL_BOOKE -obj-$(CONFIG_HIBERNATION) += swsusp_booke.o +ifdef CONFIG_PPC_85xx +obj-$(CONFIG_HIBERNATION) += swsusp_85xx.o else obj-$(CONFIG_HIBERNATION) += swsusp_$(BITS).o endif obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o obj-$(CONFIG_MODULES) += module.o module_$(BITS).o obj-$(CONFIG_44x) += cpu_setup_44x.o -obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o +obj-$(CONFIG_PPC_E500) += cpu_setup_e500.o obj-$(CONFIG_PPC_DOORBELL) += dbell.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o -extra-$(CONFIG_PPC64) := head_64.o -extra-$(CONFIG_PPC_BOOK3S_32) := head_book3s_32.o -extra-$(CONFIG_40x) := head_40x.o -extra-$(CONFIG_44x) := head_44x.o -extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o -extra-$(CONFIG_PPC_8xx) := head_8xx.o +obj-$(CONFIG_PPC64) += head_64.o +obj-$(CONFIG_PPC_BOOK3S_32) += head_book3s_32.o +obj-$(CONFIG_40x) += head_40x.o +obj-$(CONFIG_44x) += head_44x.o +obj-$(CONFIG_PPC_8xx) += head_8xx.o +obj-$(CONFIG_PPC_85xx) += head_85xx.o extra-y += vmlinux.lds obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o @@ -198,10 +197,10 @@ KCOV_INSTRUMENT_paca.o := n CFLAGS_setup_64.o += -fno-stack-protector CFLAGS_paca.o += -fno-stack-protector -extra-$(CONFIG_PPC_FPU) += fpu.o -extra-$(CONFIG_ALTIVEC) += vector.o -extra-$(CONFIG_PPC64) += entry_64.o -extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o +obj-$(CONFIG_PPC_FPU) += fpu.o +obj-$(CONFIG_ALTIVEC) += vector.o +obj-$(CONFIG_PPC64) += entry_64.o +obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8c10f536e478..4ce2a4aa3985 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -59,7 +59,7 @@ #endif #endif -#if defined(CONFIG_PPC_FSL_BOOK3E) +#if defined(CONFIG_PPC_E500) #include "../mm/mmu_decl.h" #endif @@ -197,7 +197,7 @@ int main(void) OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled); -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 OFFSET(PACAPGD, paca_struct, pgd); OFFSET(PACA_KERNELPGD, paca_struct, kernel_pgd); OFFSET(PACA_EXGEN, paca_struct, exgen); @@ -213,7 +213,7 @@ int main(void) OFFSET(TCD_ESEL_NEXT, tlb_core_data, esel_next); OFFSET(TCD_ESEL_MAX, tlb_core_data, esel_max); OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first); -#endif /* CONFIG_PPC_BOOK3E */ +#endif /* CONFIG_PPC_BOOK3E_64 */ #ifdef CONFIG_PPC_BOOK3S_64 OFFSET(PACA_EXGEN, paca_struct, exgen); @@ -248,7 +248,7 @@ int main(void) #ifdef CONFIG_PPC64 OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1); #endif -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); #endif OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso); @@ -651,7 +651,7 @@ int main(void) DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); OFFSET(TLBCAM_MAS0, tlbcam, MAS0); OFFSET(TLBCAM_MAS1, tlbcam, MAS1); diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_e500.S index 4bf33f1b4193..2ab25161b0ad 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_e500.S @@ -12,7 +12,7 @@ #include <asm/processor.h> #include <asm/cputable.h> #include <asm/ppc_asm.h> -#include <asm/nohash/mmu-book3e.h> +#include <asm/nohash/mmu-e500.h> #include <asm/asm-offsets.h> #include <asm/mpc85xx.h> @@ -108,7 +108,7 @@ _GLOBAL(__setup_cpu_e6500) #endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 #ifndef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e500v1) _GLOBAL(__setup_cpu_e500v2) @@ -156,7 +156,7 @@ _GLOBAL(__setup_cpu_e5500) mtlr r5 blr #endif /* CONFIG_PPC_E500MC */ -#endif /* CONFIG_E500 */ +#endif /* CONFIG_PPC_E500 */ #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC_BOOK3E_64 diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c index 3dc61e203f37..097c033668f0 100644 --- a/arch/powerpc/kernel/cpu_setup_power.c +++ b/arch/powerpc/kernel/cpu_setup_power.c @@ -11,7 +11,7 @@ #include <asm/synch.h> #include <linux/bitops.h> #include <asm/cputable.h> -#include <asm/cpu_setup_power.h> +#include <asm/cpu_setup.h> /* Disable CPU_FTR_HVMODE and return false if MSR:HV is not set */ static bool init_hvmode_206(struct cpu_spec *t) diff --git a/arch/powerpc/kernel/cpu_specs.h b/arch/powerpc/kernel/cpu_specs.h new file mode 100644 index 000000000000..85ded3f77204 --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifdef CONFIG_40x +#include "cpu_specs_40x.h" +#endif + +#ifdef CONFIG_PPC_47x +#include "cpu_specs_47x.h" +#elif defined(CONFIG_44x) +#include "cpu_specs_44x.h" +#endif + +#ifdef CONFIG_PPC_8xx +#include "cpu_specs_8xx.h" +#endif + +#ifdef CONFIG_PPC_E500MC +#include "cpu_specs_e500mc.h" +#elif defined(CONFIG_PPC_85xx) +#include "cpu_specs_85xx.h" +#endif + +#ifdef CONFIG_PPC_BOOK3S_32 +#include "cpu_specs_book3s_32.h" +#endif + +#ifdef CONFIG_PPC_BOOK3S_64 +#include "cpu_specs_book3s_64.h" +#endif diff --git a/arch/powerpc/kernel/cpu_specs_40x.h b/arch/powerpc/kernel/cpu_specs_40x.h new file mode 100644 index 000000000000..a1362a75b8c8 --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_40x.h @@ -0,0 +1,280 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + */ + +static struct cpu_spec cpu_specs[] __initdata = { + { /* STB 04xxx */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x41810000, + .cpu_name = "STB04xxx", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* NP405L */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x41610000, + .cpu_name = "NP405L", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* NP4GS3 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x40B10000, + .cpu_name = "NP4GS3", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* NP405H */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x41410000, + .cpu_name = "NP405H", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405GPr */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x50910000, + .cpu_name = "405GPr", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* STBx25xx */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x51510000, + .cpu_name = "STBx25xx", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405LP */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x41F10000, + .cpu_name = "405LP", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EP */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x51210000, + .cpu_name = "405EP", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EX Rev. A/B with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910007, + .cpu_name = "405EX Rev. A/B", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EX Rev. C without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x1291000d, + .cpu_name = "405EX Rev. C", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EX Rev. C with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x1291000f, + .cpu_name = "405EX Rev. C", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EX Rev. D without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910003, + .cpu_name = "405EX Rev. D", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EX Rev. D with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910005, + .cpu_name = "405EX Rev. D", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. A/B without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910001, + .cpu_name = "405EXr Rev. A/B", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. C without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910009, + .cpu_name = "405EXr Rev. C", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. C with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x1291000b, + .cpu_name = "405EXr Rev. C", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. D without Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910000, + .cpu_name = "405EXr Rev. D", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* 405EXr Rev. D with Security */ + .pvr_mask = 0xffff000f, + .pvr_value = 0x12910002, + .cpu_name = "405EXr Rev. D", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { + /* 405EZ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x41510000, + .cpu_name = "405EZ", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* APM8018X */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x7ff11432, + .cpu_name = "APM8018X", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic 40x PPC)", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | + PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + } +}; diff --git a/arch/powerpc/kernel/cpu_specs_44x.h b/arch/powerpc/kernel/cpu_specs_44x.h new file mode 100644 index 000000000000..69c4cdc0cdee --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_44x.h @@ -0,0 +1,304 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + */ + +#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ + PPC_FEATURE_BOOKE) + +static struct cpu_spec cpu_specs[] __initdata = { + { + .pvr_mask = 0xf0000fff, + .pvr_value = 0x40000850, + .cpu_name = "440GR Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x40000858, + .cpu_name = "440EP Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440ep, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { + .pvr_mask = 0xf0000fff, + .pvr_value = 0x400008d3, + .cpu_name = "440GR Rev. B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */ + .pvr_mask = 0xf0000ff7, + .pvr_value = 0x400008d4, + .cpu_name = "440EP Rev. C", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440ep, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x400008db, + .cpu_name = "440EP Rev. B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440ep, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* 440GRX */ + .pvr_mask = 0xf0000ffb, + .pvr_value = 0x200008D0, + .cpu_name = "440GRX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440grx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */ + .pvr_mask = 0xf0000ffb, + .pvr_value = 0x200008D8, + .cpu_name = "440EPX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440epx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440GP Rev. B */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x40000440, + .cpu_name = "440GP Rev. B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440gp", + }, + { /* 440GP Rev. C */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x40000481, + .cpu_name = "440GP Rev. C", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440gp", + }, + { /* 440GX Rev. A */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x50000850, + .cpu_name = "440GX Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440gx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440GX Rev. B */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x50000851, + .cpu_name = "440GX Rev. B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440gx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440GX Rev. C */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x50000892, + .cpu_name = "440GX Rev. C", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440gx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440GX Rev. F */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x50000894, + .cpu_name = "440GX Rev. F", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440gx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440SP Rev. A */ + .pvr_mask = 0xfff00fff, + .pvr_value = 0x53200891, + .cpu_name = "440SP Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* 440SPe Rev. A */ + .pvr_mask = 0xfff00fff, + .pvr_value = 0x53400890, + .cpu_name = "440SPe Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440spe, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440SPe Rev. B */ + .pvr_mask = 0xfff00fff, + .pvr_value = 0x53400891, + .cpu_name = "440SPe Rev. B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440spe, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460EX */ + .pvr_mask = 0xffff0006, + .pvr_value = 0x13020002, + .cpu_name = "460EX", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460ex, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460EX Rev B */ + .pvr_mask = 0xffff0007, + .pvr_value = 0x13020004, + .cpu_name = "460EX Rev. B", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460ex, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460GT */ + .pvr_mask = 0xffff0006, + .pvr_value = 0x13020000, + .cpu_name = "460GT", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460gt, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460GT Rev B */ + .pvr_mask = 0xffff0007, + .pvr_value = 0x13020005, + .cpu_name = "460GT Rev. B", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460gt, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460SX */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x13541800, + .cpu_name = "460SX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460sx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 464 in APM821xx */ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x12C41C80, + .cpu_name = "APM821XX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_apm821xx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic 44x PPC)", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + } +}; diff --git a/arch/powerpc/kernel/cpu_specs_47x.h b/arch/powerpc/kernel/cpu_specs_47x.h new file mode 100644 index 000000000000..3143cd504a51 --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_47x.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + */ + +#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ + PPC_FEATURE_BOOKE) + +static struct cpu_spec cpu_specs[] __initdata = { + { /* 476 DD2 core */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x11a52080, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | + MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, + { /* 476fpe */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x7ff50000, + .cpu_name = "476fpe", + .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | + MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, + { /* 476 iss */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00050000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | + MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, + { /* 476 others */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x11a50000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | + MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic 47x PPC)", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_47x, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + } +}; diff --git a/arch/powerpc/kernel/cpu_specs_85xx.h b/arch/powerpc/kernel/cpu_specs_85xx.h new file mode 100644 index 000000000000..aaae202c1a89 --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_85xx.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + */ + +#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ + PPC_FEATURE_BOOKE) + +static struct cpu_spec cpu_specs[] __initdata = { + { /* e500 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80200000, + .cpu_name = "e500", + .cpu_features = CPU_FTRS_E500, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP | + PPC_FEATURE_HAS_EFP_SINGLE_COMP, + .cpu_user_features2 = PPC_FEATURE2_ISEL, + .mmu_features = MMU_FTR_TYPE_FSL_E, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_e500v1, + .machine_check = machine_check_e500, + .platform = "ppc8540", + }, + { /* e500v2 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80210000, + .cpu_name = "e500v2", + .cpu_features = CPU_FTRS_E500_2, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP | + PPC_FEATURE_HAS_EFP_SINGLE_COMP | + PPC_FEATURE_HAS_EFP_DOUBLE_COMP, + .cpu_user_features2 = PPC_FEATURE2_ISEL, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_e500v2, + .machine_check = machine_check_e500, + .platform = "ppc8548", + .cpu_down_flush = cpu_down_flush_e500v2, + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic E500 PPC)", + .cpu_features = CPU_FTRS_E500, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP | + PPC_FEATURE_HAS_EFP_SINGLE_COMP, + .mmu_features = MMU_FTR_TYPE_FSL_E, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_e500, + .platform = "powerpc", + } +}; diff --git a/arch/powerpc/kernel/cpu_specs_8xx.h b/arch/powerpc/kernel/cpu_specs_8xx.h new file mode 100644 index 000000000000..93ddbc202ba3 --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_8xx.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + */ + +static struct cpu_spec cpu_specs[] __initdata = { + { /* 8xx */ + .pvr_mask = 0xffff0000, + .pvr_value = PVR_8xx, + .cpu_name = "8xx", + /* + * CPU_FTR_MAYBE_CAN_DOZE is possible, + * if the 8xx code is there.... + */ + .cpu_features = CPU_FTRS_8XX, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .mmu_features = MMU_FTR_TYPE_8xx, + .icache_bsize = 16, + .dcache_bsize = 16, + .machine_check = machine_check_8xx, + .platform = "ppc823", + }, +}; diff --git a/arch/powerpc/kernel/cpu_specs_book3s_32.h b/arch/powerpc/kernel/cpu_specs_book3s_32.h new file mode 100644 index 000000000000..3714634d194a --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_book3s_32.h @@ -0,0 +1,605 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + */ + +#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \ + PPC_FEATURE_HAS_MMU) + +static struct cpu_spec cpu_specs[] __initdata = { +#ifdef CONFIG_PPC_BOOK3S_603 + { /* 603 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00030000, + .cpu_name = "603", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 603e */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00060000, + .cpu_name = "603e", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 603ev */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00070000, + .cpu_name = "603ev", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 82xx (8240, 8245, 8260 are all 603e cores) */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00810000, + .cpu_name = "82xx", + .cpu_features = CPU_FTRS_82XX, + .cpu_user_features = COMMON_USER, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* All G2_LE (603e core, plus some) have the same pvr */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00820000, + .cpu_name = "G2_LE", + .cpu_features = CPU_FTRS_G2_LE, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, +#ifdef CONFIG_PPC_83xx + { /* e300c1 (a 603e core, plus some) on 83xx */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00830000, + .cpu_name = "e300c1", + .cpu_features = CPU_FTRS_E300, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_83xx, + .platform = "ppc603", + }, + { /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00840000, + .cpu_name = "e300c2", + .cpu_features = CPU_FTRS_E300C2, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .mmu_features = MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_83xx, + .platform = "ppc603", + }, + { /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00850000, + .cpu_name = "e300c3", + .cpu_features = CPU_FTRS_E300, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_83xx, + .num_pmcs = 4, + .platform = "ppc603", + }, + { /* e300c4 (e300c1, plus one IU) */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00860000, + .cpu_name = "e300c4", + .cpu_features = CPU_FTRS_E300, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_83xx, + .num_pmcs = 4, + .platform = "ppc603", + }, +#endif +#endif /* CONFIG_PPC_BOOK3S_603 */ +#ifdef CONFIG_PPC_BOOK3S_604 + { /* 604 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00040000, + .cpu_name = "604", + .cpu_features = CPU_FTRS_604, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 2, + .cpu_setup = __setup_cpu_604, + .machine_check = machine_check_generic, + .platform = "ppc604", + }, + { /* 604e */ + .pvr_mask = 0xfffff000, + .pvr_value = 0x00090000, + .cpu_name = "604e", + .cpu_features = CPU_FTRS_604, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_604, + .machine_check = machine_check_generic, + .platform = "ppc604", + }, + { /* 604r */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00090000, + .cpu_name = "604r", + .cpu_features = CPU_FTRS_604, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_604, + .machine_check = machine_check_generic, + .platform = "ppc604", + }, + { /* 604ev */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x000a0000, + .cpu_name = "604ev", + .cpu_features = CPU_FTRS_604, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_604, + .machine_check = machine_check_generic, + .platform = "ppc604", + }, + { /* 740/750 (0x4202, don't support TAU ?) */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x00084202, + .cpu_name = "740/750", + .cpu_features = CPU_FTRS_740_NOTAU, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CX (80100 and 8010x?) */ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x00080100, + .cpu_name = "750CX", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_750cx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CX (82201 and 82202) */ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x00082200, + .cpu_name = "750CX", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750cx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CXe (82214) */ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x00082210, + .cpu_name = "750CXe", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750cx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CXe "Gekko" (83214) */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x00083214, + .cpu_name = "750CXe", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750cx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CL (and "Broadway") */ + .pvr_mask = 0xfffff0e0, + .pvr_value = 0x00087000, + .cpu_name = "750CL", + .cpu_features = CPU_FTRS_750CL, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 745/755 */ + .pvr_mask = 0xfffff000, + .pvr_value = 0x00083000, + .cpu_name = "745/755", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750FX rev 1.x */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x70000100, + .cpu_name = "750FX", + .cpu_features = CPU_FTRS_750FX1, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750FX rev 2.0 must disable HID0[DPM] */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x70000200, + .cpu_name = "750FX", + .cpu_features = CPU_FTRS_750FX2, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750FX (All revs except 2.0) */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x70000000, + .cpu_name = "750FX", + .cpu_features = CPU_FTRS_750FX, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750fx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750GX */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x70020000, + .cpu_name = "750GX", + .cpu_features = CPU_FTRS_750GX, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750fx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 740/750 (L2CR bit need fixup for 740) */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00080000, + .cpu_name = "740/750", + .cpu_features = CPU_FTRS_740, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 7400 rev 1.1 ? (no TAU) */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x000c1101, + .cpu_name = "7400 (1.1)", + .cpu_features = CPU_FTRS_7400_NOTAU, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_7400, + .machine_check = machine_check_generic, + .platform = "ppc7400", + }, + { /* 7400 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x000c0000, + .cpu_name = "7400", + .cpu_features = CPU_FTRS_7400, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_7400, + .machine_check = machine_check_generic, + .platform = "ppc7400", + }, + { /* 7410 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x800c0000, + .cpu_name = "7410", + .cpu_features = CPU_FTRS_7400, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_7410, + .machine_check = machine_check_generic, + .platform = "ppc7400", + }, + { /* 7450 2.0 - no doze/nap */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80000200, + .cpu_name = "7450", + .cpu_features = CPU_FTRS_7450_20, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7450 2.1 */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80000201, + .cpu_name = "7450", + .cpu_features = CPU_FTRS_7450_21, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7450 2.3 and newer */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80000000, + .cpu_name = "7450", + .cpu_features = CPU_FTRS_7450_23, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7455 rev 1.x */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x80010100, + .cpu_name = "7455", + .cpu_features = CPU_FTRS_7455_1, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7455 rev 2.0 */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80010200, + .cpu_name = "7455", + .cpu_features = CPU_FTRS_7455_20, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7455 others */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80010000, + .cpu_name = "7455", + .cpu_features = CPU_FTRS_7455, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7447/7457 Rev 1.0 */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80020100, + .cpu_name = "7447/7457", + .cpu_features = CPU_FTRS_7447_10, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7447/7457 Rev 1.1 */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80020101, + .cpu_name = "7447/7457", + .cpu_features = CPU_FTRS_7447_10, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7447/7457 Rev 1.2 and later */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80020000, + .cpu_name = "7447/7457", + .cpu_features = CPU_FTRS_7447, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7447A */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80030000, + .cpu_name = "7447A", + .cpu_features = CPU_FTRS_7447A, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7448 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80040000, + .cpu_name = "7448", + .cpu_features = CPU_FTRS_7448, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* default match, we assume split I/D cache & TB (non-601)... */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic PPC)", + .cpu_features = CPU_FTRS_CLASSIC32, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, +#endif /* CONFIG_PPC_BOOK3S_604 */ +}; diff --git a/arch/powerpc/kernel/cpu_specs_book3s_64.h b/arch/powerpc/kernel/cpu_specs_book3s_64.h new file mode 100644 index 000000000000..c370c1b804a9 --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_book3s_64.h @@ -0,0 +1,481 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> + */ + +/* NOTE: + * Unlike ppc32, ppc64 will only call cpu_setup() for the boot CPU, it's + * the responsibility of the appropriate CPU save/restore functions to + * eventually copy these settings over. Those save/restore aren't yet + * part of the cputable though. That has to be fixed for both ppc32 + * and ppc64 + */ +#define COMMON_USER_PPC64 (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \ + PPC_FEATURE_HAS_MMU | PPC_FEATURE_64) +#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4) +#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) +#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) +#define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_PSERIES_PERFMON_COMPAT) +#define COMMON_USER_POWER7 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_PSERIES_PERFMON_COMPAT) +#define COMMON_USER2_POWER7 (PPC_FEATURE2_DSCR) +#define COMMON_USER_POWER8 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_PSERIES_PERFMON_COMPAT) +#define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \ + PPC_FEATURE2_HTM_COMP | \ + PPC_FEATURE2_HTM_NOSC_COMP | \ + PPC_FEATURE2_DSCR | \ + PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ + PPC_FEATURE2_VEC_CRYPTO) +#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_HAS_ALTIVEC_COMP) +#define COMMON_USER_POWER9 COMMON_USER_POWER8 +#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ + PPC_FEATURE2_ARCH_3_00 | \ + PPC_FEATURE2_HAS_IEEE128 | \ + PPC_FEATURE2_DARN | \ + PPC_FEATURE2_SCV) +#define COMMON_USER_POWER10 COMMON_USER_POWER9 +#define COMMON_USER2_POWER10 (PPC_FEATURE2_ARCH_3_1 | \ + PPC_FEATURE2_MMA | \ + PPC_FEATURE2_ARCH_3_00 | \ + PPC_FEATURE2_HAS_IEEE128 | \ + PPC_FEATURE2_DARN | \ + PPC_FEATURE2_SCV | \ + PPC_FEATURE2_ARCH_2_07 | \ + PPC_FEATURE2_DSCR | \ + PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ + PPC_FEATURE2_VEC_CRYPTO) + +static struct cpu_spec cpu_specs[] __initdata = { + { /* PPC970 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00390000, + .cpu_name = "PPC970", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970, + .cpu_restore = __restore_cpu_ppc970, + .platform = "ppc970", + }, + { /* PPC970FX */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003c0000, + .cpu_name = "PPC970FX", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970, + .cpu_restore = __restore_cpu_ppc970, + .platform = "ppc970", + }, + { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x00440100, + .cpu_name = "PPC970MP", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970, + .cpu_restore = __restore_cpu_ppc970, + .platform = "ppc970", + }, + { /* PPC970MP */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00440000, + .cpu_name = "PPC970MP", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970MP, + .cpu_restore = __restore_cpu_ppc970, + .platform = "ppc970", + }, + { /* PPC970GX */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00450000, + .cpu_name = "PPC970GX", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970, + .platform = "ppc970", + }, + { /* Power5 GR */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003a0000, + .cpu_name = "POWER5 (gr)", + .cpu_features = CPU_FTRS_POWER5, + .cpu_user_features = COMMON_USER_POWER5, + .mmu_features = MMU_FTRS_POWER5, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .platform = "power5", + }, + { /* Power5++ */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x003b0300, + .cpu_name = "POWER5+ (gs)", + .cpu_features = CPU_FTRS_POWER5, + .cpu_user_features = COMMON_USER_POWER5_PLUS, + .mmu_features = MMU_FTRS_POWER5, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .platform = "power5+", + }, + { /* Power5 GS */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003b0000, + .cpu_name = "POWER5+ (gs)", + .cpu_features = CPU_FTRS_POWER5, + .cpu_user_features = COMMON_USER_POWER5_PLUS, + .mmu_features = MMU_FTRS_POWER5, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .platform = "power5+", + }, + { /* POWER6 in P5+ mode; 2.04-compliant processor */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000001, + .cpu_name = "POWER5+", + .cpu_features = CPU_FTRS_POWER5, + .cpu_user_features = COMMON_USER_POWER5_PLUS, + .mmu_features = MMU_FTRS_POWER5, + .icache_bsize = 128, + .dcache_bsize = 128, + .platform = "power5+", + }, + { /* Power6 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003e0000, + .cpu_name = "POWER6 (raw)", + .cpu_features = CPU_FTRS_POWER6, + .cpu_user_features = COMMON_USER_POWER6 | PPC_FEATURE_POWER6_EXT, + .mmu_features = MMU_FTRS_POWER6, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .platform = "power6x", + }, + { /* 2.05-compliant processor, i.e. Power6 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000002, + .cpu_name = "POWER6 (architected)", + .cpu_features = CPU_FTRS_POWER6, + .cpu_user_features = COMMON_USER_POWER6, + .mmu_features = MMU_FTRS_POWER6, + .icache_bsize = 128, + .dcache_bsize = 128, + .platform = "power6", + }, + { /* 2.06-compliant processor, i.e. Power7 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000003, + .cpu_name = "POWER7 (architected)", + .cpu_features = CPU_FTRS_POWER7, + .cpu_user_features = COMMON_USER_POWER7, + .cpu_user_features2 = COMMON_USER2_POWER7, + .mmu_features = MMU_FTRS_POWER7, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, + .machine_check_early = __machine_check_early_realmode_p7, + .platform = "power7", + }, + { /* 2.07-compliant processor, i.e. Power8 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000004, + .cpu_name = "POWER8 (architected)", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, + { /* 3.00-compliant processor, i.e. Power9 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000005, + .cpu_name = "POWER9 (architected)", + .cpu_features = CPU_FTRS_POWER9, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .platform = "power9", + }, + { /* 3.1-compliant processor, i.e. Power10 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000006, + .cpu_name = "POWER10 (architected)", + .cpu_features = CPU_FTRS_POWER10, + .cpu_user_features = COMMON_USER_POWER10, + .cpu_user_features2 = COMMON_USER2_POWER10, + .mmu_features = MMU_FTRS_POWER10, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .platform = "power10", + }, + { /* Power7 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003f0000, + .cpu_name = "POWER7 (raw)", + .cpu_features = CPU_FTRS_POWER7, + .cpu_user_features = COMMON_USER_POWER7, + .cpu_user_features2 = COMMON_USER2_POWER7, + .mmu_features = MMU_FTRS_POWER7, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, + .machine_check_early = __machine_check_early_realmode_p7, + .platform = "power7", + }, + { /* Power7+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004A0000, + .cpu_name = "POWER7+ (raw)", + .cpu_features = CPU_FTRS_POWER7, + .cpu_user_features = COMMON_USER_POWER7, + .cpu_user_features2 = COMMON_USER2_POWER7, + .mmu_features = MMU_FTRS_POWER7, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, + .machine_check_early = __machine_check_early_realmode_p7, + .platform = "power7+", + }, + { /* Power8E */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004b0000, + .cpu_name = "POWER8E (raw)", + .cpu_features = CPU_FTRS_POWER8E, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, + { /* Power8NVL */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004c0000, + .cpu_name = "POWER8NVL (raw)", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, + { /* Power8 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004d0000, + .cpu_name = "POWER8 (raw)", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, + { /* Power9 DD2.0 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0200, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD2_0, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .machine_check_early = __machine_check_early_realmode_p9, + .platform = "power9", + }, + { /* Power9 DD 2.1 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0201, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD2_1, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .machine_check_early = __machine_check_early_realmode_p9, + .platform = "power9", + }, + { /* Power9 DD2.2 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0202, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD2_2, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .machine_check_early = __machine_check_early_realmode_p9, + .platform = "power9", + }, + { /* Power9 DD2.3 or later */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004e0000, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD2_3, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .machine_check_early = __machine_check_early_realmode_p9, + .platform = "power9", + }, + { /* Power10 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00800000, + .cpu_name = "POWER10 (raw)", + .cpu_features = CPU_FTRS_POWER10, + .cpu_user_features = COMMON_USER_POWER10, + .cpu_user_features2 = COMMON_USER2_POWER10, + .mmu_features = MMU_FTRS_POWER10, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .machine_check_early = __machine_check_early_realmode_p10, + .platform = "power10", + }, + { /* Cell Broadband Engine */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00700000, + .cpu_name = "Cell Broadband Engine", + .cpu_features = CPU_FTRS_CELL, + .cpu_user_features = COMMON_USER_PPC64 | PPC_FEATURE_CELL | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_SMT, + .mmu_features = MMU_FTRS_CELL, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .platform = "ppc-cell-be", + }, + { /* PA Semi PA6T */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00900000, + .cpu_name = "PA6T", + .cpu_features = CPU_FTRS_PA6T, + .cpu_user_features = COMMON_USER_PA6T, + .mmu_features = MMU_FTRS_PA6T, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 6, + .pmc_type = PPC_PMC_PA6T, + .cpu_setup = __setup_cpu_pa6t, + .cpu_restore = __restore_cpu_pa6t, + .platform = "pa6t", + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "POWER5 (compatible)", + .cpu_features = CPU_FTRS_COMPATIBLE, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTRS_POWER, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .platform = "power5", + } +}; diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h new file mode 100644 index 000000000000..ceb06b109f83 --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_e500mc.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> + */ + +#ifdef CONFIG_PPC64 +#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ + PPC_FEATURE_HAS_FPU | PPC_FEATURE_64) +#else +#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ + PPC_FEATURE_BOOKE) +#endif + +static struct cpu_spec cpu_specs[] __initdata = { +#ifdef CONFIG_PPC32 + { /* e500mc */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80230000, + .cpu_name = "e500mc", + .cpu_features = CPU_FTRS_E500MC, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .cpu_user_features2 = PPC_FEATURE2_ISEL, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_e500mc, + .machine_check = machine_check_e500mc, + .platform = "ppce500mc", + .cpu_down_flush = cpu_down_flush_e500mc, + }, +#endif /* CONFIG_PPC32 */ + { /* e5500 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80240000, + .cpu_name = "e5500", + .cpu_features = CPU_FTRS_E5500, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .cpu_user_features2 = PPC_FEATURE2_ISEL, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_e5500, +#ifndef CONFIG_PPC32 + .cpu_restore = __restore_cpu_e5500, +#endif + .machine_check = machine_check_e500mc, + .platform = "ppce5500", + .cpu_down_flush = cpu_down_flush_e5500, + }, + { /* e6500 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80400000, + .cpu_name = "e6500", + .cpu_features = CPU_FTRS_E6500, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .cpu_user_features2 = PPC_FEATURE2_ISEL, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 6, + .cpu_setup = __setup_cpu_e6500, +#ifndef CONFIG_PPC32 + .cpu_restore = __restore_cpu_e6500, +#endif + .machine_check = machine_check_e500mc, + .platform = "ppce6500", + .cpu_down_flush = cpu_down_flush_e6500, + }, +}; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index d8e42ef750f1..8a32bffefa5b 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -18,6 +18,7 @@ #include <asm/mce.h> #include <asm/mmu.h> #include <asm/setup.h> +#include <asm/cpu_setup.h> static struct cpu_spec the_cpu_spec __read_mostly; @@ -27,1922 +28,7 @@ EXPORT_SYMBOL(cur_cpu_spec); /* The platform string corresponding to the real PVR */ const char *powerpc_base_platform; -/* NOTE: - * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's - * the responsibility of the appropriate CPU save/restore functions to - * eventually copy these settings over. Those save/restore aren't yet - * part of the cputable though. That has to be fixed for both ppc32 - * and ppc64 - */ -#ifdef CONFIG_PPC32 -extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_440ep(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_440epx(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_440x5(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec); -extern void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec); -extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_750cx(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_750fx(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_7400(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec); -#endif /* CONFIG_PPC32 */ -#ifdef CONFIG_PPC64 -#include <asm/cpu_setup_power.h> -extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_pa6t(void); -extern void __restore_cpu_ppc970(void); -extern long __machine_check_early_realmode_p7(struct pt_regs *regs); -extern long __machine_check_early_realmode_p8(struct pt_regs *regs); -extern long __machine_check_early_realmode_p9(struct pt_regs *regs); -#endif /* CONFIG_PPC64 */ -#if defined(CONFIG_E500) -extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_e6500(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_e5500(void); -extern void __restore_cpu_e6500(void); -#endif /* CONFIG_E500 */ - -/* This table only contains "desktop" CPUs, it need to be filled with embedded - * ones as well... - */ -#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \ - PPC_FEATURE_HAS_MMU) -#define COMMON_USER_PPC64 (COMMON_USER | PPC_FEATURE_64) -#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4) -#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\ - PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) -#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\ - PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) -#define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\ - PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ - PPC_FEATURE_TRUE_LE | \ - PPC_FEATURE_PSERIES_PERFMON_COMPAT) -#define COMMON_USER_POWER7 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\ - PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ - PPC_FEATURE_TRUE_LE | \ - PPC_FEATURE_PSERIES_PERFMON_COMPAT) -#define COMMON_USER2_POWER7 (PPC_FEATURE2_DSCR) -#define COMMON_USER_POWER8 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\ - PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ - PPC_FEATURE_TRUE_LE | \ - PPC_FEATURE_PSERIES_PERFMON_COMPAT) -#define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \ - PPC_FEATURE2_HTM_COMP | \ - PPC_FEATURE2_HTM_NOSC_COMP | \ - PPC_FEATURE2_DSCR | \ - PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ - PPC_FEATURE2_VEC_CRYPTO) -#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ - PPC_FEATURE_TRUE_LE | \ - PPC_FEATURE_HAS_ALTIVEC_COMP) -#define COMMON_USER_POWER9 COMMON_USER_POWER8 -#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ - PPC_FEATURE2_ARCH_3_00 | \ - PPC_FEATURE2_HAS_IEEE128 | \ - PPC_FEATURE2_DARN | \ - PPC_FEATURE2_SCV) -#define COMMON_USER_POWER10 COMMON_USER_POWER9 -#define COMMON_USER2_POWER10 (PPC_FEATURE2_ARCH_3_1 | \ - PPC_FEATURE2_MMA | \ - PPC_FEATURE2_ARCH_3_00 | \ - PPC_FEATURE2_HAS_IEEE128 | \ - PPC_FEATURE2_DARN | \ - PPC_FEATURE2_SCV | \ - PPC_FEATURE2_ARCH_2_07 | \ - PPC_FEATURE2_DSCR | \ - PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ - PPC_FEATURE2_VEC_CRYPTO) - -#ifdef CONFIG_PPC_BOOK3E_64 -#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) -#else -#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ - PPC_FEATURE_BOOKE) -#endif - -static struct cpu_spec __initdata cpu_specs[] = { -#ifdef CONFIG_PPC_BOOK3S_64 - { /* PPC970 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00390000, - .cpu_name = "PPC970", - .cpu_features = CPU_FTRS_PPC970, - .cpu_user_features = COMMON_USER_POWER4 | - PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTRS_PPC970, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_ppc970, - .cpu_restore = __restore_cpu_ppc970, - .platform = "ppc970", - }, - { /* PPC970FX */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x003c0000, - .cpu_name = "PPC970FX", - .cpu_features = CPU_FTRS_PPC970, - .cpu_user_features = COMMON_USER_POWER4 | - PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTRS_PPC970, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_ppc970, - .cpu_restore = __restore_cpu_ppc970, - .platform = "ppc970", - }, - { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x00440100, - .cpu_name = "PPC970MP", - .cpu_features = CPU_FTRS_PPC970, - .cpu_user_features = COMMON_USER_POWER4 | - PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTRS_PPC970, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_ppc970, - .cpu_restore = __restore_cpu_ppc970, - .platform = "ppc970", - }, - { /* PPC970MP */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00440000, - .cpu_name = "PPC970MP", - .cpu_features = CPU_FTRS_PPC970, - .cpu_user_features = COMMON_USER_POWER4 | - PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTRS_PPC970, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_ppc970MP, - .cpu_restore = __restore_cpu_ppc970, - .platform = "ppc970", - }, - { /* PPC970GX */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00450000, - .cpu_name = "PPC970GX", - .cpu_features = CPU_FTRS_PPC970, - .cpu_user_features = COMMON_USER_POWER4 | - PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTRS_PPC970, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_ppc970, - .platform = "ppc970", - }, - { /* Power5 GR */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x003a0000, - .cpu_name = "POWER5 (gr)", - .cpu_features = CPU_FTRS_POWER5, - .cpu_user_features = COMMON_USER_POWER5, - .mmu_features = MMU_FTRS_POWER5, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .platform = "power5", - }, - { /* Power5++ */ - .pvr_mask = 0xffffff00, - .pvr_value = 0x003b0300, - .cpu_name = "POWER5+ (gs)", - .cpu_features = CPU_FTRS_POWER5, - .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTRS_POWER5, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .platform = "power5+", - }, - { /* Power5 GS */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x003b0000, - .cpu_name = "POWER5+ (gs)", - .cpu_features = CPU_FTRS_POWER5, - .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTRS_POWER5, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .platform = "power5+", - }, - { /* POWER6 in P5+ mode; 2.04-compliant processor */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x0f000001, - .cpu_name = "POWER5+", - .cpu_features = CPU_FTRS_POWER5, - .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTRS_POWER5, - .icache_bsize = 128, - .dcache_bsize = 128, - .platform = "power5+", - }, - { /* Power6 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x003e0000, - .cpu_name = "POWER6 (raw)", - .cpu_features = CPU_FTRS_POWER6, - .cpu_user_features = COMMON_USER_POWER6 | - PPC_FEATURE_POWER6_EXT, - .mmu_features = MMU_FTRS_POWER6, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .platform = "power6x", - }, - { /* 2.05-compliant processor, i.e. Power6 "architected" mode */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x0f000002, - .cpu_name = "POWER6 (architected)", - .cpu_features = CPU_FTRS_POWER6, - .cpu_user_features = COMMON_USER_POWER6, - .mmu_features = MMU_FTRS_POWER6, - .icache_bsize = 128, - .dcache_bsize = 128, - .platform = "power6", - }, - { /* 2.06-compliant processor, i.e. Power7 "architected" mode */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x0f000003, - .cpu_name = "POWER7 (architected)", - .cpu_features = CPU_FTRS_POWER7, - .cpu_user_features = COMMON_USER_POWER7, - .cpu_user_features2 = COMMON_USER2_POWER7, - .mmu_features = MMU_FTRS_POWER7, - .icache_bsize = 128, - .dcache_bsize = 128, - .cpu_setup = __setup_cpu_power7, - .cpu_restore = __restore_cpu_power7, - .machine_check_early = __machine_check_early_realmode_p7, - .platform = "power7", - }, - { /* 2.07-compliant processor, i.e. Power8 "architected" mode */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x0f000004, - .cpu_name = "POWER8 (architected)", - .cpu_features = CPU_FTRS_POWER8, - .cpu_user_features = COMMON_USER_POWER8, - .cpu_user_features2 = COMMON_USER2_POWER8, - .mmu_features = MMU_FTRS_POWER8, - .icache_bsize = 128, - .dcache_bsize = 128, - .cpu_setup = __setup_cpu_power8, - .cpu_restore = __restore_cpu_power8, - .machine_check_early = __machine_check_early_realmode_p8, - .platform = "power8", - }, - { /* 3.00-compliant processor, i.e. Power9 "architected" mode */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x0f000005, - .cpu_name = "POWER9 (architected)", - .cpu_features = CPU_FTRS_POWER9, - .cpu_user_features = COMMON_USER_POWER9, - .cpu_user_features2 = COMMON_USER2_POWER9, - .mmu_features = MMU_FTRS_POWER9, - .icache_bsize = 128, - .dcache_bsize = 128, - .cpu_setup = __setup_cpu_power9, - .cpu_restore = __restore_cpu_power9, - .platform = "power9", - }, - { /* 3.1-compliant processor, i.e. Power10 "architected" mode */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x0f000006, - .cpu_name = "POWER10 (architected)", - .cpu_features = CPU_FTRS_POWER10, - .cpu_user_features = COMMON_USER_POWER10, - .cpu_user_features2 = COMMON_USER2_POWER10, - .mmu_features = MMU_FTRS_POWER10, - .icache_bsize = 128, - .dcache_bsize = 128, - .cpu_setup = __setup_cpu_power10, - .cpu_restore = __restore_cpu_power10, - .platform = "power10", - }, - { /* Power7 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x003f0000, - .cpu_name = "POWER7 (raw)", - .cpu_features = CPU_FTRS_POWER7, - .cpu_user_features = COMMON_USER_POWER7, - .cpu_user_features2 = COMMON_USER2_POWER7, - .mmu_features = MMU_FTRS_POWER7, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_power7, - .cpu_restore = __restore_cpu_power7, - .machine_check_early = __machine_check_early_realmode_p7, - .platform = "power7", - }, - { /* Power7+ */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004A0000, - .cpu_name = "POWER7+ (raw)", - .cpu_features = CPU_FTRS_POWER7, - .cpu_user_features = COMMON_USER_POWER7, - .cpu_user_features2 = COMMON_USER2_POWER7, - .mmu_features = MMU_FTRS_POWER7, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_power7, - .cpu_restore = __restore_cpu_power7, - .machine_check_early = __machine_check_early_realmode_p7, - .platform = "power7+", - }, - { /* Power8E */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004b0000, - .cpu_name = "POWER8E (raw)", - .cpu_features = CPU_FTRS_POWER8E, - .cpu_user_features = COMMON_USER_POWER8, - .cpu_user_features2 = COMMON_USER2_POWER8, - .mmu_features = MMU_FTRS_POWER8, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_power8, - .cpu_restore = __restore_cpu_power8, - .machine_check_early = __machine_check_early_realmode_p8, - .platform = "power8", - }, - { /* Power8NVL */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004c0000, - .cpu_name = "POWER8NVL (raw)", - .cpu_features = CPU_FTRS_POWER8, - .cpu_user_features = COMMON_USER_POWER8, - .cpu_user_features2 = COMMON_USER2_POWER8, - .mmu_features = MMU_FTRS_POWER8, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_power8, - .cpu_restore = __restore_cpu_power8, - .machine_check_early = __machine_check_early_realmode_p8, - .platform = "power8", - }, - { /* Power8 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004d0000, - .cpu_name = "POWER8 (raw)", - .cpu_features = CPU_FTRS_POWER8, - .cpu_user_features = COMMON_USER_POWER8, - .cpu_user_features2 = COMMON_USER2_POWER8, - .mmu_features = MMU_FTRS_POWER8, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_power8, - .cpu_restore = __restore_cpu_power8, - .machine_check_early = __machine_check_early_realmode_p8, - .platform = "power8", - }, - { /* Power9 DD2.0 */ - .pvr_mask = 0xffffefff, - .pvr_value = 0x004e0200, - .cpu_name = "POWER9 (raw)", - .cpu_features = CPU_FTRS_POWER9_DD2_0, - .cpu_user_features = COMMON_USER_POWER9, - .cpu_user_features2 = COMMON_USER2_POWER9, - .mmu_features = MMU_FTRS_POWER9, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_power9, - .cpu_restore = __restore_cpu_power9, - .machine_check_early = __machine_check_early_realmode_p9, - .platform = "power9", - }, - { /* Power9 DD 2.1 */ - .pvr_mask = 0xffffefff, - .pvr_value = 0x004e0201, - .cpu_name = "POWER9 (raw)", - .cpu_features = CPU_FTRS_POWER9_DD2_1, - .cpu_user_features = COMMON_USER_POWER9, - .cpu_user_features2 = COMMON_USER2_POWER9, - .mmu_features = MMU_FTRS_POWER9, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_power9, - .cpu_restore = __restore_cpu_power9, - .machine_check_early = __machine_check_early_realmode_p9, - .platform = "power9", - }, - { /* Power9 DD2.2 */ - .pvr_mask = 0xffffefff, - .pvr_value = 0x004e0202, - .cpu_name = "POWER9 (raw)", - .cpu_features = CPU_FTRS_POWER9_DD2_2, - .cpu_user_features = COMMON_USER_POWER9, - .cpu_user_features2 = COMMON_USER2_POWER9, - .mmu_features = MMU_FTRS_POWER9, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_power9, - .cpu_restore = __restore_cpu_power9, - .machine_check_early = __machine_check_early_realmode_p9, - .platform = "power9", - }, - { /* Power9 DD2.3 or later */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004e0000, - .cpu_name = "POWER9 (raw)", - .cpu_features = CPU_FTRS_POWER9_DD2_3, - .cpu_user_features = COMMON_USER_POWER9, - .cpu_user_features2 = COMMON_USER2_POWER9, - .mmu_features = MMU_FTRS_POWER9, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_power9, - .cpu_restore = __restore_cpu_power9, - .machine_check_early = __machine_check_early_realmode_p9, - .platform = "power9", - }, - { /* Power10 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00800000, - .cpu_name = "POWER10 (raw)", - .cpu_features = CPU_FTRS_POWER10, - .cpu_user_features = COMMON_USER_POWER10, - .cpu_user_features2 = COMMON_USER2_POWER10, - .mmu_features = MMU_FTRS_POWER10, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_power10, - .cpu_restore = __restore_cpu_power10, - .machine_check_early = __machine_check_early_realmode_p10, - .platform = "power10", - }, - { /* Cell Broadband Engine */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00700000, - .cpu_name = "Cell Broadband Engine", - .cpu_features = CPU_FTRS_CELL, - .cpu_user_features = COMMON_USER_PPC64 | - PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP | - PPC_FEATURE_SMT, - .mmu_features = MMU_FTRS_CELL, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 4, - .pmc_type = PPC_PMC_IBM, - .platform = "ppc-cell-be", - }, - { /* PA Semi PA6T */ - .pvr_mask = 0x7fff0000, - .pvr_value = 0x00900000, - .cpu_name = "PA6T", - .cpu_features = CPU_FTRS_PA6T, - .cpu_user_features = COMMON_USER_PA6T, - .mmu_features = MMU_FTRS_PA6T, - .icache_bsize = 64, - .dcache_bsize = 64, - .num_pmcs = 6, - .pmc_type = PPC_PMC_PA6T, - .cpu_setup = __setup_cpu_pa6t, - .cpu_restore = __restore_cpu_pa6t, - .platform = "pa6t", - }, - { /* default match */ - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "POWER5 (compatible)", - .cpu_features = CPU_FTRS_COMPATIBLE, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTRS_POWER, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .platform = "power5", - } -#endif /* CONFIG_PPC_BOOK3S_64 */ - -#ifdef CONFIG_PPC32 -#ifdef CONFIG_PPC_BOOK3S_32 -#ifdef CONFIG_PPC_BOOK3S_604 - { /* 604 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00040000, - .cpu_name = "604", - .cpu_features = CPU_FTRS_604, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 2, - .cpu_setup = __setup_cpu_604, - .machine_check = machine_check_generic, - .platform = "ppc604", - }, - { /* 604e */ - .pvr_mask = 0xfffff000, - .pvr_value = 0x00090000, - .cpu_name = "604e", - .cpu_features = CPU_FTRS_604, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .cpu_setup = __setup_cpu_604, - .machine_check = machine_check_generic, - .platform = "ppc604", - }, - { /* 604r */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00090000, - .cpu_name = "604r", - .cpu_features = CPU_FTRS_604, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .cpu_setup = __setup_cpu_604, - .machine_check = machine_check_generic, - .platform = "ppc604", - }, - { /* 604ev */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x000a0000, - .cpu_name = "604ev", - .cpu_features = CPU_FTRS_604, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .cpu_setup = __setup_cpu_604, - .machine_check = machine_check_generic, - .platform = "ppc604", - }, - { /* 740/750 (0x4202, don't support TAU ?) */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x00084202, - .cpu_name = "740/750", - .cpu_features = CPU_FTRS_740_NOTAU, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .cpu_setup = __setup_cpu_750, - .machine_check = machine_check_generic, - .platform = "ppc750", - }, - { /* 750CX (80100 and 8010x?) */ - .pvr_mask = 0xfffffff0, - .pvr_value = 0x00080100, - .cpu_name = "750CX", - .cpu_features = CPU_FTRS_750, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .cpu_setup = __setup_cpu_750cx, - .machine_check = machine_check_generic, - .platform = "ppc750", - }, - { /* 750CX (82201 and 82202) */ - .pvr_mask = 0xfffffff0, - .pvr_value = 0x00082200, - .cpu_name = "750CX", - .cpu_features = CPU_FTRS_750, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_750cx, - .machine_check = machine_check_generic, - .platform = "ppc750", - }, - { /* 750CXe (82214) */ - .pvr_mask = 0xfffffff0, - .pvr_value = 0x00082210, - .cpu_name = "750CXe", - .cpu_features = CPU_FTRS_750, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_750cx, - .machine_check = machine_check_generic, - .platform = "ppc750", - }, - { /* 750CXe "Gekko" (83214) */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x00083214, - .cpu_name = "750CXe", - .cpu_features = CPU_FTRS_750, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_750cx, - .machine_check = machine_check_generic, - .platform = "ppc750", - }, - { /* 750CL (and "Broadway") */ - .pvr_mask = 0xfffff0e0, - .pvr_value = 0x00087000, - .cpu_name = "750CL", - .cpu_features = CPU_FTRS_750CL, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_750, - .machine_check = machine_check_generic, - .platform = "ppc750", - }, - { /* 745/755 */ - .pvr_mask = 0xfffff000, - .pvr_value = 0x00083000, - .cpu_name = "745/755", - .cpu_features = CPU_FTRS_750, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_750, - .machine_check = machine_check_generic, - .platform = "ppc750", - }, - { /* 750FX rev 1.x */ - .pvr_mask = 0xffffff00, - .pvr_value = 0x70000100, - .cpu_name = "750FX", - .cpu_features = CPU_FTRS_750FX1, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_750, - .machine_check = machine_check_generic, - .platform = "ppc750", - }, - { /* 750FX rev 2.0 must disable HID0[DPM] */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x70000200, - .cpu_name = "750FX", - .cpu_features = CPU_FTRS_750FX2, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_750, - .machine_check = machine_check_generic, - .platform = "ppc750", - }, - { /* 750FX (All revs except 2.0) */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x70000000, - .cpu_name = "750FX", - .cpu_features = CPU_FTRS_750FX, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_750fx, - .machine_check = machine_check_generic, - .platform = "ppc750", - }, - { /* 750GX */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x70020000, - .cpu_name = "750GX", - .cpu_features = CPU_FTRS_750GX, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_750fx, - .machine_check = machine_check_generic, - .platform = "ppc750", - }, - { /* 740/750 (L2CR bit need fixup for 740) */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00080000, - .cpu_name = "740/750", - .cpu_features = CPU_FTRS_740, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_750, - .machine_check = machine_check_generic, - .platform = "ppc750", - }, - { /* 7400 rev 1.1 ? (no TAU) */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x000c1101, - .cpu_name = "7400 (1.1)", - .cpu_features = CPU_FTRS_7400_NOTAU, - .cpu_user_features = COMMON_USER | - PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_7400, - .machine_check = machine_check_generic, - .platform = "ppc7400", - }, - { /* 7400 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x000c0000, - .cpu_name = "7400", - .cpu_features = CPU_FTRS_7400, - .cpu_user_features = COMMON_USER | - PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_7400, - .machine_check = machine_check_generic, - .platform = "ppc7400", - }, - { /* 7410 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x800c0000, - .cpu_name = "7410", - .cpu_features = CPU_FTRS_7400, - .cpu_user_features = COMMON_USER | - PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_7410, - .machine_check = machine_check_generic, - .platform = "ppc7400", - }, - { /* 7450 2.0 - no doze/nap */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x80000200, - .cpu_name = "7450", - .cpu_features = CPU_FTRS_7450_20, - .cpu_user_features = COMMON_USER | - PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 6, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_745x, - .machine_check = machine_check_generic, - .platform = "ppc7450", - }, - { /* 7450 2.1 */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x80000201, - .cpu_name = "7450", - .cpu_features = CPU_FTRS_7450_21, - .cpu_user_features = COMMON_USER | - PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 6, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_745x, - .machine_check = machine_check_generic, - .platform = "ppc7450", - }, - { /* 7450 2.3 and newer */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x80000000, - .cpu_name = "7450", - .cpu_features = CPU_FTRS_7450_23, - .cpu_user_features = COMMON_USER | - PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 6, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_745x, - .machine_check = machine_check_generic, - .platform = "ppc7450", - }, - { /* 7455 rev 1.x */ - .pvr_mask = 0xffffff00, - .pvr_value = 0x80010100, - .cpu_name = "7455", - .cpu_features = CPU_FTRS_7455_1, - .cpu_user_features = COMMON_USER | - PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 6, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_745x, - .machine_check = machine_check_generic, - .platform = "ppc7450", - }, - { /* 7455 rev 2.0 */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x80010200, - .cpu_name = "7455", - .cpu_features = CPU_FTRS_7455_20, - .cpu_user_features = COMMON_USER | - PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 6, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_745x, - .machine_check = machine_check_generic, - .platform = "ppc7450", - }, - { /* 7455 others */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x80010000, - .cpu_name = "7455", - .cpu_features = CPU_FTRS_7455, - .cpu_user_features = COMMON_USER | - PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 6, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_745x, - .machine_check = machine_check_generic, - .platform = "ppc7450", - }, - { /* 7447/7457 Rev 1.0 */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x80020100, - .cpu_name = "7447/7457", - .cpu_features = CPU_FTRS_7447_10, - .cpu_user_features = COMMON_USER | - PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 6, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_745x, - .machine_check = machine_check_generic, - .platform = "ppc7450", - }, - { /* 7447/7457 Rev 1.1 */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x80020101, - .cpu_name = "7447/7457", - .cpu_features = CPU_FTRS_7447_10, - .cpu_user_features = COMMON_USER | - PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 6, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_745x, - .machine_check = machine_check_generic, - .platform = "ppc7450", - }, - { /* 7447/7457 Rev 1.2 and later */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x80020000, - .cpu_name = "7447/7457", - .cpu_features = CPU_FTRS_7447, - .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 6, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_745x, - .machine_check = machine_check_generic, - .platform = "ppc7450", - }, - { /* 7447A */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x80030000, - .cpu_name = "7447A", - .cpu_features = CPU_FTRS_7447A, - .cpu_user_features = COMMON_USER | - PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 6, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_745x, - .machine_check = machine_check_generic, - .platform = "ppc7450", - }, - { /* 7448 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x80040000, - .cpu_name = "7448", - .cpu_features = CPU_FTRS_7448, - .cpu_user_features = COMMON_USER | - PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 6, - .pmc_type = PPC_PMC_G4, - .cpu_setup = __setup_cpu_745x, - .machine_check = machine_check_generic, - .platform = "ppc7450", - }, -#endif /* CONFIG_PPC_BOOK3S_604 */ -#ifdef CONFIG_PPC_BOOK3S_603 - { /* 603 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00030000, - .cpu_name = "603", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, - { /* 603e */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00060000, - .cpu_name = "603e", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, - { /* 603ev */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00070000, - .cpu_name = "603ev", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, - { /* 82xx (8240, 8245, 8260 are all 603e cores) */ - .pvr_mask = 0x7fff0000, - .pvr_value = 0x00810000, - .cpu_name = "82xx", - .cpu_features = CPU_FTRS_82XX, - .cpu_user_features = COMMON_USER, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, - { /* All G2_LE (603e core, plus some) have the same pvr */ - .pvr_mask = 0x7fff0000, - .pvr_value = 0x00820000, - .cpu_name = "G2_LE", - .cpu_features = CPU_FTRS_G2_LE, - .cpu_user_features = COMMON_USER, - .mmu_features = MMU_FTR_USE_HIGH_BATS, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, -#ifdef CONFIG_PPC_83xx - { /* e300c1 (a 603e core, plus some) on 83xx */ - .pvr_mask = 0x7fff0000, - .pvr_value = 0x00830000, - .cpu_name = "e300c1", - .cpu_features = CPU_FTRS_E300, - .cpu_user_features = COMMON_USER, - .mmu_features = MMU_FTR_USE_HIGH_BATS, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_83xx, - .platform = "ppc603", - }, - { /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */ - .pvr_mask = 0x7fff0000, - .pvr_value = 0x00840000, - .cpu_name = "e300c2", - .cpu_features = CPU_FTRS_E300C2, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, - .mmu_features = MMU_FTR_USE_HIGH_BATS | - MMU_FTR_NEED_DTLB_SW_LRU, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_83xx, - .platform = "ppc603", - }, - { /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */ - .pvr_mask = 0x7fff0000, - .pvr_value = 0x00850000, - .cpu_name = "e300c3", - .cpu_features = CPU_FTRS_E300, - .cpu_user_features = COMMON_USER, - .mmu_features = MMU_FTR_USE_HIGH_BATS | - MMU_FTR_NEED_DTLB_SW_LRU, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_83xx, - .num_pmcs = 4, - .platform = "ppc603", - }, - { /* e300c4 (e300c1, plus one IU) */ - .pvr_mask = 0x7fff0000, - .pvr_value = 0x00860000, - .cpu_name = "e300c4", - .cpu_features = CPU_FTRS_E300, - .cpu_user_features = COMMON_USER, - .mmu_features = MMU_FTR_USE_HIGH_BATS | - MMU_FTR_NEED_DTLB_SW_LRU, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_83xx, - .num_pmcs = 4, - .platform = "ppc603", - }, -#endif -#endif /* CONFIG_PPC_BOOK3S_603 */ -#ifdef CONFIG_PPC_BOOK3S_604 - { /* default match, we assume split I/D cache & TB (non-601)... */ - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "(generic PPC)", - .cpu_features = CPU_FTRS_CLASSIC32, - .cpu_user_features = COMMON_USER, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, -#endif /* CONFIG_PPC_BOOK3S_604 */ -#endif /* CONFIG_PPC_BOOK3S_32 */ -#ifdef CONFIG_PPC_8xx - { /* 8xx */ - .pvr_mask = 0xffff0000, - .pvr_value = PVR_8xx, - .cpu_name = "8xx", - /* CPU_FTR_MAYBE_CAN_DOZE is possible, - * if the 8xx code is there.... */ - .cpu_features = CPU_FTRS_8XX, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, - .mmu_features = MMU_FTR_TYPE_8xx, - .icache_bsize = 16, - .dcache_bsize = 16, - .machine_check = machine_check_8xx, - .platform = "ppc823", - }, -#endif /* CONFIG_PPC_8xx */ -#ifdef CONFIG_40x - { /* STB 04xxx */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41810000, - .cpu_name = "STB04xxx", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* NP405L */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41610000, - .cpu_name = "NP405L", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* NP4GS3 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x40B10000, - .cpu_name = "NP4GS3", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* NP405H */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41410000, - .cpu_name = "NP405H", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405GPr */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x50910000, - .cpu_name = "405GPr", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* STBx25xx */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x51510000, - .cpu_name = "STBx25xx", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405LP */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41F10000, - .cpu_name = "405LP", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EP */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x51210000, - .cpu_name = "405EP", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. A/B with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910007, - .cpu_name = "405EX Rev. A/B", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. C without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x1291000d, - .cpu_name = "405EX Rev. C", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. C with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x1291000f, - .cpu_name = "405EX Rev. C", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. D without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910003, - .cpu_name = "405EX Rev. D", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. D with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910005, - .cpu_name = "405EX Rev. D", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. A/B without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910001, - .cpu_name = "405EXr Rev. A/B", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. C without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910009, - .cpu_name = "405EXr Rev. C", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. C with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x1291000b, - .cpu_name = "405EXr Rev. C", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. D without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910000, - .cpu_name = "405EXr Rev. D", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. D with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910002, - .cpu_name = "405EXr Rev. D", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { - /* 405EZ */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41510000, - .cpu_name = "405EZ", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* APM8018X */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x7ff11432, - .cpu_name = "APM8018X", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* default match */ - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "(generic 40x PPC)", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - } - -#endif /* CONFIG_40x */ -#ifdef CONFIG_44x -#ifndef CONFIG_PPC_47x - { - .pvr_mask = 0xf0000fff, - .pvr_value = 0x40000850, - .cpu_name = "440GR Rev. A", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc440", - }, - { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */ - .pvr_mask = 0xf0000fff, - .pvr_value = 0x40000858, - .cpu_name = "440EP Rev. A", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440ep, - .machine_check = machine_check_4xx, - .platform = "ppc440", - }, - { - .pvr_mask = 0xf0000fff, - .pvr_value = 0x400008d3, - .cpu_name = "440GR Rev. B", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc440", - }, - { /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */ - .pvr_mask = 0xf0000ff7, - .pvr_value = 0x400008d4, - .cpu_name = "440EP Rev. C", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440ep, - .machine_check = machine_check_4xx, - .platform = "ppc440", - }, - { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */ - .pvr_mask = 0xf0000fff, - .pvr_value = 0x400008db, - .cpu_name = "440EP Rev. B", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440ep, - .machine_check = machine_check_4xx, - .platform = "ppc440", - }, - { /* 440GRX */ - .pvr_mask = 0xf0000ffb, - .pvr_value = 0x200008D0, - .cpu_name = "440GRX", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440grx, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */ - .pvr_mask = 0xf0000ffb, - .pvr_value = 0x200008D8, - .cpu_name = "440EPX", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440epx, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* 440GP Rev. B */ - .pvr_mask = 0xf0000fff, - .pvr_value = 0x40000440, - .cpu_name = "440GP Rev. B", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc440gp", - }, - { /* 440GP Rev. C */ - .pvr_mask = 0xf0000fff, - .pvr_value = 0x40000481, - .cpu_name = "440GP Rev. C", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc440gp", - }, - { /* 440GX Rev. A */ - .pvr_mask = 0xf0000fff, - .pvr_value = 0x50000850, - .cpu_name = "440GX Rev. A", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440gx, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* 440GX Rev. B */ - .pvr_mask = 0xf0000fff, - .pvr_value = 0x50000851, - .cpu_name = "440GX Rev. B", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440gx, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* 440GX Rev. C */ - .pvr_mask = 0xf0000fff, - .pvr_value = 0x50000892, - .cpu_name = "440GX Rev. C", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440gx, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* 440GX Rev. F */ - .pvr_mask = 0xf0000fff, - .pvr_value = 0x50000894, - .cpu_name = "440GX Rev. F", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440gx, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* 440SP Rev. A */ - .pvr_mask = 0xfff00fff, - .pvr_value = 0x53200891, - .cpu_name = "440SP Rev. A", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc440", - }, - { /* 440SPe Rev. A */ - .pvr_mask = 0xfff00fff, - .pvr_value = 0x53400890, - .cpu_name = "440SPe Rev. A", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440spe, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* 440SPe Rev. B */ - .pvr_mask = 0xfff00fff, - .pvr_value = 0x53400891, - .cpu_name = "440SPe Rev. B", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440spe, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* 460EX */ - .pvr_mask = 0xffff0006, - .pvr_value = 0x13020002, - .cpu_name = "460EX", - .cpu_features = CPU_FTRS_440x6, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_460ex, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* 460EX Rev B */ - .pvr_mask = 0xffff0007, - .pvr_value = 0x13020004, - .cpu_name = "460EX Rev. B", - .cpu_features = CPU_FTRS_440x6, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_460ex, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* 460GT */ - .pvr_mask = 0xffff0006, - .pvr_value = 0x13020000, - .cpu_name = "460GT", - .cpu_features = CPU_FTRS_440x6, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_460gt, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* 460GT Rev B */ - .pvr_mask = 0xffff0007, - .pvr_value = 0x13020005, - .cpu_name = "460GT Rev. B", - .cpu_features = CPU_FTRS_440x6, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_460gt, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* 460SX */ - .pvr_mask = 0xffffff00, - .pvr_value = 0x13541800, - .cpu_name = "460SX", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_460sx, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* 464 in APM821xx */ - .pvr_mask = 0xfffffff0, - .pvr_value = 0x12C41C80, - .cpu_name = "APM821XX", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_apm821xx, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, - { /* default match */ - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "(generic 44x PPC)", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc440", - } -#else /* CONFIG_PPC_47x */ - { /* 476 DD2 core */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x11a52080, - .cpu_name = "476", - .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_47x | - MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, - .icache_bsize = 32, - .dcache_bsize = 128, - .machine_check = machine_check_47x, - .platform = "ppc470", - }, - { /* 476fpe */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x7ff50000, - .cpu_name = "476fpe", - .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_47x | - MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, - .icache_bsize = 32, - .dcache_bsize = 128, - .machine_check = machine_check_47x, - .platform = "ppc470", - }, - { /* 476 iss */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00050000, - .cpu_name = "476", - .cpu_features = CPU_FTRS_47X, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_47x | - MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, - .icache_bsize = 32, - .dcache_bsize = 128, - .machine_check = machine_check_47x, - .platform = "ppc470", - }, - { /* 476 others */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x11a50000, - .cpu_name = "476", - .cpu_features = CPU_FTRS_47X, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_FPU, - .mmu_features = MMU_FTR_TYPE_47x | - MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, - .icache_bsize = 32, - .dcache_bsize = 128, - .machine_check = machine_check_47x, - .platform = "ppc470", - }, - { /* default match */ - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "(generic 47x PPC)", - .cpu_features = CPU_FTRS_47X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_47x, - .icache_bsize = 32, - .dcache_bsize = 128, - .machine_check = machine_check_47x, - .platform = "ppc470", - } -#endif /* CONFIG_PPC_47x */ -#endif /* CONFIG_44x */ -#endif /* CONFIG_PPC32 */ -#ifdef CONFIG_E500 -#ifdef CONFIG_PPC32 -#ifndef CONFIG_PPC_E500MC - { /* e500 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x80200000, - .cpu_name = "e500", - .cpu_features = CPU_FTRS_E500, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_SPE_COMP | - PPC_FEATURE_HAS_EFP_SINGLE_COMP, - .cpu_user_features2 = PPC_FEATURE2_ISEL, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .cpu_setup = __setup_cpu_e500v1, - .machine_check = machine_check_e500, - .platform = "ppc8540", - }, - { /* e500v2 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x80210000, - .cpu_name = "e500v2", - .cpu_features = CPU_FTRS_E500_2, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_SPE_COMP | - PPC_FEATURE_HAS_EFP_SINGLE_COMP | - PPC_FEATURE_HAS_EFP_DOUBLE_COMP, - .cpu_user_features2 = PPC_FEATURE2_ISEL, - .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS, - .icache_bsize = 32, - .dcache_bsize = 32, - .num_pmcs = 4, - .cpu_setup = __setup_cpu_e500v2, - .machine_check = machine_check_e500, - .platform = "ppc8548", - .cpu_down_flush = cpu_down_flush_e500v2, - }, -#else - { /* e500mc */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x80230000, - .cpu_name = "e500mc", - .cpu_features = CPU_FTRS_E500MC, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, - .cpu_user_features2 = PPC_FEATURE2_ISEL, - .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | - MMU_FTR_USE_TLBILX, - .icache_bsize = 64, - .dcache_bsize = 64, - .num_pmcs = 4, - .cpu_setup = __setup_cpu_e500mc, - .machine_check = machine_check_e500mc, - .platform = "ppce500mc", - .cpu_down_flush = cpu_down_flush_e500mc, - }, -#endif /* CONFIG_PPC_E500MC */ -#endif /* CONFIG_PPC32 */ -#ifdef CONFIG_PPC_E500MC - { /* e5500 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x80240000, - .cpu_name = "e5500", - .cpu_features = CPU_FTRS_E5500, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, - .cpu_user_features2 = PPC_FEATURE2_ISEL, - .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | - MMU_FTR_USE_TLBILX, - .icache_bsize = 64, - .dcache_bsize = 64, - .num_pmcs = 4, - .cpu_setup = __setup_cpu_e5500, -#ifndef CONFIG_PPC32 - .cpu_restore = __restore_cpu_e5500, -#endif - .machine_check = machine_check_e500mc, - .platform = "ppce5500", - .cpu_down_flush = cpu_down_flush_e5500, - }, - { /* e6500 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x80400000, - .cpu_name = "e6500", - .cpu_features = CPU_FTRS_E6500, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU | - PPC_FEATURE_HAS_ALTIVEC_COMP, - .cpu_user_features2 = PPC_FEATURE2_ISEL, - .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | - MMU_FTR_USE_TLBILX, - .icache_bsize = 64, - .dcache_bsize = 64, - .num_pmcs = 6, - .cpu_setup = __setup_cpu_e6500, -#ifndef CONFIG_PPC32 - .cpu_restore = __restore_cpu_e6500, -#endif - .machine_check = machine_check_e500mc, - .platform = "ppce6500", - .cpu_down_flush = cpu_down_flush_e6500, - }, -#endif /* CONFIG_PPC_E500MC */ -#ifdef CONFIG_PPC32 - { /* default match */ - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "(generic E500 PPC)", - .cpu_features = CPU_FTRS_E500, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_SPE_COMP | - PPC_FEATURE_HAS_EFP_SINGLE_COMP, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_e500, - .platform = "powerpc", - } -#endif /* CONFIG_PPC32 */ -#endif /* CONFIG_E500 */ -}; +#include "cpu_specs.h" void __init set_cur_cpu_spec(struct cpu_spec *s) { @@ -2018,6 +104,8 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr) struct cpu_spec *s = cpu_specs; int i; + BUILD_BUG_ON(!ARRAY_SIZE(cpu_specs)); + s = PTRRELOC(s); for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) { diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index fc800a9fb2c4..c3fb9fdf5bd7 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -1099,7 +1099,7 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char prop = of_get_flat_dt_prop(node, "display-name", NULL); if (prop && strlen((char *)prop) != 0) { - strlcpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name)); + strscpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name)); cur_cpu_spec->cpu_name = dt_cpu_name; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 1d599df6f169..3fc7c9886bb7 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -49,7 +49,7 @@ */ .align 12 -#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) +#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_E500) .globl prepare_transfer_to_handler prepare_transfer_to_handler: /* if from kernel, check interrupted DOZE/NAP mode */ @@ -68,10 +68,10 @@ prepare_transfer_to_handler: lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ - lwz r2, GPR2(r11) + REST_GPR(2, r11) b fast_exception_return _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) -#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ +#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_PPC_E500 */ #if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) .globl __kuep_lock @@ -101,6 +101,7 @@ __kuep_unlock: .globl transfer_to_syscall transfer_to_syscall: + stw r3, ORIG_GPR3(r1) stw r11, GPR1(r1) stw r11, 0(r1) mflr r12 @@ -121,9 +122,9 @@ transfer_to_syscall: SAVE_NVGPRS(r1) kuep_lock - /* Calling convention has r9 = orig r0, r10 = regs */ - addi r10,r1,STACK_FRAME_OVERHEAD - mr r9,r0 + /* Calling convention has r3 = regs, r4 = orig r0 */ + addi r3,r1,STACK_FRAME_OVERHEAD + mr r4,r0 bl system_call_exception ret_from_syscall: @@ -143,7 +144,7 @@ ret_from_syscall: lwz r7,_NIP(r1) lwz r8,_MSR(r1) cmpwi r3,0 - lwz r3,GPR3(r1) + REST_GPR(3, r1) syscall_exit_finish: mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 @@ -151,8 +152,8 @@ syscall_exit_finish: bne 3f mtcr r5 -1: lwz r2,GPR2(r1) - lwz r1,GPR1(r1) +1: REST_GPR(2, r1) + REST_GPR(1, r1) rfi #ifdef CONFIG_40x b . /* Prevent prefetch past rfi */ @@ -164,10 +165,8 @@ syscall_exit_finish: REST_NVGPRS(r1) mtctr r4 mtxer r5 - lwz r0,GPR0(r1) - lwz r3,GPR3(r1) - REST_GPRS(4, 11, r1) - lwz r12,GPR12(r1) + REST_GPR(0, r1) + REST_GPRS(3, 12, r1) b 1b #ifdef CONFIG_44x @@ -259,13 +258,12 @@ fast_exception_return: beq 3f /* if not, we've got problems */ #endif -2: REST_GPRS(3, 6, r11) - lwz r10,_CCR(r11) - REST_GPRS(1, 2, r11) +2: lwz r10,_CCR(r11) + REST_GPRS(1, 6, r11) mtcr r10 lwz r10,_LINK(r11) mtlr r10 - /* Clear the exception_marker on the stack to avoid confusing stacktrace */ + /* Clear the exception marker on the stack to avoid confusing stacktrace */ li r10, 0 stw r10, 8(r11) REST_GPR(10, r11) @@ -276,7 +274,7 @@ fast_exception_return: mtspr SPRN_SRR0,r12 REST_GPR(9, r11) REST_GPR(12, r11) - lwz r11,GPR11(r11) + REST_GPR(11, r11) rfi #ifdef CONFIG_40x b . /* Prevent prefetch past rfi */ @@ -322,7 +320,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) li r0,0 /* - * Leaving a stale exception_marker on the stack can confuse + * Leaving a stale exception marker on the stack can confuse * the reliable stack unwinder later on. Clear it. */ stw r0,8(r1) @@ -374,7 +372,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) mtspr SPRN_XER,r5 /* - * Leaving a stale exception_marker on the stack can confuse + * Leaving a stale exception marker on the stack can confuse * the reliable stack unwinder later on. Clear it. */ stw r0,8(r1) @@ -453,9 +451,8 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) lwz r3,_MSR(r1); \ andi. r3,r3,MSR_PR; \ bne interrupt_return; \ - lwz r0,GPR0(r1); \ - lwz r2,GPR2(r1); \ - REST_GPRS(3, 8, r1); \ + REST_GPR(0, r1); \ + REST_GPRS(2, 8, r1); \ lwz r10,_XER(r1); \ lwz r11,_CTR(r1); \ mtspr SPRN_XER,r10; \ @@ -474,11 +471,8 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) lwz r12,_MSR(r1); \ mtspr exc_lvl_srr0,r11; \ mtspr exc_lvl_srr1,r12; \ - lwz r9,GPR9(r1); \ - lwz r12,GPR12(r1); \ - lwz r10,GPR10(r1); \ - lwz r11,GPR11(r1); \ - lwz r1,GPR1(r1); \ + REST_GPRS(9, 12, r1); \ + REST_GPR(1, r1); \ exc_lvl_rfi; \ b .; /* prevent prefetch past exc_lvl_rfi */ @@ -488,7 +482,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) mtspr SPRN_##exc_lvl_srr0,r9; \ mtspr SPRN_##exc_lvl_srr1,r10; -#if defined(CONFIG_PPC_BOOK3E_MMU) +#if defined(CONFIG_PPC_E500) #ifdef CONFIG_PHYS_64BIT #define RESTORE_MAS7 \ lwz r11,MAS7(r1); \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 01ace4c56104..3e2e37e6ecab 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -292,16 +292,16 @@ _GLOBAL(enter_prom) /* Prepare a 32-bit mode big endian MSR */ -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 rlwinm r11,r11,0,1,31 mtsrr1 r11 rfi -#else /* CONFIG_PPC_BOOK3E */ +#else /* CONFIG_PPC_BOOK3E_64 */ LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE) andc r11,r11,r12 mtsrr1 r11 RFI_TO_KERNEL -#endif /* CONFIG_PPC_BOOK3E */ +#endif /* CONFIG_PPC_BOOK3E_64 */ 1: /* Return from OF */ FIXUP_ENDIAN diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 67dc4e3179a0..930e36099015 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -216,17 +216,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mtlr r10 mtcr r11 - ld r10,GPR10(r1) - ld r11,GPR11(r1) - ld r12,GPR12(r1) + REST_GPRS(10, 12, r1) mtspr \scratch,r0 std r10,\paca_ex+EX_R10(r13); std r11,\paca_ex+EX_R11(r13); ld r10,_NIP(r1) ld r11,_MSR(r1) - ld r0,GPR0(r1) - ld r1,GPR1(r1) + REST_GPR(0, r1) + REST_GPR(1, r1) mtspr \srr0,r10 mtspr \srr1,r11 ld r10,\paca_ex+EX_R10(r13) @@ -291,7 +289,6 @@ ret_from_mc_except: #define SPRN_MC_SRR0 SPRN_MCSRR0 #define SPRN_MC_SRR1 SPRN_MCSRR1 -#ifdef CONFIG_PPC_FSL_BOOK3E #define GEN_BTB_FLUSH \ START_BTB_FLUSH_SECTION \ beq 1f; \ @@ -307,13 +304,6 @@ ret_from_mc_except: #define DBG_BTB_FLUSH CRIT_BTB_FLUSH #define MC_BTB_FLUSH CRIT_BTB_FLUSH #define GDBELL_BTB_FLUSH GEN_BTB_FLUSH -#else -#define GEN_BTB_FLUSH -#define CRIT_BTB_FLUSH -#define DBG_BTB_FLUSH -#define MC_BTB_FLUSH -#define GDBELL_BTB_FLUSH -#endif #define NORMAL_EXCEPTION_PROLOG(n, intnum, addition) \ EXCEPTION_PROLOG(n, intnum, GEN, addition##_GEN(n)) @@ -372,25 +362,24 @@ ret_from_mc_except: /* Core exception code for all exceptions except TLB misses. */ #define EXCEPTION_COMMON_LVL(n, scratch, excf) \ exc_##n##_common: \ - std r0,GPR0(r1); /* save r0 in stackframe */ \ - std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ + SAVE_GPR(0, r1); /* save r0 in stackframe */ \ + SAVE_GPRS(2, 9, r1); /* save r2 - r9 in stackframe */ \ std r10,_NIP(r1); /* save SRR0 to stackframe */ \ std r11,_MSR(r1); /* save SRR1 to stackframe */ \ beq 2f; /* if from kernel mode */ \ 2: ld r3,excf+EX_R10(r13); /* get back r10 */ \ ld r4,excf+EX_R11(r13); /* get back r11 */ \ mfspr r5,scratch; /* get back r13 */ \ - std r12,GPR12(r1); /* save r12 in stackframe */ \ - ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ + SAVE_GPR(12, r1); /* save r12 in stackframe */ \ + LOAD_PACA_TOC(); /* get kernel TOC into r2 */ \ mflr r6; /* save LR in stackframe */ \ mfctr r7; /* save CTR in stackframe */ \ mfspr r8,SPRN_XER; /* save XER in stackframe */ \ ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \ lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \ lbz r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */ \ - ld r12,exception_marker@toc(r2); \ - li r0,0; \ + LOAD_REG_IMMEDIATE(r12, STACK_FRAME_REGS_MARKER); \ + ZEROIZE_GPR(0); \ std r3,GPR10(r1); /* save r10 to stackframe */ \ std r4,GPR11(r1); /* save r11 to stackframe */ \ std r5,GPR13(r1); /* save it to stackframe */ \ @@ -470,12 +459,6 @@ exc_##n##_bad_stack: \ bl hdlr; \ b interrupt_return -/* This value is used to mark exception frames on the stack. */ - .section ".toc","aw" -exception_marker: - .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER - - /* * And here we have the exception vectors ! */ @@ -704,9 +687,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) beq+ 1f #ifdef CONFIG_RELOCATABLE - ld r15,PACATOC(r13) - ld r14,interrupt_base_book3e@got(r15) - ld r15,__end_interrupts@got(r15) + __LOAD_PACA_TOC(r15) + LOAD_REG_ADDR_ALTTOC(r14, r15, interrupt_base_book3e) + LOAD_REG_ADDR_ALTTOC(r15, r15, __end_interrupts) cmpld cr0,r10,r14 cmpld cr1,r10,r15 #else @@ -775,9 +758,9 @@ kernel_dbg_exc: beq+ 1f #ifdef CONFIG_RELOCATABLE - ld r15,PACATOC(r13) - ld r14,interrupt_base_book3e@got(r15) - ld r15,__end_interrupts@got(r15) + __LOAD_PACA_TOC(r15) + LOAD_REG_ADDR_ALTTOC(r14, r15, interrupt_base_book3e) + LOAD_REG_ADDR_ALTTOC(r15, r15, __end_interrupts) cmpld cr0,r10,r14 cmpld cr1,r10,r15 #else @@ -900,9 +883,9 @@ kernel_dbg_exc: .macro SEARCH_RESTART_TABLE #ifdef CONFIG_RELOCATABLE - ld r11,PACATOC(r13) - ld r14,__start___restart_table@got(r11) - ld r15,__stop___restart_table@got(r11) + __LOAD_PACA_TOC(r11) + LOAD_REG_ADDR_ALTTOC(r14, r11, __start___restart_table) + LOAD_REG_ADDR_ALTTOC(r15, r11, __stop___restart_table) #else LOAD_REG_IMMEDIATE_SYM(r14, r11, __start___restart_table) LOAD_REG_IMMEDIATE_SYM(r15, r11, __stop___restart_table) @@ -1056,15 +1039,14 @@ bad_stack_book3e: mfspr r11,SPRN_ESR std r10,_DEAR(r1) std r11,_ESR(r1) - std r0,GPR0(r1); /* save r0 in stackframe */ \ - std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ + SAVE_GPR(0, r1); /* save r0 in stackframe */ \ + SAVE_GPRS(2, 9, r1); /* save r2 - r9 in stackframe */ \ ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \ ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \ mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \ std r3,GPR10(r1); /* save r10 to stackframe */ \ std r4,GPR11(r1); /* save r11 to stackframe */ \ - std r12,GPR12(r1); /* save r12 in stackframe */ \ + SAVE_GPR(12, r1); /* save r12 in stackframe */ \ std r5,GPR13(r1); /* save it to stackframe */ \ mflr r10 mfctr r11 @@ -1072,14 +1054,14 @@ bad_stack_book3e: std r10,_LINK(r1) std r11,_CTR(r1) std r12,_XER(r1) - SAVE_GPRS(14, 31, r1) + SAVE_NVGPRS(r1) lhz r12,PACA_TRAP_SAVE(r13) std r12,_TRAP(r1) addi r11,r1,INT_FRAME_SIZE std r11,0(r1) - li r12,0 + ZEROIZE_GPR(12) std r12,0(r11) - ld r2,PACATOC(r13) + LOAD_PACA_TOC() 1: addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_bad_stack b 1b @@ -1320,8 +1302,8 @@ a2_tlbinit_after_linear_map: /* Now we branch the new virtual address mapped by this entry */ #ifdef CONFIG_RELOCATABLE - ld r5,PACATOC(r13) - ld r3,1f@got(r5) + __LOAD_PACA_TOC(r5) + LOAD_REG_ADDR_ALTTOC(r3, r5, 1f) #else LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f) #endif diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3d0dc133a9ae..5381a43e50fe 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -281,7 +281,7 @@ BEGIN_FTR_SECTION mfspr r9,SPRN_PPR END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) HMT_MEDIUM - std r10,IAREA+EX_R10(r13) /* save r10 - r12 */ + std r10,IAREA+EX_R10(r13) /* save r10 */ .if ICFAR BEGIN_FTR_SECTION mfspr r10,SPRN_CFAR @@ -321,7 +321,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) mfctr r10 std r10,IAREA+EX_CTR(r13) mfcr r9 - std r11,IAREA+EX_R11(r13) + std r11,IAREA+EX_R11(r13) /* save r11 - r12 */ std r12,IAREA+EX_R12(r13) /* @@ -580,7 +580,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) std r2,GPR2(r1) /* save r2 in stackframe */ SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */ mflr r9 /* Get LR, later save to stack */ - ld r2,PACATOC(r13) /* get kernel TOC into r2 */ + LOAD_PACA_TOC() /* get kernel TOC into r2 */ std r9,_LINK(r1) lbz r10,PACAIRQSOFTMASK(r13) mfspr r11,SPRN_XER /* save XER in stackframe */ @@ -589,7 +589,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) li r9,IVEC std r9,_TRAP(r1) /* set trap number */ li r10,0 - ld r11,exception_marker@toc(r2) + LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) std r10,RESULT(r1) /* clear regs->result */ std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */ .endm @@ -610,7 +610,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .macro SEARCH_RESTART_TABLE #ifdef CONFIG_RELOCATABLE mr r12,r2 - ld r2,PACATOC(r13) + LOAD_PACA_TOC() LOAD_REG_ADDR(r9, __start___restart_table) LOAD_REG_ADDR(r10, __stop___restart_table) mr r2,r12 @@ -640,7 +640,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .macro SEARCH_SOFT_MASK_TABLE #ifdef CONFIG_RELOCATABLE mr r12,r2 - ld r2,PACATOC(r13) + LOAD_PACA_TOC() LOAD_REG_ADDR(r9, __start___soft_mask_table) LOAD_REG_ADDR(r10, __stop___soft_mask_table) mr r2,r12 @@ -703,6 +703,71 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .endm /* + * EARLY_BOOT_FIXUP - Fix real-mode interrupt with wrong endian in early boot. + * + * There's a short window during boot where although the kernel is running + * little endian, any exceptions will cause the CPU to switch back to big + * endian. For example a WARN() boils down to a trap instruction, which will + * cause a program check, and we end up here but with the CPU in big endian + * mode. The first instruction of the program check handler (in GEN_INT_ENTRY + * below) is an mtsprg, which when executed in the wrong endian is an lhzu with + * a ~3GB displacement from r3. The content of r3 is random, so that is a load + * from some random location, and depending on the system can easily lead to a + * checkstop, or an infinitely recursive page fault. + * + * So to handle that case we have a trampoline here that can detect we are in + * the wrong endian and flip us back to the correct endian. We can't flip + * MSR[LE] using mtmsr, so we have to use rfid. That requires backing up SRR0/1 + * as well as a GPR. To do that we use SPRG0/2/3, as SPRG1 is already used for + * the paca. SPRG3 is user readable, but this trampoline is only active very + * early in boot, and SPRG3 will be reinitialised in vdso_getcpu_init() before + * userspace starts. + */ +.macro EARLY_BOOT_FIXUP +BEGIN_FTR_SECTION +#ifdef CONFIG_CPU_LITTLE_ENDIAN + tdi 0,0,0x48 // Trap never, or in reverse endian: b . + 8 + b 2f // Skip trampoline if endian is correct + .long 0xa643707d // mtsprg 0, r11 Backup r11 + .long 0xa6027a7d // mfsrr0 r11 + .long 0xa643727d // mtsprg 2, r11 Backup SRR0 in SPRG2 + .long 0xa6027b7d // mfsrr1 r11 + .long 0xa643737d // mtsprg 3, r11 Backup SRR1 in SPRG3 + .long 0xa600607d // mfmsr r11 + .long 0x01006b69 // xori r11, r11, 1 Invert MSR[LE] + .long 0xa6037b7d // mtsrr1 r11 + /* + * This is 'li r11,1f' where 1f is the absolute address of that + * label, byteswapped into the SI field of the instruction. + */ + .long 0x00006039 | \ + ((ABS_ADDR(1f, real_vectors) & 0x00ff) << 24) | \ + ((ABS_ADDR(1f, real_vectors) & 0xff00) << 8) + .long 0xa6037a7d // mtsrr0 r11 + .long 0x2400004c // rfid +1: + mfsprg r11, 3 + mtsrr1 r11 // Restore SRR1 + mfsprg r11, 2 + mtsrr0 r11 // Restore SRR0 + mfsprg r11, 0 // Restore r11 +2: +#endif + /* + * program check could hit at any time, and pseries can not block + * MSR[ME] in early boot. So check if there is anything useful in r13 + * yet, and spin forever if not. + */ + mtsprg 0, r11 + mfcr r11 + cmpdi r13, 0 + beq . + mtcr r11 + mfsprg r11, 0 +END_FTR_SECTION(0, 1) // nop out after boot +.endm + +/* * There are a few constraints to be concerned with. * - Real mode exceptions code/data must be located at their physical location. * - Virtual mode exceptions must be mapped at their 0xc000... location. @@ -1079,6 +1144,7 @@ INT_DEFINE_BEGIN(machine_check) INT_DEFINE_END(machine_check) EXC_REAL_BEGIN(machine_check, 0x200, 0x100) + EARLY_BOOT_FIXUP GEN_INT_ENTRY machine_check_early, virt=0 EXC_REAL_END(machine_check, 0x200, 0x100) EXC_VIRT_NONE(0x4200, 0x100) @@ -1143,6 +1209,9 @@ BEGIN_FTR_SECTION bl enable_machine_check END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) addi r3,r1,STACK_FRAME_OVERHEAD +BEGIN_FTR_SECTION + bl machine_check_early_boot +END_FTR_SECTION(0, 1) // nop out after boot bl machine_check_early std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) @@ -1619,51 +1688,7 @@ INT_DEFINE_BEGIN(program_check) INT_DEFINE_END(program_check) EXC_REAL_BEGIN(program_check, 0x700, 0x100) - -#ifdef CONFIG_CPU_LITTLE_ENDIAN - /* - * There's a short window during boot where although the kernel is - * running little endian, any exceptions will cause the CPU to switch - * back to big endian. For example a WARN() boils down to a trap - * instruction, which will cause a program check, and we end up here but - * with the CPU in big endian mode. The first instruction of the program - * check handler (in GEN_INT_ENTRY below) is an mtsprg, which when - * executed in the wrong endian is an lhzu with a ~3GB displacement from - * r3. The content of r3 is random, so that is a load from some random - * location, and depending on the system can easily lead to a checkstop, - * or an infinitely recursive page fault. - * - * So to handle that case we have a trampoline here that can detect we - * are in the wrong endian and flip us back to the correct endian. We - * can't flip MSR[LE] using mtmsr, so we have to use rfid. That requires - * backing up SRR0/1 as well as a GPR. To do that we use SPRG0/2/3, as - * SPRG1 is already used for the paca. SPRG3 is user readable, but this - * trampoline is only active very early in boot, and SPRG3 will be - * reinitialised in vdso_getcpu_init() before userspace starts. - */ -BEGIN_FTR_SECTION - tdi 0,0,0x48 // Trap never, or in reverse endian: b . + 8 - b 1f // Skip trampoline if endian is correct - .long 0xa643707d // mtsprg 0, r11 Backup r11 - .long 0xa6027a7d // mfsrr0 r11 - .long 0xa643727d // mtsprg 2, r11 Backup SRR0 in SPRG2 - .long 0xa6027b7d // mfsrr1 r11 - .long 0xa643737d // mtsprg 3, r11 Backup SRR1 in SPRG3 - .long 0xa600607d // mfmsr r11 - .long 0x01006b69 // xori r11, r11, 1 Invert MSR[LE] - .long 0xa6037b7d // mtsrr1 r11 - .long 0x34076039 // li r11, 0x734 - .long 0xa6037a7d // mtsrr0 r11 - .long 0x2400004c // rfid - mfsprg r11, 3 - mtsrr1 r11 // Restore SRR1 - mfsprg r11, 2 - mtsrr0 r11 // Restore SRR0 - mfsprg r11, 0 // Restore r11 -1: -END_FTR_SECTION(0, 1) // nop out after boot -#endif /* CONFIG_CPU_LITTLE_ENDIAN */ - + EARLY_BOOT_FIXUP GEN_INT_ENTRY program_check, virt=0 EXC_REAL_END(program_check, 0x700, 0x100) EXC_VIRT_BEGIN(program_check, 0x4700, 0x100) @@ -2794,6 +2819,20 @@ masked_Hinterrupt: masked_interrupt: .endif stw r9,PACA_EXGEN+EX_CCR(r13) +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + /* + * Ensure there was no previous MUST_HARD_MASK interrupt or + * HARD_DIS setting. If this does fire, the interrupt is still + * masked and MSR[EE] will be cleared on return, so no need to + * panic, but somebody probably enabled MSR[EE] under + * PACA_IRQ_HARD_DIS, mtmsr(mfmsr() | MSR_x) being a common + * cause. + */ + lbz r9,PACAIRQHAPPENED(r13) + andi. r9,r9,(PACA_IRQ_MUST_HARD_MASK|PACA_IRQ_HARD_DIS) +0: tdnei r9,0 + EMIT_WARN_ENTRY 0b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) +#endif lbz r9,PACAIRQHAPPENED(r13) or r9,r9,r10 stb r9,PACAIRQHAPPENED(r13) @@ -3034,22 +3073,6 @@ EXPORT_SYMBOL(do_uaccess_flush) MASKED_INTERRUPT MASKED_INTERRUPT hsrr=1 - /* - * Relocation-on interrupts: A subset of the interrupts can be delivered - * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering - * it. Addresses are the same as the original interrupt addresses, but - * offset by 0xc000000000004000. - * It's impossible to receive interrupts below 0x300 via this mechanism. - * KVM: None of these traps are from the guest ; anything that escalated - * to HV=1 from HV=0 is delivered via real mode handlers. - */ - - /* - * This uses the standard macro, since the original 0x300 vector - * only has extra guff for STAB-based processors -- which never - * come here. - */ - USE_FIXED_SECTION(virt_trampolines) /* * All code below __end_soft_masked is treated as soft-masked. If @@ -3075,7 +3098,7 @@ CLOSE_FIXED_SECTION(virt_trampolines); USE_TEXT_SECTION() /* MSR[RI] should be clear because this uses SRR[01] */ -enable_machine_check: +_GLOBAL(enable_machine_check) mflr r0 bcl 20,31,$+4 0: mflr r3 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index cf2c08902c05..dedcc6fe2263 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -143,7 +143,7 @@ DEFINE_FIXED_SYMBOL(__run_at_load, first_256B) .globl __secondary_hold __secondary_hold: FIXUP_ENDIAN -#ifndef CONFIG_PPC_BOOK3E +#ifndef CONFIG_PPC_BOOK3E_64 mfmsr r24 ori r24,r24,MSR_RI mtmsrd r24 /* RI on */ @@ -160,7 +160,7 @@ __secondary_hold: sync li r26,0 -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 tovirt(r26,r26) #endif /* All secondary cpus wait here until told to start. */ @@ -169,7 +169,7 @@ __secondary_hold: beq 100b #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE) -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 tovirt(r12,r12) #endif mtctr r12 @@ -178,7 +178,7 @@ __secondary_hold: * it may be the case that other platforms have r4 right to * begin with, this gives us some safety in case it is not */ -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 mr r4,r25 #else li r4,0 @@ -192,13 +192,6 @@ __secondary_hold: #endif CLOSE_FIXED_SECTION(first_256B) -/* This value is used to mark exception frames on the stack. */ - .section ".toc","aw" -/* This value is used to mark exception frames on the stack. */ -exception_marker: - .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER - .previous - /* * On server, we include the exception vectors code here as it * relies on absolute addressing which is only possible within @@ -214,7 +207,7 @@ USE_TEXT_SECTION() #include "interrupt_64.S" -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* * The booting_thread_hwid holds the thread id we want to boot in cpu * hotplug case. It is set by cpu hotplug code, and is invalid by default. @@ -322,7 +315,7 @@ _GLOBAL(fsl_secondary_thread_init) bl book3e_secondary_thread_init b generic_secondary_common_init -#endif /* CONFIG_PPC_BOOK3E */ +#endif /* CONFIG_PPC_BOOK3E_64 */ /* * On pSeries and most other platforms, secondary processors spin @@ -345,7 +338,7 @@ _GLOBAL(generic_secondary_smp_init) bl relative_toc tovirt(r2,r2) -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* Book3E initialization */ mr r3,r24 mr r4,r25 @@ -400,8 +393,12 @@ generic_secondary_common_init: #else LOAD_REG_ADDR(r8, paca_ptrs) /* Load paca_ptrs pointe */ ld r8,0(r8) /* Get base vaddr of array */ +#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) + LOAD_REG_IMMEDIATE(r7, NR_CPUS) +#else LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */ lwz r7,0(r7) /* also the max paca allocated */ +#endif li r5,0 /* logical cpu id */ 1: sldi r9,r5,3 /* get paca_ptrs[] index from cpu id */ @@ -417,7 +414,7 @@ generic_secondary_common_init: b kexec_wait /* next kernel might do better */ 2: SET_PACA(r13) -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 addi r12,r13,PACA_EXTLB /* and TLB exc frame in another */ mtspr SPRN_SPRG_TLB_EXFRAME,r12 #endif @@ -494,6 +491,9 @@ __start_initialization_multiplatform: /* Make sure we are running in 64 bits mode */ bl enable_64b_mode + /* Zero r13 (paca) so early program check / mce don't use it */ + li r13,0 + /* Get TOC pointer (current runtime address) */ bl relative_toc @@ -519,7 +519,7 @@ __start_initialization_multiplatform: mr r29,r9 #endif -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 bl start_initialization_book3e b __after_prom_start #else @@ -540,7 +540,7 @@ __start_initialization_multiplatform: /* Switch off MMU if not already off */ bl __mmu_off b __after_prom_start -#endif /* CONFIG_PPC_BOOK3E */ +#endif /* CONFIG_PPC_BOOK3E_64 */ __REF __boot_from_prom: @@ -587,11 +587,11 @@ __after_prom_start: /* process relocations for the final address of the kernel */ lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */ sldi r25,r25,32 -#if defined(CONFIG_PPC_BOOK3E) +#if defined(CONFIG_PPC_BOOK3E_64) tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ #endif lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26) -#if defined(CONFIG_PPC_BOOK3E) +#if defined(CONFIG_PPC_BOOK3E_64) tophys(r26,r26) #endif cmplwi cr0,r7,1 /* flagged to stay where we are ? */ @@ -599,7 +599,7 @@ __after_prom_start: add r25,r25,r26 1: mr r3,r25 bl relocate -#if defined(CONFIG_PPC_BOOK3E) +#if defined(CONFIG_PPC_BOOK3E_64) /* IVPR needs to be set after relocation. */ bl init_core_book3e #endif @@ -613,11 +613,11 @@ __after_prom_start: * Note: This process overwrites the OF exception vectors. */ li r3,0 /* target addr */ -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 tovirt(r3,r3) /* on booke, we already run at PAGE_OFFSET */ #endif mr. r4,r26 /* In some cases the loader may */ -#if defined(CONFIG_PPC_BOOK3E) +#if defined(CONFIG_PPC_BOOK3E_64) tovirt(r4,r4) #endif beq 9f /* have already put us at zero */ @@ -630,14 +630,14 @@ __after_prom_start: * variable __run_at_load, if it is set the kernel is treated as relocatable * kernel, otherwise it will be moved to PHYSICAL_START */ -#if defined(CONFIG_PPC_BOOK3E) +#if defined(CONFIG_PPC_BOOK3E_64) tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ #endif lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26) cmplwi cr0,r7,1 bne 3f -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 LOAD_REG_ADDR(r5, __end_interrupts) LOAD_REG_ADDR(r11, _stext) sub r5,r5,r11 @@ -848,7 +848,7 @@ __secondary_start: * before going into C code. */ start_secondary_prolog: - ld r2,PACATOC(r13) + LOAD_PACA_TOC() li r3,0 std r3,0(r1) /* Zero the stack frame pointer */ bl start_secondary @@ -871,10 +871,10 @@ _GLOBAL(start_secondary_resume) */ enable_64b_mode: mfmsr r11 /* grab the current MSR */ -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ mtmsr r11 -#else /* CONFIG_PPC_BOOK3E */ +#else /* CONFIG_PPC_BOOK3E_64 */ LOAD_REG_IMMEDIATE(r12, MSR_64BIT) or r11,r11,r12 mtmsrd r11 @@ -940,7 +940,7 @@ start_here_multiplatform: std r29,8(r11); #endif -#ifndef CONFIG_PPC_BOOK3E +#ifndef CONFIG_PPC_BOOK3E_64 mfmsr r6 ori r6,r6,MSR_RI mtmsrd r6 /* RI on */ @@ -988,7 +988,7 @@ start_here_common: std r1,PACAKSAVE(r13) /* Load the TOC (virtual address) */ - ld r2,PACATOC(r13) + LOAD_PACA_TOC() /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_85xx.S index f0db4f52bc00..52c0ab416326 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -129,7 +129,7 @@ _GLOBAL(_start); /* * For the second relocation, we already set the right tlb entries - * for the kernel space, so skip the code in fsl_booke_entry_mapping.S + * for the kernel space, so skip the code in 85xx_entry_mapping.S */ cmpwi r19,1 beq set_ivor @@ -159,7 +159,7 @@ _GLOBAL(__early_start) lwz r20,0(r20) #define ENTRY_MAPPING_BOOT_SETUP -#include "fsl_booke_entry_mapping.S" +#include "85xx_entry_mapping.S" #undef ENTRY_MAPPING_BOOT_SETUP set_ivor: @@ -912,7 +912,7 @@ get_phys_addr: * Global functions */ -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 #ifndef CONFIG_PPC_E500MC /* Adjust or setup IVORs for e500v1/v2 */ _GLOBAL(__setup_e500_ivors) @@ -955,7 +955,7 @@ _GLOBAL(__setup_ehv_ivors) sync blr #endif /* CONFIG_PPC_E500MC */ -#endif /* CONFIG_E500 */ +#endif /* CONFIG_PPC_E500 */ #ifdef CONFIG_SPE /* diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index bb6d5d0fc4ac..1cb9d0f7cbf2 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -34,7 +34,7 @@ */ #define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4)) -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 #define BOOKE_CLEAR_BTB(reg) \ START_BTB_FLUSH_SECTION \ BTB_FLUSH(reg) \ @@ -103,7 +103,7 @@ END_BTB_FLUSH_SECTION .endm .macro prepare_transfer_to_handler -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 andi. r12,r9,MSR_PR bne 777f bl prepare_transfer_to_handler @@ -242,7 +242,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) .macro SAVE_MMU_REGS -#ifdef CONFIG_PPC_BOOK3E_MMU +#ifdef CONFIG_PPC_E500 mfspr r0,SPRN_MAS0 stw r0,MAS0(r1) mfspr r0,SPRN_MAS1 @@ -257,7 +257,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mfspr r0,SPRN_MAS7 stw r0,MAS7(r1) #endif /* CONFIG_PHYS_64BIT */ -#endif /* CONFIG_PPC_BOOK3E_MMU */ +#endif /* CONFIG_PPC_E500 */ #ifdef CONFIG_44x mfspr r0,SPRN_MMUCR stw r0,MMUCR(r1) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 2669f80b3a49..8db1a15d7acb 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/smp.h> +#include <linux/spinlock.h> #include <linux/debugfs.h> #include <linux/init.h> @@ -129,7 +130,14 @@ struct breakpoint { bool ptrace_bp; }; +/* + * While kernel/events/hw_breakpoint.c does its own synchronization, we cannot + * rely on it safely synchronizing internals here; however, we can rely on it + * not requesting more breakpoints than available. + */ +static DEFINE_SPINLOCK(cpu_bps_lock); static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]); +static DEFINE_SPINLOCK(task_bps_lock); static LIST_HEAD(task_bps); static struct breakpoint *alloc_breakpoint(struct perf_event *bp) @@ -174,7 +182,9 @@ static int task_bps_add(struct perf_event *bp) if (IS_ERR(tmp)) return PTR_ERR(tmp); + spin_lock(&task_bps_lock); list_add(&tmp->list, &task_bps); + spin_unlock(&task_bps_lock); return 0; } @@ -182,6 +192,7 @@ static void task_bps_remove(struct perf_event *bp) { struct list_head *pos, *q; + spin_lock(&task_bps_lock); list_for_each_safe(pos, q, &task_bps) { struct breakpoint *tmp = list_entry(pos, struct breakpoint, list); @@ -191,6 +202,7 @@ static void task_bps_remove(struct perf_event *bp) break; } } + spin_unlock(&task_bps_lock); } /* @@ -200,12 +212,17 @@ static void task_bps_remove(struct perf_event *bp) static bool all_task_bps_check(struct perf_event *bp) { struct breakpoint *tmp; + bool ret = false; + spin_lock(&task_bps_lock); list_for_each_entry(tmp, &task_bps, list) { - if (!can_co_exist(tmp, bp)) - return true; + if (!can_co_exist(tmp, bp)) { + ret = true; + break; + } } - return false; + spin_unlock(&task_bps_lock); + return ret; } /* @@ -215,13 +232,18 @@ static bool all_task_bps_check(struct perf_event *bp) static bool same_task_bps_check(struct perf_event *bp) { struct breakpoint *tmp; + bool ret = false; + spin_lock(&task_bps_lock); list_for_each_entry(tmp, &task_bps, list) { if (tmp->bp->hw.target == bp->hw.target && - !can_co_exist(tmp, bp)) - return true; + !can_co_exist(tmp, bp)) { + ret = true; + break; + } } - return false; + spin_unlock(&task_bps_lock); + return ret; } static int cpu_bps_add(struct perf_event *bp) @@ -234,6 +256,7 @@ static int cpu_bps_add(struct perf_event *bp) if (IS_ERR(tmp)) return PTR_ERR(tmp); + spin_lock(&cpu_bps_lock); cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); for (i = 0; i < nr_wp_slots(); i++) { if (!cpu_bp[i]) { @@ -241,6 +264,7 @@ static int cpu_bps_add(struct perf_event *bp) break; } } + spin_unlock(&cpu_bps_lock); return 0; } @@ -249,6 +273,7 @@ static void cpu_bps_remove(struct perf_event *bp) struct breakpoint **cpu_bp; int i = 0; + spin_lock(&cpu_bps_lock); cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); for (i = 0; i < nr_wp_slots(); i++) { if (!cpu_bp[i]) @@ -260,19 +285,25 @@ static void cpu_bps_remove(struct perf_event *bp) break; } } + spin_unlock(&cpu_bps_lock); } static bool cpu_bps_check(int cpu, struct perf_event *bp) { struct breakpoint **cpu_bp; + bool ret = false; int i; + spin_lock(&cpu_bps_lock); cpu_bp = per_cpu_ptr(cpu_bps, cpu); for (i = 0; i < nr_wp_slots(); i++) { - if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) - return true; + if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) { + ret = true; + break; + } } - return false; + spin_unlock(&cpu_bps_lock); + return ret; } static bool all_cpu_bps_check(struct perf_event *bp) @@ -286,10 +317,6 @@ static bool all_cpu_bps_check(struct perf_event *bp) return false; } -/* - * We don't use any locks to serialize accesses to cpu_bps or task_bps - * because are already inside nr_bp_mutex. - */ int arch_reserve_bp_slot(struct perf_event *bp) { int ret; diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_64e.S index cc008de58b05..0fc680e03dee 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_64e.S @@ -2,7 +2,7 @@ /* * Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org> * - * Generic idle routine for Book3E processors + * Generic idle routine for 64 bits e500 processors */ #include <linux/threads.h> @@ -16,8 +16,6 @@ #include <asm/hw_irq.h> /* 64-bit version only for now */ -#ifdef CONFIG_PPC64 - .macro BOOK3E_IDLE name loop _GLOBAL(\name) /* Save LR for later */ @@ -77,7 +75,7 @@ _GLOBAL(\name) .macro BOOK3E_IDLE_LOOP 1: - PPC_WAIT(0) + PPC_WAIT_v203 b 1b .endm @@ -98,6 +96,4 @@ epapr_ev_idle_start: BOOK3E_IDLE epapr_ev_idle EPAPR_EV_IDLE_LOOP -BOOK3E_IDLE book3e_idle BOOK3E_IDLE_LOOP - -#endif /* CONFIG_PPC64 */ +BOOK3E_IDLE e500_idle BOOK3E_IDLE_LOOP diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_85xx.S index 9e1bc4502c50..9e1bc4502c50 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_85xx.S diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 0e75cb03244a..f9db0a172401 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -431,16 +431,6 @@ again: if (unlikely(stack_store)) __hard_EE_RI_disable(); - /* - * Returning to a kernel context with local irqs disabled. - * Here, if EE was enabled in the interrupted context, enable - * it on return as well. A problem exists here where a soft - * masked interrupt may have cleared MSR[EE] and set HARD_DIS - * here, and it will still exist on return to the caller. This - * will be resolved by the masked interrupt firing again. - */ - if (regs->msr & MSR_EE) - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; #endif /* CONFIG_PPC64 */ } diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index ce25b28cf418..978a173eb339 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -13,16 +13,6 @@ #include <asm/ppc_asm.h> #include <asm/ptrace.h> - .section ".toc","aw" -SYS_CALL_TABLE: - .tc sys_call_table[TC],sys_call_table - -#ifdef CONFIG_COMPAT -COMPAT_SYS_CALL_TABLE: - .tc compat_sys_call_table[TC],compat_sys_call_table -#endif - .previous - .align 7 .macro DEBUG_SRR_VALID srr @@ -67,16 +57,11 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r0,GPR0(r1) std r10,GPR1(r1) std r2,GPR2(r1) - ld r2,PACATOC(r13) + LOAD_PACA_TOC() mfcr r12 li r11,0 - /* Can we avoid saving r3-r8 in common case? */ - std r3,GPR3(r1) - std r4,GPR4(r1) - std r5,GPR5(r1) - std r6,GPR6(r1) - std r7,GPR7(r1) - std r8,GPR8(r1) + /* Save syscall parameters in r3-r8 */ + SAVE_GPRS(3, 8, r1) /* Zero r9-r12, this should only be required when restoring all GPRs */ std r11,GPR9(r1) std r11,GPR10(r1) @@ -91,9 +76,12 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) li r11,\trapnr std r11,_TRAP(r1) std r12,_CCR(r1) - addi r10,r1,STACK_FRAME_OVERHEAD - ld r11,exception_marker@toc(r2) - std r11,-16(r10) /* "regshere" marker */ + std r3,ORIG_GPR3(r1) + /* Calling convention has r3 = regs, r4 = orig r0 */ + addi r3,r1,STACK_FRAME_OVERHEAD + mr r4,r0 + LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) + std r11,-16(r3) /* "regshere" marker */ BEGIN_FTR_SECTION HMT_MEDIUM @@ -108,8 +96,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * but this is the best we can do. */ - /* Calling convention has r9 = orig r0, r10 = regs */ - mr r9,r0 bl system_call_exception .Lsyscall_vectored_\name\()_exit: @@ -148,17 +134,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Could zero these as per ABI, but we may consider a stricter ABI * which preserves these if libc implementations can benefit, so * restore them for now until further measurement is done. */ - ld r0,GPR0(r1) - ld r4,GPR4(r1) - ld r5,GPR5(r1) - ld r6,GPR6(r1) - ld r7,GPR7(r1) - ld r8,GPR8(r1) + REST_GPR(0, r1) + REST_GPRS(4, 8, r1) /* Zero volatile regs that may contain sensitive kernel data */ - li r9,0 - li r10,0 - li r11,0 - li r12,0 + ZEROIZE_GPRS(9, 12) mtspr SPRN_XER,r0 /* @@ -181,7 +160,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r5,_XER(r1) REST_NVGPRS(r1) - ld r0,GPR0(r1) + REST_GPR(0, r1) mtcr r2 mtctr r3 mtlr r4 @@ -195,7 +174,7 @@ syscall_vectored_\name\()_restart: _ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) - ld r2,PACATOC(r13) + LOAD_PACA_TOC() ld r3,RESULT(r1) addi r4,r1,STACK_FRAME_OVERHEAD li r11,IRQS_ALL_DISABLED @@ -240,21 +219,16 @@ _ASM_NOKPROBE_SYMBOL(system_call_common) std r0,GPR0(r1) std r10,GPR1(r1) std r2,GPR2(r1) -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 START_BTB_FLUSH_SECTION BTB_FLUSH(r10) END_BTB_FLUSH_SECTION #endif - ld r2,PACATOC(r13) + LOAD_PACA_TOC() mfcr r12 li r11,0 - /* Can we avoid saving r3-r8 in common case? */ - std r3,GPR3(r1) - std r4,GPR4(r1) - std r5,GPR5(r1) - std r6,GPR6(r1) - std r7,GPR7(r1) - std r8,GPR8(r1) + /* Save syscall parameters in r3-r8 */ + SAVE_GPRS(3, 8, r1) /* Zero r9-r12, this should only be required when restoring all GPRs */ std r11,GPR9(r1) std r11,GPR10(r1) @@ -275,9 +249,12 @@ END_BTB_FLUSH_SECTION std r10,_LINK(r1) std r11,_TRAP(r1) std r12,_CCR(r1) - addi r10,r1,STACK_FRAME_OVERHEAD - ld r11,exception_marker@toc(r2) - std r11,-16(r10) /* "regshere" marker */ + std r3,ORIG_GPR3(r1) + /* Calling convention has r3 = regs, r4 = orig r0 */ + addi r3,r1,STACK_FRAME_OVERHEAD + mr r4,r0 + LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) + std r11,-16(r3) /* "regshere" marker */ #ifdef CONFIG_PPC_BOOK3S li r11,1 @@ -298,8 +275,6 @@ END_BTB_FLUSH_SECTION wrteei 1 #endif - /* Calling convention has r9 = orig r0, r10 = regs */ - mr r9,r0 bl system_call_exception .Lsyscall_exit: @@ -343,16 +318,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) cmpdi r3,0 bne .Lsyscall_restore_regs /* Zero volatile regs that may contain sensitive kernel data */ - li r0,0 - li r4,0 - li r5,0 - li r6,0 - li r7,0 - li r8,0 - li r9,0 - li r10,0 - li r11,0 - li r12,0 + ZEROIZE_GPR(0) + ZEROIZE_GPRS(4, 12) mtctr r0 mtspr SPRN_XER,r0 .Lsyscall_restore_regs_cont: @@ -378,7 +345,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) REST_NVGPRS(r1) mtctr r3 mtspr SPRN_XER,r4 - ld r0,GPR0(r1) + REST_GPR(0, r1) REST_GPRS(4, 12, r1) b .Lsyscall_restore_regs_cont .Lsyscall_rst_end: @@ -388,7 +355,7 @@ syscall_restart: _ASM_NOKPROBE_SYMBOL(syscall_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) - ld r2,PACATOC(r13) + LOAD_PACA_TOC() ld r3,RESULT(r1) addi r4,r1,STACK_FRAME_OVERHEAD li r11,IRQS_ALL_DISABLED @@ -535,7 +502,7 @@ interrupt_return_\srr\()_user_restart: _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) - ld r2,PACATOC(r13) + LOAD_PACA_TOC() addi r3,r1,STACK_FRAME_OVERHEAD li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) @@ -559,15 +526,54 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel) ld r11,SOFTE(r1) cmpwi r11,IRQS_ENABLED stb r11,PACAIRQSOFTMASK(r13) - bne 1f + beq .Linterrupt_return_\srr\()_soft_enabled + + /* + * Returning to soft-disabled context. + * Check if a MUST_HARD_MASK interrupt has become pending, in which + * case we need to disable MSR[EE] in the return context. + */ + ld r12,_MSR(r1) + andi. r10,r12,MSR_EE + beq .Lfast_kernel_interrupt_return_\srr\() // EE already disabled + lbz r11,PACAIRQHAPPENED(r13) + andi. r10,r11,PACA_IRQ_MUST_HARD_MASK + beq .Lfast_kernel_interrupt_return_\srr\() // No HARD_MASK pending + + /* Must clear MSR_EE from _MSR */ +#ifdef CONFIG_PPC_BOOK3S + li r10,0 + /* Clear valid before changing _MSR */ + .ifc \srr,srr + stb r10,PACASRR_VALID(r13) + .else + stb r10,PACAHSRR_VALID(r13) + .endif +#endif + xori r12,r12,MSR_EE + std r12,_MSR(r1) + b .Lfast_kernel_interrupt_return_\srr\() + +.Linterrupt_return_\srr\()_soft_enabled: + /* + * In the soft-enabled case, need to double-check that we have no + * pending interrupts that might have come in before we reached the + * restart section of code, and restart the exit so those can be + * handled. + * + * If there are none, it is be possible that the interrupt still + * has PACA_IRQ_HARD_DIS set, which needs to be cleared for the + * interrupted context. This clear will not clobber a new pending + * interrupt coming in, because we're in the restart section, so + * such would return to the restart location. + */ #ifdef CONFIG_PPC_BOOK3S lbz r11,PACAIRQHAPPENED(r13) andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l bne- interrupt_return_\srr\()_kernel_restart #endif li r11,0 - stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS -1: + stb r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS .Lfast_kernel_interrupt_return_\srr\(): cmpdi cr1,r3,0 @@ -619,7 +625,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) mtspr SPRN_XER,r5 /* - * Leaving a stale exception_marker on the stack can confuse + * Leaving a stale STACK_FRAME_REGS_MARKER on the stack can confuse * the reliable stack unwinder later on. Clear it. */ std r0,STACK_FRAME_OVERHEAD-16(r1) @@ -668,7 +674,7 @@ interrupt_return_\srr\()_kernel_restart: _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) - ld r2,PACATOC(r13) + LOAD_PACA_TOC() addi r3,r1,STACK_FRAME_OVERHEAD li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index 01645e03e9f0..eb2b380e52a0 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -68,6 +68,35 @@ int distribute_irqs = 1; +static inline void next_interrupt(struct pt_regs *regs) +{ + /* + * Softirq processing can enable/disable irqs, which will leave + * MSR[EE] enabled and the soft mask set to IRQS_DISABLED. Fix + * this up. + */ + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + else + irq_soft_mask_set(IRQS_ALL_DISABLED); + + /* + * We are responding to the next interrupt, so interrupt-off + * latencies should be reset here. + */ + trace_hardirqs_on(); + trace_hardirqs_off(); +} + +static inline bool irq_happened_test_and_clear(u8 irq) +{ + if (local_paca->irq_happened & irq) { + local_paca->irq_happened &= ~irq; + return true; + } + return false; +} + void replay_soft_interrupts(void) { struct pt_regs regs; @@ -79,18 +108,25 @@ void replay_soft_interrupts(void) * recurse into this function. Don't keep any state across * interrupt handler calls which may change underneath us. * + * Softirqs can not be disabled over replay to stop this recursion + * because interrupts taken in idle code may require RCU softirq + * to run in the irq RCU tracking context. This is a hard problem + * to fix without changes to the softirq or idle layer. + * * We use local_paca rather than get_paca() to avoid all the * debug_smp_processor_id() business in this low level function. */ + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON_ONCE(mfmsr() & MSR_EE); + WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)); + } + ppc_save_regs(®s); regs.softe = IRQS_ENABLED; regs.msr |= MSR_EE; again: - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(mfmsr() & MSR_EE); - /* * Force the delivery of pending soft-disabled interrupts on PS3. * Any HV call will have this side effect. @@ -105,56 +141,47 @@ again: * This is a higher priority interrupt than the others, so * replay it first. */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { - local_paca->irq_happened &= ~PACA_IRQ_HMI; + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && + irq_happened_test_and_clear(PACA_IRQ_HMI)) { regs.trap = INTERRUPT_HMI; handle_hmi_exception(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); + next_interrupt(®s); } - if (local_paca->irq_happened & PACA_IRQ_DEC) { - local_paca->irq_happened &= ~PACA_IRQ_DEC; + if (irq_happened_test_and_clear(PACA_IRQ_DEC)) { regs.trap = INTERRUPT_DECREMENTER; timer_interrupt(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); + next_interrupt(®s); } - if (local_paca->irq_happened & PACA_IRQ_EE) { - local_paca->irq_happened &= ~PACA_IRQ_EE; + if (irq_happened_test_and_clear(PACA_IRQ_EE)) { regs.trap = INTERRUPT_EXTERNAL; do_IRQ(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); + next_interrupt(®s); } - if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { - local_paca->irq_happened &= ~PACA_IRQ_DBELL; + if (IS_ENABLED(CONFIG_PPC_DOORBELL) && + irq_happened_test_and_clear(PACA_IRQ_DBELL)) { regs.trap = INTERRUPT_DOORBELL; doorbell_exception(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); + next_interrupt(®s); } /* Book3E does not support soft-masking PMI interrupts */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { - local_paca->irq_happened &= ~PACA_IRQ_PMI; + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && + irq_happened_test_and_clear(PACA_IRQ_PMI)) { regs.trap = INTERRUPT_PERFMON; performance_monitor_exception(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); + next_interrupt(®s); } - if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) { - /* - * We are responding to the next interrupt, so interrupt-off - * latencies should be reset here. - */ - trace_hardirqs_on(); - trace_hardirqs_off(); + /* + * Softirq processing can enable and disable interrupts, which can + * result in new irqs becoming pending. Must keep looping until we + * have cleared out all pending interrupts. + */ + if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) goto again; - } } #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) @@ -270,10 +297,12 @@ happened: trace_hardirqs_off(); replay_soft_interrupts_irqrestore(); - local_paca->irq_happened = 0; trace_hardirqs_on(); irq_soft_mask_set(IRQS_ENABLED); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS); + local_paca->irq_happened = 0; __hard_irq_enable(); preempt_enable(); } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index a20deebf233f..1a1e9995dae3 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -47,7 +47,7 @@ static struct hard_trap_info { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ #ifdef CONFIG_BOOKE_OR_40x { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ -#if defined(CONFIG_FSL_BOOKE) +#if defined(CONFIG_PPC_85xx) { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */ { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */ @@ -57,7 +57,7 @@ static struct hard_trap_info { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */ { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */ { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */ -#else /* ! CONFIG_FSL_BOOKE */ +#else /* ! CONFIG_PPC_85xx */ { 0x1000, 0x0e /* SIGALRM */ }, /* prog interval timer */ { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */ { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */ @@ -208,7 +208,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) for (reg = 14; reg < 32; reg++) PACK64(ptr, regs->gpr[reg]); -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_85xx #ifdef CONFIG_SPE for (reg = 0; reg < 32; reg++) PACK64(ptr, p->thread.evr[reg]); @@ -234,7 +234,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) #define GDB_SIZEOF_REG sizeof(unsigned long) #define GDB_SIZEOF_REG_U32 sizeof(u32) -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_85xx #define GDB_SIZEOF_FLOAT_REG sizeof(unsigned long) #else #define GDB_SIZEOF_FLOAT_REG sizeof(u64) @@ -329,7 +329,7 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) if (regno >= 32 && regno < 64) { /* FP registers 32 -> 63 */ -#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) +#if defined(CONFIG_PPC_85xx) && defined(CONFIG_SPE) if (current) memcpy(mem, ¤t->thread.evr[regno-32], dbg_reg_def[regno].size); @@ -355,7 +355,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) if (regno >= 32 && regno < 64) { /* FP registers 32 -> 63 */ -#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) +#if defined(CONFIG_PPC_85xx) && defined(CONFIG_SPE) memcpy(¤t->thread.evr[regno-32], mem, dbg_reg_def[regno].size); #else diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 912d4f8a13be..bd7b1a035459 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -161,7 +161,13 @@ int arch_prepare_kprobe(struct kprobe *p) preempt_disable(); prev = get_kprobe(p->addr - 1); preempt_enable_no_resched(); - if (prev && ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) { + + /* + * When prev is a ftrace-based kprobe, we don't have an insn, and it + * doesn't probe for prefixed instruction. + */ + if (prev && !kprobe_ftrace(prev) && + ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 6568823cf306..5b3c093611ba 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -455,7 +455,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features) kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt); break; -#ifdef CONFIG_PPC_BOOK3E_MMU +#ifdef CONFIG_PPC_E500 case KVM_INST_MFSPR(SPRN_MAS0): if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) kvm_patch_ins_lwz(inst, magic_var(mas0), inst_rt); @@ -484,7 +484,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features) if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) kvm_patch_ins_lwz(inst, magic_var(mas7_3), inst_rt); break; -#endif /* CONFIG_PPC_BOOK3E_MMU */ +#endif /* CONFIG_PPC_E500 */ case KVM_INST_MFSPR(SPRN_SPRG4): #ifdef CONFIG_BOOKE @@ -557,7 +557,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features) case KVM_INST_MTSPR(SPRN_DSISR): kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt); break; -#ifdef CONFIG_PPC_BOOK3E_MMU +#ifdef CONFIG_PPC_E500 case KVM_INST_MTSPR(SPRN_MAS0): if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) kvm_patch_ins_stw(inst, magic_var(mas0), inst_rt); @@ -586,7 +586,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features) if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) kvm_patch_ins_stw(inst, magic_var(mas7_3), inst_rt); break; -#endif /* CONFIG_PPC_BOOK3E_MMU */ +#endif /* CONFIG_PPC_E500 */ case KVM_INST_MTSPR(SPRN_SPRG4): if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 5c58460b269a..f048c424c525 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -471,6 +471,8 @@ void __init find_legacy_serial_ports(void) } #endif + of_node_put(stdout); + DBG("legacy_serial_console = %d\n", legacy_serial_console); if (legacy_serial_console >= 0) setup_legacy_serial_console(legacy_serial_console); diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index fd6d8d3a548e..36184cada00b 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -286,7 +286,7 @@ kexec_flag: #ifdef CONFIG_KEXEC_CORE -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* * BOOK3E has no real MMU mode, so we have to setup the initial TLB * for a core to identity map v:0 to p:0. This current implementation @@ -354,7 +354,7 @@ _GLOBAL(kexec_smp_wait) * don't overwrite r3 here, it is live for kexec_wait above. */ real_mode: /* assume normal blr return */ -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* Create an identity mapping. */ b kexec_create_tlb #else @@ -413,7 +413,7 @@ _GLOBAL(kexec_sequence) lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */ /* disable interrupts, we are overwriting kernel data next */ -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 wrteei 0 #else mfmsr r3 diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index 5c7f0b4b784b..cd4e7bc32609 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -73,7 +73,7 @@ optprobe_template_entry: * further below. */ #ifdef CONFIG_PPC64 - ld r2,PACATOC(r13) + LOAD_PACA_TOC() #endif .global optprobe_template_op_address diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index ba593fd60124..be8db402e963 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -16,7 +16,6 @@ #include <asm/kexec.h> #include <asm/svm.h> #include <asm/ultravisor.h> -#include <asm/rtas.h> #include "setup.h" @@ -170,30 +169,6 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) } #endif /* CONFIG_PPC_64S_HASH_MMU */ -#ifdef CONFIG_PPC_PSERIES -/** - * new_rtas_args() - Allocates rtas args - * @cpu: CPU number - * @limit: Memory limit for this allocation - * - * Allocates a struct rtas_args and return it's pointer, - * if not in Hypervisor mode - * - * Return: Pointer to allocated rtas_args - * NULL if CPU in Hypervisor Mode - */ -static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit) -{ - limit = min_t(unsigned long, limit, RTAS_INSTANTIATE_MAX); - - if (early_cpu_has_feature(CPU_FTR_HVMODE)) - return NULL; - - return alloc_paca_data(sizeof(struct rtas_args), L1_CACHE_BYTES, - limit, cpu); -} -#endif /* CONFIG_PPC_PSERIES */ - /* The Paca is an array with one entry per processor. Each contains an * lppaca, which contains the information shared between the * hypervisor and Linux. @@ -211,7 +186,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) #ifdef CONFIG_PPC_PSERIES new_paca->lppaca_ptr = NULL; #endif -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 new_paca->kernel_pgd = swapper_pg_dir; #endif new_paca->lock_token = 0x8000; @@ -228,14 +203,10 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->slb_shadow_ptr = NULL; #endif -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* For now -- if we have threads this will be adjusted later */ new_paca->tcd_ptr = &new_paca->tcd; #endif - -#ifdef CONFIG_PPC_PSERIES - new_paca->rtas_args_reentrant = NULL; -#endif } /* Put the paca pointer into r13 and SPRG_PACA */ @@ -244,7 +215,7 @@ void setup_paca(struct paca_struct *new_paca) /* Setup r13 */ local_paca = new_paca; -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* On Book3E, initialize the TLB miss exception frames */ mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); #else @@ -308,9 +279,6 @@ void __init allocate_paca(int cpu) #ifdef CONFIG_PPC_64S_HASH_MMU paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); #endif -#ifdef CONFIG_PPC_PSERIES - paca->rtas_args_reentrant = new_rtas_args(cpu, limit); -#endif paca_struct_size += sizeof(struct paca_struct); } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 31de91c8359c..d67cf79bf5d0 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -135,7 +135,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) list_add_tail(&phb->list_node, &hose_list); spin_unlock(&hose_spinlock); - phb->dn = dev; + phb->dn = of_node_get(dev); phb->is_dynamic = slab_is_available(); #ifdef CONFIG_PPC64 if (dev) { @@ -158,7 +158,7 @@ void pcibios_free_controller(struct pci_controller *phb) /* Clear bit of phb_bitmap to allow reuse of this PHB number. */ if (phb->global_number < MAX_PHBS) clear_bit(phb->global_number, phb_bitmap); - + of_node_put(phb->dn); list_del(&phb->list_node); spin_unlock(&hose_spinlock); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 7a35fc25a304..38561d6a2079 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -330,6 +330,7 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, INIT_LIST_HEAD(&pdn->list); parent = of_get_parent(dn); pdn->parent = parent ? PCI_DN(parent) : NULL; + of_node_put(parent); if (pdn->parent) list_add_tail(&pdn->list, &pdn->parent->child_list); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 0fbda89cd1bb..67da147fe34d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -127,7 +127,7 @@ unsigned long notrace msr_check_and_set(unsigned long bits) newmsr |= MSR_VSX; if (oldmsr != newmsr) - mtmsr_isync(newmsr); + newmsr = mtmsr_isync_irqsafe(newmsr); return newmsr; } @@ -145,7 +145,7 @@ void notrace __msr_check_and_clear(unsigned long bits) newmsr &= ~MSR_VSX; if (oldmsr != newmsr) - mtmsr_isync(newmsr); + mtmsr_isync_irqsafe(newmsr); } EXPORT_SYMBOL(__msr_check_and_clear); @@ -1655,11 +1655,6 @@ EXPORT_SYMBOL_GPL(set_thread_tidr); #endif /* CONFIG_PPC64 */ -void -release_thread(struct task_struct *t) -{ -} - /* * this gets called so that we can store coprocessor state into memory and * copy the current task into the new thread. @@ -2308,6 +2303,6 @@ void notrace __ppc64_runlatch_off(void) unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() & ~PAGE_MASK; + sp -= prandom_u32_max(PAGE_SIZE); return sp & ~0xf; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index a730b951b64b..1eed87d954ba 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -30,6 +30,7 @@ #include <linux/libfdt.h> #include <linux/cpu.h> #include <linux/pgtable.h> +#include <linux/seq_buf.h> #include <asm/rtas.h> #include <asm/page.h> @@ -137,7 +138,7 @@ static void __init move_device_tree(void) } /* - * ibm,pa-features is a per-cpu property that contains a string of + * ibm,pa/pi-features is a per-cpu property that contains a string of * attribute descriptors, each of which has a 2 byte header plus up * to 254 bytes worth of processor attribute bits. First header * byte specifies the number of bytes following the header. @@ -149,15 +150,17 @@ static void __init move_device_tree(void) * is supported/not supported. Note that the bit numbers are * big-endian to match the definition in PAPR. */ -static struct ibm_pa_feature { +struct ibm_feature { unsigned long cpu_features; /* CPU_FTR_xxx bit */ unsigned long mmu_features; /* MMU_FTR_xxx bit */ unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */ unsigned int cpu_user_ftrs2; /* PPC_FEATURE2_xxx bit */ - unsigned char pabyte; /* byte number in ibm,pa-features */ + unsigned char pabyte; /* byte number in ibm,pa/pi-features */ unsigned char pabit; /* bit number (big-endian) */ unsigned char invert; /* if 1, pa bit set => clear feature */ -} ibm_pa_features[] __initdata = { +}; + +static struct ibm_feature ibm_pa_features[] __initdata = { { .pabyte = 0, .pabit = 0, .cpu_user_ftrs = PPC_FEATURE_HAS_MMU }, { .pabyte = 0, .pabit = 1, .cpu_user_ftrs = PPC_FEATURE_HAS_FPU }, { .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL }, @@ -179,9 +182,19 @@ static struct ibm_pa_feature { { .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 }, }; +/* + * ibm,pi-features property provides the support of processor specific + * options not described in ibm,pa-features. Right now use byte 0, bit 3 + * which indicates the occurrence of DSI interrupt when the paste operation + * on the suspended NX window. + */ +static struct ibm_feature ibm_pi_features[] __initdata = { + { .pabyte = 0, .pabit = 3, .mmu_features = MMU_FTR_NX_DSI }, +}; + static void __init scan_features(unsigned long node, const unsigned char *ftrs, unsigned long tablelen, - struct ibm_pa_feature *fp, + struct ibm_feature *fp, unsigned long ft_size) { unsigned long i, len, bit; @@ -218,17 +231,18 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs, } } -static void __init check_cpu_pa_features(unsigned long node) +static void __init check_cpu_features(unsigned long node, char *name, + struct ibm_feature *fp, + unsigned long size) { const unsigned char *pa_ftrs; int tablelen; - pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen); + pa_ftrs = of_get_flat_dt_prop(node, name, &tablelen); if (pa_ftrs == NULL) return; - scan_features(node, pa_ftrs, tablelen, - ibm_pa_features, ARRAY_SIZE(ibm_pa_features)); + scan_features(node, pa_ftrs, tablelen, fp, size); } #ifdef CONFIG_PPC_64S_HASH_MMU @@ -376,11 +390,16 @@ static int __init early_init_dt_scan_cpus(unsigned long node, */ if (!dt_cpu_ftrs_in_use()) { prop = of_get_flat_dt_prop(node, "cpu-version", NULL); - if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) + if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) { identify_cpu(0, be32_to_cpup(prop)); + seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(prop)); + } check_cpu_feature_properties(node); - check_cpu_pa_features(node); + check_cpu_features(node, "ibm,pa-features", ibm_pa_features, + ARRAY_SIZE(ibm_pa_features)); + check_cpu_features(node, "ibm,pi-features", ibm_pi_features, + ARRAY_SIZE(ibm_pi_features)); } identical_pvr_fixup(node); @@ -696,6 +715,23 @@ static void __init tm_init(void) static void tm_init(void) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +static int __init +early_init_dt_scan_model(unsigned long node, const char *uname, + int depth, void *data) +{ + const char *prop; + + if (depth != 0) + return 0; + + prop = of_get_flat_dt_prop(node, "model", NULL); + if (prop) + seq_buf_printf(&ppc_hw_desc, "%s ", prop); + + /* break now */ + return 1; +} + #ifdef CONFIG_PPC64 static void __init save_fscr_to_task(void) { @@ -724,6 +760,8 @@ void __init early_init_devtree(void *params) if (!early_init_dt_verify(params)) panic("BUG: Failed verifying flat device tree, bad version?"); + of_scan_flat_dt(early_init_dt_scan_model, NULL); + #ifdef CONFIG_PPC_RTAS /* Some machines might need RTAS info for debugging, grab it now. */ of_scan_flat_dt(early_init_dt_scan_rtas, NULL); @@ -803,6 +841,9 @@ void __init early_init_devtree(void *params) dt_cpu_ftrs_scan(); + // We can now add the CPU name & PVR to the hardware description + seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR)); + /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a6669c40c1db..d464ba412084 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -96,12 +96,6 @@ static int of_workarounds __prombss; #define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */ #define OF_WA_LONGTRAIL 2 /* work around longtrail bugs */ -#define PROM_BUG() do { \ - prom_printf("kernel BUG at %s line 0x%x!\n", \ - __FILE__, __LINE__); \ - __builtin_trap(); \ -} while (0) - #ifdef DEBUG_PROM #define prom_debug(x...) prom_printf(x) #else diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index dfa5f729f774..311890d71c4c 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -26,8 +26,7 @@ _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start logo_linux_clut224 btext_prepare_BAT reloc_got2 kernstart_addr memstart_addr linux_banner _stext -__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC. -relocate" +btext_setup_display TOC. relocate" NM="$1" OBJ="$2" diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S index 232e4549defe..efd52f2e7033 100644 --- a/arch/powerpc/kernel/reloc_64.S +++ b/arch/powerpc/kernel/reloc_64.S @@ -27,8 +27,8 @@ _GLOBAL(relocate) add r9,r9,r12 /* r9 has runtime addr of .rela.dyn section */ ld r10,(p_st - 0b)(r12) add r10,r10,r12 /* r10 has runtime addr of _stext */ - ld r13,(p_sym - 0b)(r12) - add r13,r13,r12 /* r13 has runtime addr of .dynsym */ + ld r4,(p_sym - 0b)(r12) + add r4,r4,r12 /* r4 has runtime addr of .dynsym */ /* * Scan the dynamic section for the RELA, RELASZ and RELAENT entries. @@ -84,16 +84,16 @@ _GLOBAL(relocate) ld r0,16(r9) /* reloc->r_addend */ b .Lstore .Luaddr64: - srdi r14,r0,32 /* ELF64_R_SYM(reloc->r_info) */ + srdi r5,r0,32 /* ELF64_R_SYM(reloc->r_info) */ clrldi r0,r0,32 cmpdi r0,R_PPC64_UADDR64 bne .Lnext ld r6,0(r9) ld r0,16(r9) - mulli r14,r14,24 /* 24 == sizeof(elf64_sym) */ - add r14,r14,r13 /* elf64_sym[ELF64_R_SYM] */ - ld r14,8(r14) - add r0,r0,r14 + mulli r5,r5,24 /* 24 == sizeof(elf64_sym) */ + add r5,r5,r4 /* elf64_sym[ELF64_R_SYM] */ + ld r5,8(r5) + add r0,r0,r5 .Lstore: add r0,r0,r3 stdx r0,r7,r6 diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 693133972294..e847f9b1c5b9 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -23,6 +23,7 @@ #include <linux/memblock.h> #include <linux/slab.h> #include <linux/reboot.h> +#include <linux/security.h> #include <linux/syscalls.h> #include <linux/of.h> #include <linux/of_fdt.h> @@ -43,7 +44,6 @@ #include <asm/time.h> #include <asm/mmu.h> #include <asm/topology.h> -#include <asm/paca.h> /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); @@ -464,6 +464,9 @@ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, va_end(list); } +static int ibm_open_errinjct_token; +static int ibm_errinjct_token; + int rtas_call(int token, int nargs, int nret, int *outputs, ...) { va_list list; @@ -476,6 +479,16 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) return -1; + if (token == ibm_open_errinjct_token || token == ibm_errinjct_token) { + /* + * It would be nicer to not discard the error value + * from security_locked_down(), but callers expect an + * RTAS status, not an errno. + */ + if (security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION)) + return -1; + } + if ((mfmsr() & (MSR_IR|MSR_DR)) != (MSR_IR|MSR_DR)) { WARN_ON_ONCE(1); return -1; @@ -932,59 +945,6 @@ void rtas_activate_firmware(void) pr_err("ibm,activate-firmware failed (%i)\n", fwrc); } -#ifdef CONFIG_PPC_PSERIES -/** - * rtas_call_reentrant() - Used for reentrant rtas calls - * @token: Token for desired reentrant RTAS call - * @nargs: Number of Input Parameters - * @nret: Number of Output Parameters - * @outputs: Array of outputs - * @...: Inputs for desired RTAS call - * - * According to LoPAR documentation, only "ibm,int-on", "ibm,int-off", - * "ibm,get-xive" and "ibm,set-xive" are currently reentrant. - * Reentrant calls need their own rtas_args buffer, so not using rtas.args, but - * PACA one instead. - * - * Return: -1 on error, - * First output value of RTAS call if (nret > 0), - * 0 otherwise, - */ -int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) -{ - va_list list; - struct rtas_args *args; - unsigned long flags; - int i, ret = 0; - - if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) - return -1; - - local_irq_save(flags); - preempt_disable(); - - /* We use the per-cpu (PACA) rtas args buffer */ - args = local_paca->rtas_args_reentrant; - - va_start(list, outputs); - va_rtas_call_unlocked(args, token, nargs, nret, list); - va_end(list); - - if (nret > 1 && outputs) - for (i = 0; i < nret - 1; ++i) - outputs[i] = be32_to_cpu(args->rets[i + 1]); - - if (nret > 0) - ret = be32_to_cpu(args->rets[0]); - - local_irq_restore(flags); - preempt_enable(); - - return ret; -} - -#endif /* CONFIG_PPC_PSERIES */ - /** * get_pseries_errorlog() - Find a specific pseries error log in an RTAS * extended event log. @@ -1227,6 +1187,14 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) if (block_rtas_call(token, nargs, &args)) return -EINVAL; + if (token == ibm_open_errinjct_token || token == ibm_errinjct_token) { + int err; + + err = security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION); + if (err) + return err; + } + /* Need to handle ibm,suspend_me call specially */ if (token == rtas_token("ibm,suspend-me")) { @@ -1325,7 +1293,8 @@ void __init rtas_initialize(void) #ifdef CONFIG_RTAS_ERROR_LOGGING rtas_last_error_token = rtas_token("rtas-last-error"); #endif - + ibm_open_errinjct_token = rtas_token("ibm,open-errinjct"); + ibm_errinjct_token = rtas_token("ibm,errinjct"); rtas_syscall_filter_init(); } diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index d96fd14bd7c9..206475e3e0b4 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -16,6 +16,7 @@ #include <asm/asm-prototypes.h> #include <asm/code-patching.h> #include <asm/security_features.h> +#include <asm/sections.h> #include <asm/setup.h> #include <asm/inst.h> @@ -34,7 +35,7 @@ static enum branch_cache_flush_type link_stack_flush_type = BRANCH_CACHE_FLUSH_N bool barrier_nospec_enabled; static bool no_nospec; static bool btb_flush_enabled; -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64) +#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64) static bool no_spectrev2; #endif @@ -121,7 +122,7 @@ static __init int security_feature_debugfs_init(void) device_initcall(security_feature_debugfs_init); #endif /* CONFIG_DEBUG_FS */ -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64) +#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64) static int __init handle_nospectre_v2(char *p) { no_spectrev2 = true; @@ -129,9 +130,9 @@ static int __init handle_nospectre_v2(char *p) return 0; } early_param("nospectre_v2", handle_nospectre_v2); -#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_E500 || CONFIG_PPC_BOOK3S_64 */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 void __init setup_spectre_v2(void) { if (no_spectrev2 || cpu_mitigations_off()) @@ -139,7 +140,7 @@ void __init setup_spectre_v2(void) else btb_flush_enabled = true; } -#endif /* CONFIG_PPC_FSL_BOOK3E */ +#endif /* CONFIG_PPC_E500 */ #ifdef CONFIG_PPC_BOOK3S_64 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index dd98f43bd685..6d041993a45d 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -18,6 +18,7 @@ #include <linux/delay.h> #include <linux/initrd.h> #include <linux/platform_device.h> +#include <linux/printk.h> #include <linux/seq_file.h> #include <linux/ioport.h> #include <linux/console.h> @@ -25,6 +26,7 @@ #include <linux/root_dev.h> #include <linux/cpu.h> #include <linux/unistd.h> +#include <linux/seq_buf.h> #include <linux/serial.h> #include <linux/serial_8250.h> #include <linux/percpu.h> @@ -588,6 +590,15 @@ static __init int add_pcspkr(void) device_initcall(add_pcspkr); #endif /* CONFIG_PCSPKR_PLATFORM */ +static char ppc_hw_desc_buf[128] __initdata; + +struct seq_buf ppc_hw_desc __initdata = { + .buffer = ppc_hw_desc_buf, + .size = sizeof(ppc_hw_desc_buf), + .len = 0, + .readpos = 0, +}; + static __init void probe_machine(void) { extern struct machdep_calls __machine_desc_start; @@ -628,7 +639,13 @@ static __init void probe_machine(void) for (;;); } - printk(KERN_INFO "Using %s machine description\n", ppc_md.name); + // Append the machine name to other info we've gathered + seq_buf_puts(&ppc_hw_desc, ppc_md.name); + + // Set the generic hardware description shown in oopses + dump_stack_set_arch_desc(ppc_hw_desc.buffer); + + pr_info("Hardware name: %s\n", ppc_hw_desc.buffer); } /* Match a class of boards, not a specific device configuration. */ diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 93f22da12abe..7912bb50a7cb 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -23,7 +23,7 @@ void check_smt_enabled(void); static inline void check_smt_enabled(void) { } #endif -#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP) void setup_tlb_core_data(void); #else static inline void setup_tlb_core_data(void) { } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 813261789303..b761cc1a403c 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -207,7 +207,7 @@ void __init setup_power_save(void) ppc_md.power_save = ppc6xx_idle; #endif -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 if (cpu_has_feature(CPU_FTR_CAN_DOZE) || cpu_has_feature(CPU_FTR_CAN_NAP)) ppc_md.power_save = e500_idle; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2b2d0b0fbb30..a0dee7354fe6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -34,6 +34,7 @@ #include <linux/of.h> #include <linux/of_fdt.h> +#include <asm/asm-prototypes.h> #include <asm/kvm_guest.h> #include <asm/io.h> #include <asm/kdump.h> @@ -86,7 +87,7 @@ struct ppc64_caches ppc64_caches = { }; EXPORT_SYMBOL_GPL(ppc64_caches); -#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP) void __init setup_tlb_core_data(void) { int cpu; @@ -176,14 +177,26 @@ early_param("smt-enabled", early_smt_enabled); #endif /* CONFIG_SMP */ /** Fix up paca fields required for the boot cpu */ -static void __init fixup_boot_paca(void) +static void __init fixup_boot_paca(struct paca_struct *boot_paca) { /* The boot cpu is started */ - get_paca()->cpu_start = 1; + boot_paca->cpu_start = 1; +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * Give the early boot machine check stack somewhere to use, use + * half of the init stack. This is a bit hacky but there should not be + * deep stack usage in early init so shouldn't overflow it or overwrite + * things. + */ + boot_paca->mc_emergency_sp = (void *)&init_thread_union + + (THREAD_SIZE/2); +#endif /* Allow percpu accesses to work until we setup percpu data */ - get_paca()->data_offset = 0; - /* Mark interrupts disabled in PACA */ - irq_soft_mask_set(IRQS_DISABLED); + boot_paca->data_offset = 0; + /* Mark interrupts soft and hard disabled in PACA */ + boot_paca->irq_soft_mask = IRQS_DISABLED; + boot_paca->irq_happened = PACA_IRQ_HARD_DIS; + WARN_ON(mfmsr() & MSR_EE); } static void __init configure_exceptions(void) @@ -350,11 +363,15 @@ void __init early_setup(unsigned long dt_ptr) * what CPU we are on. */ initialise_paca(&boot_paca, 0); - setup_paca(&boot_paca); - fixup_boot_paca(); + fixup_boot_paca(&boot_paca); + WARN_ON(local_paca != 0); + setup_paca(&boot_paca); /* install the paca into registers */ /* -------- printk is now safe to use ------- */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && (mfmsr() & MSR_HV)) + enable_machine_check(); + /* Try new device tree based feature discovery ... */ if (!dt_cpu_ftrs_init(__va(dt_ptr))) /* Otherwise use the old style CPU table */ @@ -377,8 +394,8 @@ void __init early_setup(unsigned long dt_ptr) /* Poison paca_ptrs[0] again if it's not the boot cpu */ memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0])); } - setup_paca(paca_ptrs[boot_cpuid]); - fixup_boot_paca(); + fixup_boot_paca(paca_ptrs[boot_cpuid]); + setup_paca(paca_ptrs[boot_cpuid]); /* install the paca into registers */ /* * Configure exception handlers. This include setting up trampolines @@ -673,7 +690,7 @@ void __init initialize_cache_info(void) */ __init u64 ppc64_bolted_size(void) { -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* Freescale BookE bolts the entire linear mapping */ /* XXX: BookE ppc64_rma_limit setup seems to disagree? */ if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) @@ -723,7 +740,7 @@ void __init irqstack_early_init(void) } } -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 void __init exc_lvl_early_init(void) { unsigned int i; @@ -825,7 +842,7 @@ void __init setup_per_cpu_areas(void) /* * BookE and BookS radix are historical values and should be revisited. */ - if (IS_ENABLED(CONFIG_PPC_BOOK3E)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) { atom_size = SZ_1M; } else if (radix_enabled()) { atom_size = PAGE_SIZE; diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 618aeccdf691..a429c57ed433 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -196,9 +196,6 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, #else /* CONFIG_PPC64 */ -extern long sys_rt_sigreturn(void); -extern long sys_sigreturn(void); - static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct task_struct *tsk) { diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 157a7403e3eb..c114c7f25645 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -43,7 +43,7 @@ #include <asm/tm.h> #include <asm/asm-prototypes.h> #ifdef CONFIG_PPC64 -#include "ppc32.h" +#include <asm/syscalls_32.h> #include <asm/unistd.h> #else #include <asm/ucontext.h> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 169703fead57..0da6e59161cd 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -708,7 +708,7 @@ static struct task_struct *current_set[NR_CPUS]; static void smp_store_cpu_info(int id) { per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR); -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 per_cpu(next_tlbcam_idx, id) = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1; #endif @@ -1257,7 +1257,12 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - int rc, c; + const unsigned long boot_spin_ms = 5 * MSEC_PER_SEC; + const bool booting = system_state < SYSTEM_RUNNING; + const unsigned long hp_spin_ms = 1; + unsigned long deadline; + int rc; + const unsigned long spin_wait_ms = booting ? boot_spin_ms : hp_spin_ms; /* * Don't allow secondary threads to come online if inhibited @@ -1302,22 +1307,23 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) } /* - * wait to see if the cpu made a callin (is actually up). - * use this value that I found through experimentation. - * -- Cort + * At boot time, simply spin on the callin word until the + * deadline passes. + * + * At run time, spin for an optimistic amount of time to avoid + * sleeping in the common case. */ - if (system_state < SYSTEM_RUNNING) - for (c = 50000; c && !cpu_callin_map[cpu]; c--) - udelay(100); -#ifdef CONFIG_HOTPLUG_CPU - else - /* - * CPUs can take much longer to come up in the - * hotplug case. Wait five seconds. - */ - for (c = 5000; c && !cpu_callin_map[cpu]; c--) - msleep(1); -#endif + deadline = jiffies + msecs_to_jiffies(spin_wait_ms); + spin_until_cond(cpu_callin_map[cpu] || time_is_before_jiffies(deadline)); + + if (!cpu_callin_map[cpu] && system_state >= SYSTEM_RUNNING) { + const unsigned long sleep_interval_us = 10 * USEC_PER_MSEC; + const unsigned long sleep_wait_ms = 100 * MSEC_PER_SEC; + + deadline = jiffies + msecs_to_jiffies(sleep_wait_ms); + while (!cpu_callin_map[cpu] && time_is_after_jiffies(deadline)) + fsleep(sleep_interval_us); + } if (!cpu_callin_map[cpu]) { printk(KERN_ERR "Processor %u is stuck.\n", cpu); diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_85xx.S index 88cfdbd530f1..88cfdbd530f1 100644 --- a/arch/powerpc/kernel/swsusp_booke.S +++ b/arch/powerpc/kernel/swsusp_85xx.S diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 9f1903c7f540..f645652c2654 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -76,16 +76,10 @@ swsusp_save_area: .space SL_SIZE - .section ".toc","aw" -swsusp_save_area_ptr: - .tc swsusp_save_area[TC],swsusp_save_area -restore_pblist_ptr: - .tc restore_pblist[TC],restore_pblist - .section .text .align 5 _GLOBAL(swsusp_arch_suspend) - ld r11,swsusp_save_area_ptr@toc(r2) + LOAD_REG_ADDR(r11, swsusp_save_area) SAVE_SPECIAL(LR) SAVE_REGISTER(r1) SAVE_SPECIAL(CR) @@ -131,7 +125,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) bl swsusp_save /* restore LR */ - ld r11,swsusp_save_area_ptr@toc(r2) + LOAD_REG_ADDR(r11, swsusp_save_area) RESTORE_SPECIAL(LR) addi r1,r1,128 @@ -145,7 +139,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync - ld r12,restore_pblist_ptr@toc(r2) + LOAD_REG_ADDR(r11, restore_pblist) ld r12,0(r12) cmpdi r12,0 @@ -187,7 +181,7 @@ nothing_to_copy: tlbia #endif - ld r11,swsusp_save_area_ptr@toc(r2) + LOAD_REG_ADDR(r11, swsusp_save_area) RESTORE_SPECIAL(CR) @@ -265,7 +259,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) bl do_after_copyback addi r1,r1,128 - ld r11,swsusp_save_area_ptr@toc(r2) + LOAD_REG_ADDR(r11, swsusp_save_area) RESTORE_SPECIAL(LR) li r3, 0 diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 16ff0399a257..1ab4a4d95aba 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -1,13 +1,23 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * sys_ppc32.c: Conversion between 32bit and 64bit native syscalls. + * sys_ppc32.c: 32-bit system calls with complex calling conventions. * * Copyright (C) 2001 IBM * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) * - * These routines maintain argument size conversion between 32bit and 64bit - * environment. + * 32-bit system calls with 64-bit arguments pass those in register pairs. + * This must be specially dealt with on 64-bit kernels. The compat_arg_u64_dual + * in generic compat syscalls is not always usable because the register + * pairing is constrained depending on preceding arguments. + * + * An analogous problem exists on 32-bit kernels with ARCH_HAS_SYSCALL_WRAPPER, + * the defined system call functions take the pt_regs as an argument, and there + * is a mapping macro which maps registers to arguments + * (SC_POWERPC_REGS_TO_ARGS) which also does not deal with these 64-bit + * arguments. + * + * This file contains these system calls. */ #include <linux/kernel.h> @@ -25,7 +35,6 @@ #include <linux/poll.h> #include <linux/personality.h> #include <linux/stat.h> -#include <linux/mman.h> #include <linux/in.h> #include <linux/syscalls.h> #include <linux/unistd.h> @@ -48,72 +57,65 @@ #include <asm/syscalls.h> #include <asm/switch_to.h> -unsigned long compat_sys_mmap2(unsigned long addr, size_t len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - /* This should remain 12 even if PAGE_SIZE changes */ - return sys_mmap(addr, len, prot, flags, fd, pgoff << 12); -} - -/* - * long long munging: - * The 32 bit ABI passes long longs in an odd even register pair. - * High and low parts are swapped depending on endian mode, - * so define a macro (similar to mips linux32) to handle that. - */ -#ifdef __LITTLE_ENDIAN__ -#define merge_64(low, high) ((u64)high << 32) | low +#ifdef CONFIG_PPC32 +#define PPC32_SYSCALL_DEFINE4 SYSCALL_DEFINE4 +#define PPC32_SYSCALL_DEFINE5 SYSCALL_DEFINE5 +#define PPC32_SYSCALL_DEFINE6 SYSCALL_DEFINE6 #else -#define merge_64(high, low) ((u64)high << 32) | low +#define PPC32_SYSCALL_DEFINE4 COMPAT_SYSCALL_DEFINE4 +#define PPC32_SYSCALL_DEFINE5 COMPAT_SYSCALL_DEFINE5 +#define PPC32_SYSCALL_DEFINE6 COMPAT_SYSCALL_DEFINE6 #endif -compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, - u32 reg6, u32 pos1, u32 pos2) +PPC32_SYSCALL_DEFINE6(ppc_pread64, + unsigned int, fd, + char __user *, ubuf, compat_size_t, count, + u32, reg6, u32, pos1, u32, pos2) { return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2)); } -compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, - u32 reg6, u32 pos1, u32 pos2) +PPC32_SYSCALL_DEFINE6(ppc_pwrite64, + unsigned int, fd, + const char __user *, ubuf, compat_size_t, count, + u32, reg6, u32, pos1, u32, pos2) { return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2)); } -compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count) +PPC32_SYSCALL_DEFINE5(ppc_readahead, + int, fd, u32, r4, + u32, offset1, u32, offset2, u32, count) { return ksys_readahead(fd, merge_64(offset1, offset2), count); } -asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4, - unsigned long len1, unsigned long len2) +PPC32_SYSCALL_DEFINE4(ppc_truncate64, + const char __user *, path, u32, reg4, + unsigned long, len1, unsigned long, len2) { return ksys_truncate(path, merge_64(len1, len2)); } -asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, - u32 len1, u32 len2) -{ - return ksys_fallocate(fd, mode, ((loff_t)offset1 << 32) | offset2, - merge_64(len1, len2)); -} - -asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1, - unsigned long len2) +PPC32_SYSCALL_DEFINE4(ppc_ftruncate64, + unsigned int, fd, u32, reg4, + unsigned long, len1, unsigned long, len2) { return ksys_ftruncate(fd, merge_64(len1, len2)); } -long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2, - size_t len, int advice) +PPC32_SYSCALL_DEFINE6(ppc32_fadvise64, + int, fd, u32, unused, u32, offset1, u32, offset2, + size_t, len, int, advice) { return ksys_fadvise64_64(fd, merge_64(offset1, offset2), len, advice); } -asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags, - unsigned offset1, unsigned offset2, - unsigned nbytes1, unsigned nbytes2) +COMPAT_SYSCALL_DEFINE6(ppc_sync_file_range2, + int, fd, unsigned int, flags, + unsigned int, offset1, unsigned int, offset2, + unsigned int, nbytes1, unsigned int, nbytes2) { loff_t offset = merge_64(offset1, offset2); loff_t nbytes = merge_64(nbytes1, nbytes2); diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c index 81ace9e8b72b..18b9d325395f 100644 --- a/arch/powerpc/kernel/syscall.c +++ b/arch/powerpc/kernel/syscall.c @@ -12,12 +12,8 @@ #include <asm/unistd.h> -typedef long (*syscall_fn)(long, long, long, long, long, long); - /* Has to run notrace because it is entered not completely "reconciled" */ -notrace long system_call_exception(long r3, long r4, long r5, - long r6, long r7, long r8, - unsigned long r0, struct pt_regs *regs) +notrace long system_call_exception(struct pt_regs *regs, unsigned long r0) { long ret; syscall_fn f; @@ -25,7 +21,6 @@ notrace long system_call_exception(long r3, long r4, long r5, kuap_lock(); add_random_kstack_offset(); - regs->orig_gpr3 = r3; if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); @@ -139,12 +134,6 @@ notrace long system_call_exception(long r3, long r4, long r5, r0 = do_syscall_trace_enter(regs); if (unlikely(r0 >= NR_syscalls)) return regs->gpr[3]; - r3 = regs->gpr[3]; - r4 = regs->gpr[4]; - r5 = regs->gpr[5]; - r6 = regs->gpr[6]; - r7 = regs->gpr[7]; - r8 = regs->gpr[8]; } else if (unlikely(r0 >= NR_syscalls)) { if (unlikely(trap_is_unsupported_scv(regs))) { @@ -158,21 +147,31 @@ notrace long system_call_exception(long r3, long r4, long r5, /* May be faster to do array_index_nospec? */ barrier_nospec(); +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER + // No COMPAT if we have SYSCALL_WRAPPER, see Kconfig + f = (void *)sys_call_table[r0]; + ret = f(regs); +#else if (unlikely(is_compat_task())) { + unsigned long r3, r4, r5, r6, r7, r8; + f = (void *)compat_sys_call_table[r0]; - r3 &= 0x00000000ffffffffULL; - r4 &= 0x00000000ffffffffULL; - r5 &= 0x00000000ffffffffULL; - r6 &= 0x00000000ffffffffULL; - r7 &= 0x00000000ffffffffULL; - r8 &= 0x00000000ffffffffULL; + r3 = regs->gpr[3] & 0x00000000ffffffffULL; + r4 = regs->gpr[4] & 0x00000000ffffffffULL; + r5 = regs->gpr[5] & 0x00000000ffffffffULL; + r6 = regs->gpr[6] & 0x00000000ffffffffULL; + r7 = regs->gpr[7] & 0x00000000ffffffffULL; + r8 = regs->gpr[8] & 0x00000000ffffffffULL; + ret = f(r3, r4, r5, r6, r7, r8); } else { f = (void *)sys_call_table[r0]; - } - ret = f(r3, r4, r5, r6, r7, r8); + ret = f(regs->gpr[3], regs->gpr[4], regs->gpr[5], + regs->gpr[6], regs->gpr[7], regs->gpr[8]); + } +#endif /* * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index fc999140bc27..68ebb23a5af4 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -36,9 +36,9 @@ #include <asm/time.h> #include <asm/unistd.h> -static inline long do_mmap2(unsigned long addr, size_t len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long off, int shift) +static long do_mmap2(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long off, int shift) { if (!arch_validate_prot(prot, addr)) return -EINVAL; @@ -56,6 +56,16 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len, return do_mmap2(addr, len, prot, flags, fd, pgoff, PAGE_SHIFT-12); } +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE6(mmap2, + unsigned long, addr, size_t, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, off_4k) +{ + return do_mmap2(addr, len, prot, flags, fd, off_4k, PAGE_SHIFT-12); +} +#endif + SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, off_t, offset) @@ -63,43 +73,39 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len, return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT); } -#ifdef CONFIG_PPC32 -/* - * Due to some executables calling the wrong select we sometimes - * get wrong args. This determines how the args are being passed - * (a single ptr to them all args passed) then calls - * sys_select() with the appropriate args. -- Cort - */ -int -ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp) -{ - if ((unsigned long)n >= 4096) - return sys_old_select((void __user *)n); - - return sys_select(n, inp, outp, exp, tvp); -} -#endif - #ifdef CONFIG_PPC64 -long ppc64_personality(unsigned long personality) +static long do_ppc64_personality(unsigned long personality) { long ret; if (personality(current->personality) == PER_LINUX32 && personality(personality) == PER_LINUX) personality = (personality & ~PER_MASK) | PER_LINUX32; - ret = sys_personality(personality); + ret = ksys_personality(personality); if (personality(ret) == PER_LINUX32) ret = (ret & ~PER_MASK) | PER_LINUX; return ret; } -#endif -long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, - u32 len_high, u32 len_low) +SYSCALL_DEFINE1(ppc64_personality, unsigned long, personality) +{ + return do_ppc64_personality(personality); +} + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE1(ppc64_personality, unsigned long, personality) +{ + return do_ppc64_personality(personality); +} +#endif /* CONFIG_COMPAT */ +#endif /* CONFIG_PPC64 */ + +SYSCALL_DEFINE6(ppc_fadvise64_64, + int, fd, int, advice, u32, offset_high, u32, offset_low, + u32, len_high, u32, len_low) { - return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low, - (u64)len_high << 32 | len_low, advice); + return ksys_fadvise64_64(fd, merge_64(offset_high, offset_low), + merge_64(len_high, len_low), advice); } SYSCALL_DEFINE0(switch_endian) diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 2600b4237292..e9e0df4f9a61 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -110,7 +110,7 @@ 79 common settimeofday sys_settimeofday compat_sys_settimeofday 80 common getgroups sys_getgroups 81 common setgroups sys_setgroups -82 32 select ppc_select sys_ni_syscall +82 32 select sys_old_select compat_sys_old_select 82 64 select sys_ni_syscall 82 spu select sys_ni_syscall 83 common symlink sys_symlink @@ -178,9 +178,9 @@ 133 common fchdir sys_fchdir 134 common bdflush sys_ni_syscall 135 common sysfs sys_sysfs -136 32 personality sys_personality ppc64_personality -136 64 personality ppc64_personality -136 spu personality ppc64_personality +136 32 personality sys_personality compat_sys_ppc64_personality +136 64 personality sys_ppc64_personality +136 spu personality sys_ppc64_personality 137 common afs_syscall sys_ni_syscall 138 common setfsuid sys_setfsuid 139 common setfsgid sys_setfsgid @@ -228,8 +228,10 @@ 176 64 rt_sigtimedwait sys_rt_sigtimedwait 177 nospu rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend -179 common pread64 sys_pread64 compat_sys_pread64 -180 common pwrite64 sys_pwrite64 compat_sys_pwrite64 +179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 +179 64 pread64 sys_pread64 +180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 +180 64 pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -242,10 +244,11 @@ 188 common putpmsg sys_ni_syscall 189 nospu vfork sys_vfork 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit -191 common readahead sys_readahead compat_sys_readahead +191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead +191 64 readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 -193 32 truncate64 sys_truncate64 compat_sys_truncate64 -194 32 ftruncate64 sys_ftruncate64 compat_sys_ftruncate64 +193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 +194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 195 32 stat64 sys_stat64 196 32 lstat64 sys_lstat64 197 32 fstat64 sys_fstat64 @@ -288,7 +291,8 @@ 230 common io_submit sys_io_submit compat_sys_io_submit 231 common io_cancel sys_io_cancel 232 nospu set_tid_address sys_set_tid_address -233 common fadvise64 sys_fadvise64 ppc32_fadvise64 +233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 +233 64 fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group 235 nospu lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie 236 common epoll_create sys_epoll_create @@ -323,7 +327,7 @@ 251 spu utimes sys_utimes 252 common statfs64 sys_statfs64 compat_sys_statfs64 253 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 -254 32 fadvise64_64 ppc_fadvise64_64 +254 32 fadvise64_64 sys_ppc_fadvise64_64 254 spu fadvise64_64 sys_ni_syscall 255 common rtas sys_rtas 256 32 sys_debug_setcontext sys_debug_setcontext sys_ni_syscall @@ -390,7 +394,7 @@ 305 common signalfd sys_signalfd compat_sys_signalfd 306 common timerfd_create sys_timerfd_create 307 common eventfd sys_eventfd -308 common sync_file_range2 sys_sync_file_range2 compat_sys_sync_file_range2 +308 common sync_file_range2 sys_sync_file_range2 compat_sys_ppc_sync_file_range2 309 nospu fallocate sys_fallocate compat_sys_fallocate 310 nospu subpage_prot sys_subpage_prot 311 32 timerfd_settime sys_timerfd_settime32 diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 3a10cda9c05e..ef9a61718940 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -228,7 +228,7 @@ static void __init sysfs_create_dscr_default(void) } #endif /* CONFIG_PPC64 */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 #define MAX_BIT 63 static u64 pw20_wt; @@ -907,7 +907,7 @@ static int register_cpu_online(unsigned int cpu) device_create_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) { device_create_file(s, &dev_attr_pw20_state); device_create_file(s, &dev_attr_pw20_wait_time); @@ -1003,7 +1003,7 @@ static int unregister_cpu_online(unsigned int cpu) device_remove_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) { device_remove_file(s, &dev_attr_pw20_state); device_remove_file(s, &dev_attr_pw20_wait_time); diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.c index 6c1db3b6de2d..4305f2a2162f 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.c @@ -10,32 +10,37 @@ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) */ -#include <asm/ppc_asm.h> +#include <linux/syscalls.h> +#include <linux/compat.h> +#include <asm/unistd.h> +#include <asm/syscalls.h> -.section .rodata,"a" +#undef __SYSCALL_WITH_COMPAT +#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry) -#ifdef CONFIG_PPC64 - .p2align 3 -#define __SYSCALL(nr, entry) .8byte entry +#undef __SYSCALL +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER +#define __SYSCALL(nr, entry) [nr] = entry, #else - .p2align 2 -#define __SYSCALL(nr, entry) .long entry +/* + * Coerce syscall handlers with arbitrary parameters to common type + * requires cast to void* to avoid -Wcast-function-type. + */ +#define __SYSCALL(nr, entry) [nr] = (void *) entry, #endif -#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) -.globl sys_call_table -sys_call_table: +const syscall_fn sys_call_table[] = { #ifdef CONFIG_PPC64 #include <asm/syscall_table_64.h> #else #include <asm/syscall_table_32.h> #endif +}; #ifdef CONFIG_COMPAT #undef __SYSCALL_WITH_COMPAT #define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat) -.globl compat_sys_call_table -compat_sys_call_table: -#define compat_sys_sigsuspend sys_sigsuspend +const syscall_fn compat_sys_call_table[] = { #include <asm/syscall_table_32.h> -#endif +}; +#endif /* CONFIG_COMPAT */ diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh deleted file mode 100644 index c7ac3ed657c4..000000000000 --- a/arch/powerpc/kernel/systbl_chk.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0-or-later -# -# Just process the CPP output from systbl_chk.c and complain -# if anything is out of order. -# -# Copyright © 2008 IBM Corporation -# - -awk 'BEGIN { num = -1; } # Ignore the beginning of the file - /^#/ { next; } - /^[ \t]*$/ { next; } - /^START_TABLE/ { num = 0; next; } - /^END_TABLE/ { - if (num != $2) { - printf "Error: NR_syscalls (%s) is not one more than the last syscall (%s)\n", - $2, num - 1; - exit(1); - } - num = -1; # Ignore the rest of the file - } - { - if (num == -1) next; - if (($1 != -1) && ($1 != num)) { - printf "Error: Syscall %s out of order (expected %s)\n", - $1, num; - exit(1); - }; - num++; - }' "$1" diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 587adcc12860..a2ab397065c6 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -178,92 +178,6 @@ static inline unsigned long read_spurr(unsigned long tb) return tb; } -#ifdef CONFIG_PPC_SPLPAR - -#include <asm/dtl.h> - -void (*dtl_consumer)(struct dtl_entry *, u64); - -/* - * Scan the dispatch trace log and count up the stolen time. - * Should be called with interrupts disabled. - */ -static u64 scan_dispatch_log(u64 stop_tb) -{ - u64 i = local_paca->dtl_ridx; - struct dtl_entry *dtl = local_paca->dtl_curr; - struct dtl_entry *dtl_end = local_paca->dispatch_log_end; - struct lppaca *vpa = local_paca->lppaca_ptr; - u64 tb_delta; - u64 stolen = 0; - u64 dtb; - - if (!dtl) - return 0; - - if (i == be64_to_cpu(vpa->dtl_idx)) - return 0; - while (i < be64_to_cpu(vpa->dtl_idx)) { - dtb = be64_to_cpu(dtl->timebase); - tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) + - be32_to_cpu(dtl->ready_to_enqueue_time); - barrier(); - if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) { - /* buffer has overflowed */ - i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG; - dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); - continue; - } - if (dtb > stop_tb) - break; - if (dtl_consumer) - dtl_consumer(dtl, i); - stolen += tb_delta; - ++i; - ++dtl; - if (dtl == dtl_end) - dtl = local_paca->dispatch_log; - } - local_paca->dtl_ridx = i; - local_paca->dtl_curr = dtl; - return stolen; -} - -/* - * Accumulate stolen time by scanning the dispatch trace log. - * Called on entry from user mode. - */ -void notrace accumulate_stolen_time(void) -{ - u64 sst, ust; - struct cpu_accounting_data *acct = &local_paca->accounting; - - sst = scan_dispatch_log(acct->starttime_user); - ust = scan_dispatch_log(acct->starttime); - acct->stime -= sst; - acct->utime -= ust; - acct->steal_time += ust + sst; -} - -static inline u64 calculate_stolen_time(u64 stop_tb) -{ - if (!firmware_has_feature(FW_FEATURE_SPLPAR)) - return 0; - - if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) - return scan_dispatch_log(stop_tb); - - return 0; -} - -#else /* CONFIG_PPC_SPLPAR */ -static inline u64 calculate_stolen_time(u64 stop_tb) -{ - return 0; -} - -#endif /* CONFIG_PPC_SPLPAR */ - /* * Account time for a transition between system, hard irq * or soft irq state. @@ -322,7 +236,11 @@ static unsigned long vtime_delta(struct cpu_accounting_data *acct, *stime_scaled = vtime_delta_scaled(acct, now, stime); - *steal_time = calculate_stolen_time(now); + if (IS_ENABLED(CONFIG_PPC_SPLPAR) && + firmware_has_feature(FW_FEATURE_SPLPAR)) + *steal_time = pseries_calculate_stolen_time(now); + else + *steal_time = 0; return stime; } @@ -614,22 +532,23 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) return; } - /* - * Ensure a positive value is written to the decrementer, or - * else some CPUs will continue to take decrementer exceptions. - * When the PPC_WATCHDOG (decrementer based) is configured, - * keep this at most 31 bits, which is about 4 seconds on most - * systems, which gives the watchdog a chance of catching timer - * interrupt hard lockups. - */ - if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) - set_dec(0x7fffffff); - else - set_dec(decrementer_max); - /* Conditionally hard-enable interrupts. */ - if (should_hard_irq_enable()) + if (should_hard_irq_enable()) { + /* + * Ensure a positive value is written to the decrementer, or + * else some CPUs will continue to take decrementer exceptions. + * When the PPC_WATCHDOG (decrementer based) is configured, + * keep this at most 31 bits, which is about 4 seconds on most + * systems, which gives the watchdog a chance of catching timer + * interrupt hard lockups. + */ + if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) + set_dec(0x7fffffff); + else + set_dec(decrementer_max); + do_hard_irq_enable(); + } #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) diff --git a/arch/powerpc/kernel/trace/ftrace_low.S b/arch/powerpc/kernel/trace/ftrace_low.S index 0bddf1fa6636..294d1e05958a 100644 --- a/arch/powerpc/kernel/trace/ftrace_low.S +++ b/arch/powerpc/kernel/trace/ftrace_low.S @@ -48,7 +48,7 @@ _GLOBAL(return_to_handler) * We might be called from a module. * Switch to our TOC to run inside the core kernel. */ - ld r2, PACATOC(r13) + LOAD_PACA_TOC() #else stwu r1, -16(r1) stw r3, 8(r1) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index 4fa23e260cab..d031093bc436 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -83,10 +83,8 @@ #ifdef CONFIG_PPC64 /* Save callee's TOC in the ABI compliant location */ std r2, STK_GOT(r1) - ld r2,PACATOC(r13) /* get kernel TOC in r2 */ - - addis r3,r2,function_trace_op@toc@ha - addi r3,r3,function_trace_op@toc@l + LOAD_PACA_TOC() /* get kernel TOC in r2 */ + LOAD_REG_ADDR(r3, function_trace_op) ld r5,0(r3) #else lis r3,function_trace_op@ha diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index dadfcef5d6db..9bdd79aa51cf 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -68,6 +68,7 @@ #include <asm/stacktrace.h> #include <asm/nmi.h> #include <asm/disassemble.h> +#include <asm/udbg.h> #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -600,7 +601,7 @@ static inline int check_io_access(struct pt_regs *regs) #define inst_length(reason) (((reason) & REASON_PREFIXED) ? 8 : 4) -#if defined(CONFIG_E500) +#if defined(CONFIG_PPC_E500) int machine_check_e500mc(struct pt_regs *regs) { unsigned long mcsr = mfspr(SPRN_MCSR); @@ -850,6 +851,19 @@ bail: } #ifdef CONFIG_PPC_BOOK3S_64 +DEFINE_INTERRUPT_HANDLER_RAW(machine_check_early_boot) +{ + udbg_printf("Machine check (early boot)\n"); + udbg_printf("SRR0=0x%016lx SRR1=0x%016lx\n", regs->nip, regs->msr); + udbg_printf(" DAR=0x%016lx DSISR=0x%08lx\n", regs->dar, regs->dsisr); + udbg_printf(" LR=0x%016lx R1=0x%08lx\n", regs->link, regs->gpr[1]); + udbg_printf("------\n"); + die("Machine check (early boot)", regs, SIGBUS); + for (;;) + ; + return 0; +} + DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async) { __machine_check_exception(regs); @@ -2085,7 +2099,7 @@ DEFINE_INTERRUPT_HANDLER(altivec_assist_exception) } #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_85xx DEFINE_INTERRUPT_HANDLER(CacheLockingException) { unsigned long error_code = regs->dsisr; @@ -2098,12 +2112,11 @@ DEFINE_INTERRUPT_HANDLER(CacheLockingException) _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); return; } -#endif /* CONFIG_FSL_BOOKE */ +#endif /* CONFIG_PPC_85xx */ #ifdef CONFIG_SPE DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException) { - extern int do_spe_mathemu(struct pt_regs *regs); unsigned long spefscr; int fpexc_mode; int code = FPE_FLTUNK; @@ -2153,7 +2166,6 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException) DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) { - extern int speround_handler(struct pt_regs *regs); int err; interrupt_cond_local_irq_enable(regs); diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index b1544b2f6321..92b3fc258d11 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -67,6 +67,8 @@ void __init udbg_early_init(void) udbg_init_debug_opal_raw(); #elif defined(CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI) udbg_init_debug_opal_hvsi(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_16550) + udbg_init_debug_16550(); #endif #ifdef CONFIG_PPC_EARLY_DEBUG diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index d3942de254c6..74ddf836f7a2 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -8,6 +8,7 @@ #include <asm/udbg.h> #include <asm/io.h> #include <asm/reg_a2.h> +#include <asm/early_ioremap.h> extern u8 real_readb(volatile u8 __iomem *addr); extern void real_writeb(u8 data, volatile u8 __iomem *addr); @@ -297,41 +298,34 @@ void __init udbg_init_40x_realmode(void) #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ -#ifdef CONFIG_PPC_EARLY_DEBUG_MICROWATT +#ifdef CONFIG_PPC_EARLY_DEBUG_16550 -#define UDBG_UART_MW_ADDR ((void __iomem *)0xc0002000) +static void __iomem *udbg_uart_early_addr; -static u8 udbg_uart_in_isa300_rm(unsigned int reg) +void __init udbg_init_debug_16550(void) { - uint64_t msr = mfmsr(); - uint8_t c; - - mtmsr(msr & ~(MSR_EE|MSR_DR)); - isync(); - eieio(); - c = __raw_rm_readb(UDBG_UART_MW_ADDR + (reg << 2)); - mtmsr(msr); - isync(); - return c; + udbg_uart_early_addr = early_ioremap(CONFIG_PPC_EARLY_DEBUG_16550_PHYSADDR, 0x1000); + udbg_uart_init_mmio(udbg_uart_early_addr, CONFIG_PPC_EARLY_DEBUG_16550_STRIDE); } -static void udbg_uart_out_isa300_rm(unsigned int reg, u8 val) +static int __init udbg_init_debug_16550_ioremap(void) { - uint64_t msr = mfmsr(); - - mtmsr(msr & ~(MSR_EE|MSR_DR)); - isync(); - eieio(); - __raw_rm_writeb(val, UDBG_UART_MW_ADDR + (reg << 2)); - mtmsr(msr); - isync(); -} + void __iomem *addr; -void __init udbg_init_debug_microwatt(void) -{ - udbg_uart_in = udbg_uart_in_isa300_rm; - udbg_uart_out = udbg_uart_out_isa300_rm; - udbg_use_uart(); + if (!udbg_uart_early_addr) + return 0; + + addr = ioremap(CONFIG_PPC_EARLY_DEBUG_16550_PHYSADDR, 0x1000); + if (WARN_ON(!addr)) + return -ENOMEM; + + udbg_uart_init_mmio(addr, CONFIG_PPC_EARLY_DEBUG_16550_STRIDE); + early_iounmap(udbg_uart_early_addr, 0x1000); + udbg_uart_early_addr = NULL; + + return 0; } -#endif /* CONFIG_PPC_EARLY_DEBUG_MICROWATT */ +early_initcall(udbg_init_debug_16550_ioremap); + +#endif /* CONFIG_PPC_EARLY_DEBUG_16550 */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 0da287544054..4abc01949702 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -39,6 +39,8 @@ extern char vdso32_start, vdso32_end; extern char vdso64_start, vdso64_end; +long sys_ni_syscall(void); + /* * The vdso data page (aka. systemcfg for old ppc64 fans) is here. * Once the early boot kernel code no longer needs to muck around @@ -113,18 +115,18 @@ struct vdso_data *arch_get_vdso_data(void *vvar_page) int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) { struct mm_struct *mm = task->mm; + VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *vma; mmap_read_lock(mm); - - for (vma = mm->mmap; vma; vma = vma->vm_next) { + for_each_vma(vmi, vma) { unsigned long size = vma->vm_end - vma->vm_start; if (vma_is_special_mapping(vma, &vvar_spec)) zap_page_range(vma, vma->vm_start, size); } - mmap_read_unlock(mm); + return 0; } @@ -200,28 +202,19 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int if (is_32bit_task()) { vdso_spec = &vdso32_spec; vdso_size = &vdso32_end - &vdso32_start; - vdso_base = VDSO32_MBASE; } else { vdso_spec = &vdso64_spec; vdso_size = &vdso64_end - &vdso64_start; - /* - * On 64bit we don't have a preferred map address. This - * allows get_unmapped_area to find an area near other mmaps - * and most likely share a SLB entry. - */ - vdso_base = 0; } mappings_size = vdso_size + vvar_size; mappings_size += (VDSO_ALIGNMENT - 1) & PAGE_MASK; /* - * pick a base address for the vDSO in process space. We try to put it - * at vdso_base which is the "natural" base for it, but we might fail - * and end up putting it elsewhere. + * Pick a base address for the vDSO in process space. * Add enough to the size so that the result can be aligned. */ - vdso_base = get_unmapped_area(NULL, vdso_base, mappings_size, 0, 0); + vdso_base = get_unmapped_area(NULL, 0, mappings_size, 0, 0); if (IS_ERR_VALUE(vdso_base)) return vdso_base; @@ -313,10 +306,10 @@ static void __init vdso_setup_syscall_map(void) unsigned int i; for (i = 0; i < NR_syscalls; i++) { - if (sys_call_table[i] != (unsigned long)&sys_ni_syscall) + if (sys_call_table[i] != (void *)&sys_ni_syscall) vdso_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); if (IS_ENABLED(CONFIG_COMPAT) && - compat_sys_call_table[i] != (unsigned long)&sys_ni_syscall) + compat_sys_call_table[i] != (void *)&sys_ni_syscall) vdso_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); } } diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 096b0bf1335f..a2e7b0ce5b19 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -92,13 +92,13 @@ include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE # actual build commands quiet_cmd_vdso32ld_and_check = VDSO32L $@ - cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) + cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) -z noexecstack ; $(cmd_vdso_check) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $< quiet_cmd_vdso32cc = VDSO32C $@ cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< quiet_cmd_vdso64ld_and_check = VDSO64L $@ - cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) + cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) -z noexecstack ; $(cmd_vdso_check) quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(VDSOCC) $(a_flags) $(CC64FLAGS) $(AS64FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S index e0d19d74455f..bc0be274a9ac 100644 --- a/arch/powerpc/kernel/vdso/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso/vdso32.lds.S @@ -78,7 +78,6 @@ SECTIONS __end = .; PROVIDE(end = .); - STABS_DEBUG DWARF_DEBUG ELF_DETAILS diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S index 1a4a7bc4c815..744ae5363e6c 100644 --- a/arch/powerpc/kernel/vdso/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso/vdso64.lds.S @@ -76,7 +76,6 @@ SECTIONS _end = .; PROVIDE(end = .); - STABS_DEBUG DWARF_DEBUG ELF_DETAILS diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 5cc24d8cce94..5cf64740edb8 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -155,8 +155,8 @@ _GLOBAL(load_up_vsx) * usage of floating-point registers. These routines must be called * with preempt disabled. */ -#ifdef CONFIG_PPC32 .data +#ifdef CONFIG_PPC32 fpzero: .long 0 fpone: @@ -169,18 +169,17 @@ fphalf: lfs fr,name@l(r11) #else - .section ".toc","aw" fpzero: - .tc FD_0_0[TC],0 + .quad 0 fpone: - .tc FD_3ff00000_0[TC],0x3ff0000000000000 /* 1.0 */ + .quad 0x3ff0000000000000 /* 1.0 */ fphalf: - .tc FD_3fe00000_0[TC],0x3fe0000000000000 /* 0.5 */ + .quad 0x3fe0000000000000 /* 0.5 */ -#define LDCONST(fr, name) \ - lfd fr,name@toc(r2) +#define LDCONST(fr, name) \ + addis r11,r2,name@toc@ha; \ + lfd fr,name@toc@l(r11) #endif - .text /* * Internal routine to enable floating point and set FPSCR to 0. diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index fe22d940412f..7786e3ac7611 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -32,6 +32,10 @@ #define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT) +#if STRICT_ALIGN_SIZE < PAGE_SIZE +#error "CONFIG_DATA_SHIFT must be >= PAGE_SHIFT" +#endif + ENTRY(_stext) PHDRS { @@ -67,7 +71,7 @@ SECTIONS .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { #ifdef CONFIG_PPC64 KEEP(*(.head.text.first_256B)); -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 #else KEEP(*(.head.text.real_vectors)); *(.head.text.real_trampolines); @@ -122,14 +126,6 @@ SECTIONS *(.sfpr); MEM_KEEP(init.text) MEM_KEEP(exit.text) - -#ifdef CONFIG_PPC32 - *(.got1) - __got2_start = .; - *(.got2) - __got2_end = .; -#endif /* CONFIG_PPC32 */ - } :text . = ALIGN(PAGE_SIZE); @@ -139,15 +135,57 @@ SECTIONS /* Read-only data */ RO_DATA(PAGE_SIZE) -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC32 + .sdata2 : AT(ADDR(.sdata2) - LOAD_OFFSET) { + *(.sdata2) + } +#endif + + .data.rel.ro : AT(ADDR(.data.rel.ro) - LOAD_OFFSET) { + *(.data.rel.ro*) + } + + .branch_lt : AT(ADDR(.branch_lt) - LOAD_OFFSET) { + *(.branch_lt) + } + +#ifdef CONFIG_PPC32 + .got1 : AT(ADDR(.got1) - LOAD_OFFSET) { + *(.got1) + } + .got2 : AT(ADDR(.got2) - LOAD_OFFSET) { + __got2_start = .; + *(.got2) + __got2_end = .; + } + .got : AT(ADDR(.got) - LOAD_OFFSET) { + *(.got) + *(.got.plt) + } + .plt : AT(ADDR(.plt) - LOAD_OFFSET) { + /* XXX: is .plt (and .got.plt) required? */ + *(.plt) + } + +#else /* CONFIG_PPC32 */ + .toc1 : AT(ADDR(.toc1) - LOAD_OFFSET) { + *(.toc1) + } + + .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) { + *(.got .toc) + } + SOFT_MASK_TABLE(8) RESTART_TABLE(8) +#ifdef CONFIG_PPC64_ELF_ABI_V1 .opd : AT(ADDR(.opd) - LOAD_OFFSET) { __start_opd = .; KEEP(*(.opd)) __end_opd = .; } +#endif . = ALIGN(8); __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) { @@ -190,7 +228,7 @@ SECTIONS *(__rfi_flush_fixup) __stop___rfi_flush_fixup = .; } -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC_BARRIER_NOSPEC . = ALIGN(8); @@ -201,7 +239,7 @@ SECTIONS } #endif /* CONFIG_PPC_BARRIER_NOSPEC */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 . = ALIGN(8); __spec_btb_flush_fixup : AT(ADDR(__spec_btb_flush_fixup) - LOAD_OFFSET) { __start__btb_flush_fixup = .; @@ -210,12 +248,17 @@ SECTIONS } #endif + /* + * Various code relies on __init_begin being at the strict RWX boundary. + */ + . = ALIGN(STRICT_ALIGN_SIZE); + __srwx_boundary = .; + __end_rodata = .; + __init_begin = .; + /* * Init sections discarded at runtime */ - . = ALIGN(STRICT_ALIGN_SIZE); - __init_begin = .; - . = ALIGN(PAGE_SIZE); .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT @@ -317,34 +360,13 @@ SECTIONS . = ALIGN(PAGE_SIZE); _sdata = .; -#ifdef CONFIG_PPC32 .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA *(.data.rel*) +#ifdef CONFIG_PPC32 *(SDATA_MAIN) - *(.sdata2) - *(.got.plt) *(.got) - *(.plt) - *(.branch_lt) - } -#else - .data : AT(ADDR(.data) - LOAD_OFFSET) { - DATA_DATA - *(.data.rel*) - *(.toc1) - *(.branch_lt) - } - - .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) { - *(.got) -#ifndef CONFIG_RELOCATABLE - __prom_init_toc_start = .; - arch/powerpc/kernel/prom_init.o*(.toc) - __prom_init_toc_end = .; #endif - *(.toc) } -#endif /* The initial task and kernel stack */ INIT_TASK_DATA_SECTION(THREAD_ALIGN) @@ -382,7 +404,6 @@ SECTIONS _end = . ; PROVIDE32 (end = .); - STABS_DEBUG DWARF_DEBUG ELF_DETAILS diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index cf84bfe9e27e..de64c7962991 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -136,7 +136,7 @@ void __init reserve_crashkernel(void) #ifdef CONFIG_PPC64 /* * On the LPAR platform place the crash kernel to mid of - * RMA size (512MB or more) to ensure the crash kernel + * RMA size (max. of 512MB) to ensure the crash kernel * gets enough space to place itself and some stack to be * in the first segment. At the same time normal kernel * also get enough space to allocate memory for essential @@ -144,9 +144,9 @@ void __init reserve_crashkernel(void) * kernel starts at 128MB offset on other platforms. */ if (firmware_has_feature(FW_FEATURE_LPAR)) - crashk_res.start = ppc64_rma_size / 2; + crashk_res.start = min_t(u64, ppc64_rma_size / 2, SZ_512M); else - crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); + crashk_res.start = min_t(u64, ppc64_rma_size / 2, SZ_128M); #else crashk_res.start = KDUMP_KERNELBASE; #endif diff --git a/arch/powerpc/kexec/core_32.c b/arch/powerpc/kexec/core_32.c index b50aed48d09d..c95f96850c9e 100644 --- a/arch/powerpc/kexec/core_32.c +++ b/arch/powerpc/kexec/core_32.c @@ -55,7 +55,7 @@ void default_machine_kexec(struct kimage *image) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); printk(KERN_INFO "Bye!\n"); - if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x)) + if (!IS_ENABLED(CONFIG_PPC_85xx) && !IS_ENABLED(CONFIG_44x)) relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start); /* now call it */ diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index c2bea9db1c1e..a79e28c91e2b 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -360,7 +360,7 @@ void default_machine_kexec(struct kimage *image) * the RMA. On BookE there is no real MMU off mode, so we have to * keep it enabled as well (but then we have bolted TLB entries). */ -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 copy_with_mmu_off = false; #else copy_with_mmu_off = radix_enabled() || diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S index cf6e52bdf8d8..d9f0dd9b34ff 100644 --- a/arch/powerpc/kexec/relocate_32.S +++ b/arch/powerpc/kexec/relocate_32.S @@ -25,14 +25,14 @@ relocate_new_kernel: /* r4 = reboot_code_buffer */ /* r5 = start_address */ -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_85xx mr r29, r3 mr r30, r4 mr r31, r5 #define ENTRY_MAPPING_KEXEC_SETUP -#include <kernel/fsl_booke_entry_mapping.S> +#include <kernel/85xx_entry_mapping.S> #undef ENTRY_MAPPING_KEXEC_SETUP mr r3, r29 diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index dcb398d5e009..61cdd782d3c5 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -189,7 +189,7 @@ config KVM_EXIT_TIMING config KVM_E500V2 bool "KVM support for PowerPC E500v2 processors" - depends on E500 && !PPC_E500MC + depends on PPC_E500 && !PPC_E500MC select KVM select KVM_MMIO select MMU_NOTIFIER @@ -220,7 +220,7 @@ config KVM_E500MC config KVM_MPIC bool "KVM in-kernel MPIC emulation" - depends on KVM && E500 + depends on KVM && PPC_E500 select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S index e43704547a1e..6c2b1d17cb63 100644 --- a/arch/powerpc/kvm/book3s_64_entry.S +++ b/arch/powerpc/kvm/book3s_64_entry.S @@ -315,7 +315,7 @@ kvmppc_p9_exit_interrupt: reg = reg + 1 .endr - ld r2,PACATOC(r13) + LOAD_PACA_TOC() mflr r4 std r4,VCPU_LR(r3) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 57d0835e56fd..6ba68dd6190b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -249,6 +249,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) /* * We use the vcpu_load/put functions to measure stolen time. + * * Stolen time is counted as time when either the vcpu is able to * run as part of a virtual core, but the task running the vcore * is preempted or sleeping, or when the vcpu needs something done @@ -278,6 +279,12 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) * lock. The stolen times are measured in units of timebase ticks. * (Note that the != TB_NIL checks below are purely defensive; * they should never fail.) + * + * The POWER9 path is simpler, one vcpu per virtual core so the + * former case does not exist. If a vcpu is preempted when it is + * BUSY_IN_HOST and not ceded or otherwise blocked, then accumulate + * the stolen cycles in busy_stolen. RUNNING is not a preemptible + * state in the P9 path. */ static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb) @@ -311,8 +318,14 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) unsigned long flags; u64 now; - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (vcpu->arch.busy_preempt != TB_NIL) { + WARN_ON_ONCE(vcpu->arch.state != KVMPPC_VCPU_BUSY_IN_HOST); + vc->stolen_tb += mftb() - vcpu->arch.busy_preempt; + vcpu->arch.busy_preempt = TB_NIL; + } return; + } now = mftb(); @@ -340,8 +353,21 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) unsigned long flags; u64 now; - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + /* + * In the P9 path, RUNNABLE is not preemptible + * (nor takes host interrupts) + */ + WARN_ON_ONCE(vcpu->arch.state == KVMPPC_VCPU_RUNNABLE); + /* + * Account stolen time when preempted while the vcpu task is + * running in the kernel (but not in qemu, which is INACTIVE). + */ + if (task_is_running(current) && + vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) + vcpu->arch.busy_preempt = mftb(); return; + } now = mftb(); @@ -707,16 +733,15 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) } static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, + struct lppaca *vpa, unsigned int pcpu, u64 now, unsigned long stolen) { struct dtl_entry *dt; - struct lppaca *vpa; dt = vcpu->arch.dtl_ptr; - vpa = vcpu->arch.vpa.pinned_addr; - if (!dt || !vpa) + if (!dt) return; dt->dispatch_reason = 7; @@ -737,17 +762,23 @@ static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, /* order writing *dt vs. writing vpa->dtl_idx */ smp_wmb(); vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index); - vcpu->arch.dtl.dirty = true; + + /* vcpu->arch.dtl.dirty is set by the caller */ } -static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, - struct kvmppc_vcore *vc) +static void kvmppc_update_vpa_dispatch(struct kvm_vcpu *vcpu, + struct kvmppc_vcore *vc) { + struct lppaca *vpa; unsigned long stolen; unsigned long core_stolen; u64 now; unsigned long flags; + vpa = vcpu->arch.vpa.pinned_addr; + if (!vpa) + return; + now = mftb(); core_stolen = vcore_stolen_time(vc, now); @@ -758,7 +789,34 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, vcpu->arch.busy_stolen = 0; spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); - __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now + vc->tb_offset, stolen); + vpa->enqueue_dispatch_tb = cpu_to_be64(be64_to_cpu(vpa->enqueue_dispatch_tb) + stolen); + + __kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now + vc->tb_offset, stolen); + + vcpu->arch.vpa.dirty = true; +} + +static void kvmppc_update_vpa_dispatch_p9(struct kvm_vcpu *vcpu, + struct kvmppc_vcore *vc, + u64 now) +{ + struct lppaca *vpa; + unsigned long stolen; + unsigned long stolen_delta; + + vpa = vcpu->arch.vpa.pinned_addr; + if (!vpa) + return; + + stolen = vc->stolen_tb; + stolen_delta = stolen - vcpu->arch.stolen_logged; + vcpu->arch.stolen_logged = stolen; + + vpa->enqueue_dispatch_tb = cpu_to_be64(stolen); + + __kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now, stolen_delta); + + vcpu->arch.vpa.dirty = true; } /* See if there is a doorbell interrupt pending for a vcpu */ @@ -2517,10 +2575,24 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); break; case KVM_REG_PPC_TB_OFFSET: + { /* round up to multiple of 2^24 */ - vcpu->arch.vcore->tb_offset = - ALIGN(set_reg_val(id, *val), 1UL << 24); + u64 tb_offset = ALIGN(set_reg_val(id, *val), 1UL << 24); + + /* + * Now that we know the timebase offset, update the + * decrementer expiry with a guest timebase value. If + * the userspace does not set DEC_EXPIRY, this ensures + * a migrated vcpu at least starts with an expired + * decrementer, which is better than a large one that + * causes a hang. + */ + if (!vcpu->arch.dec_expires && tb_offset) + vcpu->arch.dec_expires = get_tb() + tb_offset; + + vcpu->arch.vcore->tb_offset = tb_offset; break; + } case KVM_REG_PPC_LPCR: kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true); break; @@ -3800,7 +3872,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) * kvmppc_core_prepare_to_enter. */ kvmppc_start_thread(vcpu, pvc); - kvmppc_create_dtl_entry(vcpu, pvc); + kvmppc_update_vpa_dispatch(vcpu, pvc); trace_kvm_guest_enter(vcpu); if (!vcpu->arch.ptid) thr0_done = true; @@ -3840,23 +3912,17 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) for (sub = 0; sub < core_info.n_subcores; ++sub) spin_unlock(&core_info.vc[sub]->lock); - guest_enter_irqoff(); + guest_timing_enter_irqoff(); srcu_idx = srcu_read_lock(&vc->kvm->srcu); + guest_state_enter_irqoff(); this_cpu_disable_ftrace(); - /* - * Interrupts will be enabled once we get into the guest, - * so tell lockdep that we're about to enable interrupts. - */ - trace_hardirqs_on(); - trap = __kvmppc_vcore_entry(); - trace_hardirqs_off(); - this_cpu_enable_ftrace(); + guest_state_exit_irqoff(); srcu_read_unlock(&vc->kvm->srcu, srcu_idx); @@ -3891,11 +3957,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_set_host_core(pcpu); - context_tracking_guest_exit(); if (!vtime_accounting_enabled_this_cpu()) { local_irq_enable(); /* - * Service IRQs here before vtime_account_guest_exit() so any + * Service IRQs here before guest_timing_exit_irqoff() so any * ticks that occurred while running the guest are accounted to * the guest. If vtime accounting is enabled, accounting uses * TB rather than ticks, so it can be done without enabling @@ -3904,7 +3969,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ local_irq_disable(); } - vtime_account_guest_exit(); + guest_timing_exit_irqoff(); local_irq_enable(); @@ -4404,7 +4469,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu) if ((vc->vcore_state == VCORE_PIGGYBACK || vc->vcore_state == VCORE_RUNNING) && !VCORE_IS_EXITING(vc)) { - kvmppc_create_dtl_entry(vcpu, vc); + kvmppc_update_vpa_dispatch(vcpu, vc); kvmppc_start_thread(vcpu, vc); trace_kvm_guest_enter(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { @@ -4520,7 +4585,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, vc = vcpu->arch.vcore; vcpu->arch.ceded = 0; vcpu->arch.run_task = current; - vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; vcpu->arch.last_inst = KVM_INST_FETCH_FAILED; /* See if the MMU is ready to go */ @@ -4547,6 +4611,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, /* flags save not required, but irq_pmu has no disable/enable API */ powerpc_local_irq_pmu_save(flags); + vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; + if (signal_pending(current)) goto sigpend; if (need_resched() || !kvm->arch.mmu_ready) @@ -4591,47 +4657,44 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, tb = mftb(); - __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0); + kvmppc_update_vpa_dispatch_p9(vcpu, vc, tb + vc->tb_offset); trace_kvm_guest_enter(vcpu); - guest_enter_irqoff(); + guest_timing_enter_irqoff(); srcu_idx = srcu_read_lock(&kvm->srcu); + guest_state_enter_irqoff(); this_cpu_disable_ftrace(); - /* Tell lockdep that we're about to enable interrupts */ - trace_hardirqs_on(); - trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb); vcpu->arch.trap = trap; - trace_hardirqs_off(); - this_cpu_enable_ftrace(); + guest_state_exit_irqoff(); srcu_read_unlock(&kvm->srcu, srcu_idx); set_irq_happened(trap); - context_tracking_guest_exit(); + vcpu->cpu = -1; + vcpu->arch.thread_cpu = -1; + vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; + if (!vtime_accounting_enabled_this_cpu()) { - local_irq_enable(); + powerpc_local_irq_pmu_restore(flags); /* - * Service IRQs here before vtime_account_guest_exit() so any + * Service IRQs here before guest_timing_exit_irqoff() so any * ticks that occurred while running the guest are accounted to * the guest. If vtime accounting is enabled, accounting uses * TB rather than ticks, so it can be done without enabling * interrupts here, which has the problem that it accounts * interrupt processing overhead to the host. */ - local_irq_disable(); + powerpc_local_irq_pmu_save(flags); } - vtime_account_guest_exit(); - - vcpu->cpu = -1; - vcpu->arch.thread_cpu = -1; + guest_timing_exit_irqoff(); powerpc_local_irq_pmu_restore(flags); @@ -4694,6 +4757,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, out: vcpu->cpu = -1; vcpu->arch.thread_cpu = -1; + vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; powerpc_local_irq_pmu_restore(flags); preempt_enable(); goto done; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 7ded202bf995..37f50861dd98 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1024,7 +1024,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Restore R1/R2 so we can handle faults */ ld r1, HSTATE_HOST_R1(r13) - ld r2, PACATOC(r13) + LOAD_PACA_TOC() mfspr r10, SPRN_SRR0 mfspr r11, SPRN_SRR1 @@ -2727,8 +2727,8 @@ kvmppc_bad_host_intr: std r4, _CTR(r1) std r5, _XER(r1) std r6, SOFTE(r1) - ld r2, PACATOC(r13) - LOAD_REG_IMMEDIATE(3, 0x7265677368657265) + LOAD_PACA_TOC() + LOAD_REG_IMMEDIATE(3, STACK_FRAME_REGS_MARKER) std r3, STACK_FRAME_OVERHEAD-16(r1) /* diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 598006301620..e2f11f9c3f2a 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -508,10 +508,10 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) static int __kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long page_shift, - struct kvm *kvm, unsigned long gpa) + struct kvm *kvm, unsigned long gpa, struct page *fault_page) { unsigned long src_pfn, dst_pfn = 0; - struct migrate_vma mig; + struct migrate_vma mig = { 0 }; struct page *dpage, *spage; struct kvmppc_uvmem_page_pvt *pvt; unsigned long pfn; @@ -525,6 +525,7 @@ static int __kvmppc_svm_page_out(struct vm_area_struct *vma, mig.dst = &dst_pfn; mig.pgmap_owner = &kvmppc_uvmem_pgmap; mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + mig.fault_page = fault_page; /* The requested page is already paged-out, nothing to do */ if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) @@ -580,12 +581,14 @@ out_finalize: static inline int kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long page_shift, - struct kvm *kvm, unsigned long gpa) + struct kvm *kvm, unsigned long gpa, + struct page *fault_page) { int ret; mutex_lock(&kvm->arch.uvmem_lock); - ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa); + ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa, + fault_page); mutex_unlock(&kvm->arch.uvmem_lock); return ret; @@ -634,7 +637,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot, pvt->remove_gfn = true; if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE, - PAGE_SHIFT, kvm, pvt->gpa)) + PAGE_SHIFT, kvm, pvt->gpa, NULL)) pr_err("Can't page out gpa:0x%lx addr:0x%lx\n", pvt->gpa, addr); } else { @@ -715,7 +718,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm) dpage = pfn_to_page(uvmem_pfn); dpage->zone_device_data = pvt; - lock_page(dpage); + zone_device_page_init(dpage); return dpage; out_clear: spin_lock(&kvmppc_uvmem_bitmap_lock); @@ -736,7 +739,7 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, bool pagein) { unsigned long src_pfn, dst_pfn = 0; - struct migrate_vma mig; + struct migrate_vma mig = { 0 }; struct page *spage; unsigned long pfn; struct page *dpage; @@ -994,7 +997,7 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf) if (kvmppc_svm_page_out(vmf->vma, vmf->address, vmf->address + PAGE_SIZE, PAGE_SHIFT, - pvt->kvm, pvt->gpa)) + pvt->kvm, pvt->gpa, vmf->page)) return VM_FAULT_SIGBUS; else return 0; @@ -1065,7 +1068,7 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa, if (!vma || vma->vm_start > start || vma->vm_end < end) goto out; - if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa)) + if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa, NULL)) ret = H_SUCCESS; out: mmap_read_unlock(kvm->mm); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d6abed6e51e6..9fc4dd8f66eb 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -499,7 +499,6 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) if (msr & MSR_POW) { if (!vcpu->arch.pending_exceptions) { kvm_vcpu_halt(vcpu); - kvm_clear_request(KVM_REQ_UNHALT, vcpu); vcpu->stat.generic.halt_wakeup++; /* Unset POW bit after we woke up */ diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index a1f2978b2a86..b2c89e850d7a 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -393,7 +393,6 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) case H_CEDE: kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE); kvm_vcpu_halt(vcpu); - kvm_clear_request(KVM_REQ_UNHALT, vcpu); vcpu->stat.generic.halt_wakeup++; return EMULATE_DONE; case H_LOGICAL_CI_LOAD: diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 06c5830a93f9..7b4920e9fd26 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -719,7 +719,6 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) if (vcpu->arch.shared->msr & MSR_WE) { local_irq_enable(); kvm_vcpu_halt(vcpu); - kvm_clear_request(KVM_REQ_UNHALT, vcpu); hard_irq_disable(); kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 6fa82efe833b..205545d820a1 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -223,7 +223,7 @@ _GLOBAL(kvmppc_resume_host) lwz r3, VCPU_HOST_PID(r4) mtspr SPRN_PID, r3 -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_85xx /* we cheat and know that Linux doesn't use PID1 which is always 0 */ lis r3, 0 mtspr SPRN_PID1, r3 @@ -406,7 +406,7 @@ lightweight_exit: lwz r3, VCPU_SHADOW_PID(r4) mtspr SPRN_PID, r3 -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_85xx lwz r3, VCPU_SHADOW_PID1(r4) mtspr SPRN_PID1, r3 #endif diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index c3ef751465fb..6d0d329cbb35 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -17,7 +17,7 @@ #define KVM_E500_H #include <linux/kvm_host.h> -#include <asm/nohash/mmu-book3e.h> +#include <asm/nohash/mmu-e500.h> #include <asm/tlb.h> #include <asm/cputhreads.h> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index fb1490761c87..b850b0efa201 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -239,7 +239,6 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) case EV_HCALL_TOKEN(EV_IDLE): r = EV_SUCCESS; kvm_vcpu_halt(vcpu); - kvm_clear_request(KVM_REQ_UNHALT, vcpu); break; default: r = EV_UNIMPLEMENTED; @@ -786,7 +785,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; - vcpu->arch.dec_expires = get_tb(); #ifdef CONFIG_KVM_EXIT_TIMING mutex_init(&vcpu->arch.exit_timing_lock); diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S index 3bf17c854be4..2158f61e317f 100644 --- a/arch/powerpc/kvm/tm.S +++ b/arch/powerpc/kvm/tm.S @@ -110,7 +110,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) mtmsrd r2, 1 /* Reload TOC pointer. */ - ld r2, PACATOC(r13) + LOAD_PACA_TOC() /* Save all but r0-r2, r9 & r13 */ reg = 3 diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 6edf0697a526..ad0cf3108dd0 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -94,17 +94,20 @@ void __init poking_init(void) static_branch_enable(&poking_init_done); } +static unsigned long get_patch_pfn(void *addr) +{ + if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr)) + return vmalloc_to_pfn(addr); + else + return __pa_symbol(addr) >> PAGE_SHIFT; +} + /* * This can be called for kernel text or a module. */ static int map_patch_area(void *addr, unsigned long text_poke_addr) { - unsigned long pfn; - - if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr)) - pfn = vmalloc_to_pfn(addr); - else - pfn = __pa_symbol(addr) >> PAGE_SHIFT; + unsigned long pfn = get_patch_pfn(addr); return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); } @@ -149,17 +152,22 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) int err; u32 *patch_addr; unsigned long text_poke_addr; + pte_t *pte; + unsigned long pfn = get_patch_pfn(addr); - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr; + text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); - err = map_patch_area(addr, text_poke_addr); - if (err) - return err; + pte = virt_to_kpte(text_poke_addr); + __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + /* See ptesync comment in radix__set_pte_at() */ + if (radix_enabled()) + asm volatile("ptesync": : :"memory"); err = __patch_instruction(addr, instr, patch_addr); - unmap_patch_area(text_poke_addr); + pte_clear(&init_mm, text_poke_addr, pte); + flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); return err; } diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index d1091b5ee5da..6812cb19d04a 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -9,11 +9,6 @@ #include <asm/export.h> #include <asm/feature-fixups.h> - .section ".toc","aw" -PPC64_CACHES: - .tc ppc64_caches[TC],ppc64_caches - .section ".text" - _GLOBAL_TOC(copy_page) BEGIN_FTR_SECTION lis r5,PAGE_SIZE@h @@ -24,7 +19,7 @@ FTR_SECTION_ELSE ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) ori r5,r5,PAGE_SIZE@l BEGIN_FTR_SECTION - ld r10,PPC64_CACHES@toc(r2) + LOAD_REG_ADDR(r10, ppc64_caches) lwz r11,DCACHEL1LOGBLOCKSIZE(r10) /* log2 of cache block size */ lwz r12,DCACHEL1BLOCKSIZE(r10) /* get cache block size */ li r9,0 diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 993d3f31832a..31f40f544de5 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -550,7 +550,7 @@ void do_barrier_nospec_fixups(bool enable) } #endif /* CONFIG_PPC_BARRIER_NOSPEC */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) { unsigned int instr[2], *dest; @@ -602,7 +602,7 @@ void __init do_btb_flush_fixups(void) for (; start < end; start += 2) patch_btb_flush_section(start); } -#endif /* CONFIG_PPC_FSL_BOOK3E */ +#endif /* CONFIG_PPC_E500 */ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 169872bc0892..df41ce06f86b 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -11,11 +11,6 @@ #include <asm/asm-offsets.h> #include <asm/export.h> - .section ".toc","aw" -PPC64_CACHES: - .tc ppc64_caches[TC],ppc64_caches - .section ".text" - /** * __arch_clear_user: - Zero a block of memory in user space, with less checking. * @to: Destination address, in user space. @@ -133,7 +128,7 @@ err1; stb r0,0(r3) blr .Llong_clear: - ld r5,PPC64_CACHES@toc(r2) + LOAD_REG_ADDR(r5, ppc64_caches) bf cr7*4+0,11f err2; std r0,0(r3) diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile index a8794032f15f..603e59c3db10 100644 --- a/arch/powerpc/math-emu/Makefile +++ b/arch/powerpc/math-emu/Makefile @@ -17,4 +17,9 @@ obj-$(CONFIG_SPE) += math_efp.o CFLAGS_fabs.o = -fno-builtin-fabs CFLAGS_math.o = -fno-builtin-fabs -ccflags-y = -w +ccflags-remove-y = -Wmissing-prototypes -Wmissing-declarations -Wunused-but-set-variable + +ifdef KBUILD_EXTRA_WARN +CFLAGS_math.o += -Wmissing-prototypes -Wmissing-declarations -Wunused-but-set-variable +CFLAGS_math_efp.o += -Wmissing-prototypes -Wmissing-declarations -Wunused-but-set-variable +endif diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c index 36761bd00f38..936a9a149037 100644 --- a/arch/powerpc/math-emu/math.c +++ b/arch/powerpc/math-emu/math.c @@ -24,9 +24,9 @@ FLOATFUNC(mtfsf); FLOATFUNC(mtfsfi); #ifdef CONFIG_MATH_EMULATION_HW_UNIMPLEMENTED -#undef FLOATFUNC(x) +#undef FLOATFUNC #define FLOATFUNC(x) static inline int x(void *op1, void *op2, void *op3, \ - void *op4) { } + void *op4) { return 0; } #endif FLOATFUNC(fadd); @@ -396,28 +396,28 @@ do_mathemu(struct pt_regs *regs) case XCR: op0 = (void *)®s->ccr; - op1 = (void *)((insn >> 23) & 0x7); + op1 = (void *)(long)((insn >> 23) & 0x7); op2 = (void *)¤t->thread.TS_FPR((insn >> 16) & 0x1f); op3 = (void *)¤t->thread.TS_FPR((insn >> 11) & 0x1f); break; case XCRL: op0 = (void *)®s->ccr; - op1 = (void *)((insn >> 23) & 0x7); - op2 = (void *)((insn >> 18) & 0x7); + op1 = (void *)(long)((insn >> 23) & 0x7); + op2 = (void *)(long)((insn >> 18) & 0x7); break; case XCRB: - op0 = (void *)((insn >> 21) & 0x1f); + op0 = (void *)(long)((insn >> 21) & 0x1f); break; case XCRI: - op0 = (void *)((insn >> 23) & 0x7); - op1 = (void *)((insn >> 12) & 0xf); + op0 = (void *)(long)((insn >> 23) & 0x7); + op1 = (void *)(long)((insn >> 12) & 0xf); break; case XFLB: - op0 = (void *)((insn >> 17) & 0xff); + op0 = (void *)(long)((insn >> 17) & 0xff); op1 = (void *)¤t->thread.TS_FPR((insn >> 11) & 0x1f); break; diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c index 39b84e7452e1..34f62aafe706 100644 --- a/arch/powerpc/math-emu/math_efp.c +++ b/arch/powerpc/math-emu/math_efp.c @@ -17,6 +17,7 @@ #include <linux/types.h> #include <linux/prctl.h> +#include <linux/module.h> #include <linux/uaccess.h> #include <asm/reg.h> @@ -218,6 +219,7 @@ int do_spe_mathemu(struct pt_regs *regs) case AB: case XCR: FP_UNPACK_SP(SA, va.wp + 1); + fallthrough; case XB: FP_UNPACK_SP(SB, vb.wp + 1); break; @@ -226,8 +228,8 @@ int do_spe_mathemu(struct pt_regs *regs) break; } - pr_debug("SA: %ld %08lx %ld (%ld)\n", SA_s, SA_f, SA_e, SA_c); - pr_debug("SB: %ld %08lx %ld (%ld)\n", SB_s, SB_f, SB_e, SB_c); + pr_debug("SA: %d %08x %d (%d)\n", SA_s, SA_f, SA_e, SA_c); + pr_debug("SB: %d %08x %d (%d)\n", SB_s, SB_f, SB_e, SB_c); switch (func) { case EFSABS: @@ -278,7 +280,7 @@ int do_spe_mathemu(struct pt_regs *regs) } else { SB_e += (func == EFSCTSF ? 31 : 32); FP_TO_INT_ROUND_S(vc.wp[1], SB, 32, - (func == EFSCTSF)); + (func == EFSCTSF) ? 1 : 0); } goto update_regs; @@ -287,7 +289,7 @@ int do_spe_mathemu(struct pt_regs *regs) FP_CLEAR_EXCEPTIONS; FP_UNPACK_DP(DB, vb.dp); - pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n", + pr_debug("DB: %d %08x %08x %d (%d)\n", DB_s, DB_f1, DB_f0, DB_e, DB_c); FP_CONV(S, D, 1, 2, SR, DB); @@ -301,7 +303,7 @@ int do_spe_mathemu(struct pt_regs *regs) FP_SET_EXCEPTION(FP_EX_INVALID); } else { FP_TO_INT_ROUND_S(vc.wp[1], SB, 32, - ((func & 0x3) != 0)); + ((func & 0x3) != 0) ? 1 : 0); } goto update_regs; @@ -312,7 +314,7 @@ int do_spe_mathemu(struct pt_regs *regs) FP_SET_EXCEPTION(FP_EX_INVALID); } else { FP_TO_INT_S(vc.wp[1], SB, 32, - ((func & 0x3) != 0)); + ((func & 0x3) != 0) ? 1 : 0); } goto update_regs; @@ -322,7 +324,7 @@ int do_spe_mathemu(struct pt_regs *regs) break; pack_s: - pr_debug("SR: %ld %08lx %ld (%ld)\n", SR_s, SR_f, SR_e, SR_c); + pr_debug("SR: %d %08x %d (%d)\n", SR_s, SR_f, SR_e, SR_c); FP_PACK_SP(vc.wp + 1, SR); goto update_regs; @@ -346,6 +348,7 @@ cmp_s: case AB: case XCR: FP_UNPACK_DP(DA, va.dp); + fallthrough; case XB: FP_UNPACK_DP(DB, vb.dp); break; @@ -354,9 +357,9 @@ cmp_s: break; } - pr_debug("DA: %ld %08lx %08lx %ld (%ld)\n", + pr_debug("DA: %d %08x %08x %d (%d)\n", DA_s, DA_f1, DA_f0, DA_e, DA_c); - pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n", + pr_debug("DB: %d %08x %08x %d (%d)\n", DB_s, DB_f1, DB_f0, DB_e, DB_c); switch (func) { @@ -408,7 +411,7 @@ cmp_s: } else { DB_e += (func == EFDCTSF ? 31 : 32); FP_TO_INT_ROUND_D(vc.wp[1], DB, 32, - (func == EFDCTSF)); + (func == EFDCTSF) ? 1 : 0); } goto update_regs; @@ -417,7 +420,7 @@ cmp_s: FP_CLEAR_EXCEPTIONS; FP_UNPACK_SP(SB, vb.wp + 1); - pr_debug("SB: %ld %08lx %ld (%ld)\n", + pr_debug("SB: %d %08x %d (%d)\n", SB_s, SB_f, SB_e, SB_c); FP_CONV(D, S, 2, 1, DR, SB); @@ -431,7 +434,7 @@ cmp_s: FP_SET_EXCEPTION(FP_EX_INVALID); } else { FP_TO_INT_D(vc.dp[0], DB, 64, - ((func & 0x1) == 0)); + ((func & 0x1) == 0) ? 1 : 0); } goto update_regs; @@ -442,7 +445,7 @@ cmp_s: FP_SET_EXCEPTION(FP_EX_INVALID); } else { FP_TO_INT_ROUND_D(vc.wp[1], DB, 32, - ((func & 0x3) != 0)); + ((func & 0x3) != 0) ? 1 : 0); } goto update_regs; @@ -453,7 +456,7 @@ cmp_s: FP_SET_EXCEPTION(FP_EX_INVALID); } else { FP_TO_INT_D(vc.wp[1], DB, 32, - ((func & 0x3) != 0)); + ((func & 0x3) != 0) ? 1 : 0); } goto update_regs; @@ -463,7 +466,7 @@ cmp_s: break; pack_d: - pr_debug("DR: %ld %08lx %08lx %ld (%ld)\n", + pr_debug("DR: %d %08x %08x %d (%d)\n", DR_s, DR_f1, DR_f0, DR_e, DR_c); FP_PACK_DP(vc.dp, DR); @@ -492,6 +495,7 @@ cmp_d: case XCR: FP_UNPACK_SP(SA0, va.wp); FP_UNPACK_SP(SA1, va.wp + 1); + fallthrough; case XB: FP_UNPACK_SP(SB0, vb.wp); FP_UNPACK_SP(SB1, vb.wp + 1); @@ -502,13 +506,13 @@ cmp_d: break; } - pr_debug("SA0: %ld %08lx %ld (%ld)\n", + pr_debug("SA0: %d %08x %d (%d)\n", SA0_s, SA0_f, SA0_e, SA0_c); - pr_debug("SA1: %ld %08lx %ld (%ld)\n", + pr_debug("SA1: %d %08x %d (%d)\n", SA1_s, SA1_f, SA1_e, SA1_c); - pr_debug("SB0: %ld %08lx %ld (%ld)\n", + pr_debug("SB0: %d %08x %d (%d)\n", SB0_s, SB0_f, SB0_e, SB0_c); - pr_debug("SB1: %ld %08lx %ld (%ld)\n", + pr_debug("SB1: %d %08x %d (%d)\n", SB1_s, SB1_f, SB1_e, SB1_c); switch (func) { @@ -567,7 +571,7 @@ cmp_d: } else { SB0_e += (func == EVFSCTSF ? 31 : 32); FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32, - (func == EVFSCTSF)); + (func == EVFSCTSF) ? 1 : 0); } if (SB1_c == FP_CLS_NAN) { vc.wp[1] = 0; @@ -575,7 +579,7 @@ cmp_d: } else { SB1_e += (func == EVFSCTSF ? 31 : 32); FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32, - (func == EVFSCTSF)); + (func == EVFSCTSF) ? 1 : 0); } goto update_regs; @@ -586,14 +590,14 @@ cmp_d: FP_SET_EXCEPTION(FP_EX_INVALID); } else { FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32, - ((func & 0x3) != 0)); + ((func & 0x3) != 0) ? 1 : 0); } if (SB1_c == FP_CLS_NAN) { vc.wp[1] = 0; FP_SET_EXCEPTION(FP_EX_INVALID); } else { FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32, - ((func & 0x3) != 0)); + ((func & 0x3) != 0) ? 1 : 0); } goto update_regs; @@ -604,14 +608,14 @@ cmp_d: FP_SET_EXCEPTION(FP_EX_INVALID); } else { FP_TO_INT_S(vc.wp[0], SB0, 32, - ((func & 0x3) != 0)); + ((func & 0x3) != 0) ? 1 : 0); } if (SB1_c == FP_CLS_NAN) { vc.wp[1] = 0; FP_SET_EXCEPTION(FP_EX_INVALID); } else { FP_TO_INT_S(vc.wp[1], SB1, 32, - ((func & 0x3) != 0)); + ((func & 0x3) != 0) ? 1 : 0); } goto update_regs; @@ -621,9 +625,9 @@ cmp_d: break; pack_vs: - pr_debug("SR0: %ld %08lx %ld (%ld)\n", + pr_debug("SR0: %d %08x %d (%d)\n", SR0_s, SR0_f, SR0_e, SR0_c); - pr_debug("SR1: %ld %08lx %ld (%ld)\n", + pr_debug("SR1: %d %08x %d (%d)\n", SR1_s, SR1_f, SR1_e, SR1_c); FP_PACK_SP(vc.wp, SR0); @@ -886,7 +890,7 @@ int speround_handler(struct pt_regs *regs) return 0; } -int __init spe_mathemu_init(void) +static int __init spe_mathemu_init(void) { u32 pvr, maj, min; diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index a96b73006dfb..850783cfa9c7 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -158,7 +158,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long done; - unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; + unsigned long border = (unsigned long)__srwx_boundary - PAGE_OFFSET; unsigned long size; size = roundup_pow_of_two((unsigned long)_einittext - PAGE_OFFSET); @@ -240,7 +240,7 @@ void mmu_mark_rodata_ro(void) for (i = 0; i < nb; i++) { struct ppc_bat *bat = BATS[i]; - if (bat_addrs[i].start < (unsigned long)__init_begin) + if (bat_addrs[i].start < (unsigned long)__end_rodata) bat[1].batl = (bat[1].batl & ~BPP_RW) | BPP_RX; } @@ -314,11 +314,9 @@ static void hash_preload(struct mm_struct *mm, unsigned long ea) * * This must always be called with the pte lock held. */ -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, +void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) - return; /* * We don't need to worry about _PAGE_PRESENT here because we are * called with either mm->page_table_lock held or ptl lock held diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index 19f0ef950d77..9ad6b56bfec9 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -81,14 +81,15 @@ EXPORT_SYMBOL(hash__flush_range); void hash__flush_tlb_mm(struct mm_struct *mm) { struct vm_area_struct *mp; + VMA_ITERATOR(vmi, mm, 0); /* - * It is safe to go down the mm's list of vmas when called - * from dup_mmap, holding mmap_lock. It would also be safe from - * unmap_region or exit_mmap, but not from vmtruncate on SMP - - * but it seems dup_mmap is the only SMP case which gets here. + * It is safe to iterate the vmas when called from dup_mmap, + * holding mmap_lock. It would also be safe from unmap_region + * or exit_mmap, but not from vmtruncate on SMP - but it seems + * dup_mmap is the only SMP case which gets here. */ - for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) + for_each_vma(vmi, mp) hash__flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); } EXPORT_SYMBOL(hash__flush_tlb_mm); diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index ae008b9df0e6..747492edb75a 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -256,7 +256,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres * the __collapse_huge_page_copy can result in copying * the old content. */ - flush_tlb_pmd_range(vma->vm_mm, &pmd, address); + flush_hash_table_pmd_range(vma->vm_mm, &pmd, address); return pmd; } @@ -541,7 +541,7 @@ void hash__mark_rodata_ro(void) unsigned long start, end, pp; start = (unsigned long)_stext; - end = (unsigned long)__init_begin; + end = (unsigned long)__end_rodata; pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY); diff --git a/arch/powerpc/mm/book3s64/hash_tlb.c b/arch/powerpc/mm/book3s64/hash_tlb.c index eb0bccaf221e..a64ea0a7ef96 100644 --- a/arch/powerpc/mm/book3s64/hash_tlb.c +++ b/arch/powerpc/mm/book3s64/hash_tlb.c @@ -221,7 +221,7 @@ void __flush_hash_table_range(unsigned long start, unsigned long end) local_irq_restore(flags); } -void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr) +void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr) { pte_t *pte; pte_t *start_pte; diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 363a9447d63d..df008edf7be0 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -123,11 +123,8 @@ EXPORT_SYMBOL_GPL(mmu_slb_size); #ifdef CONFIG_PPC_64K_PAGES int mmu_ci_restrictions; #endif -#ifdef CONFIG_DEBUG_PAGEALLOC static u8 *linear_map_hash_slots; static unsigned long linear_map_hash_count; -static DEFINE_SPINLOCK(linear_map_hash_lock); -#endif /* CONFIG_DEBUG_PAGEALLOC */ struct mmu_hash_ops mmu_hash_ops; EXPORT_SYMBOL(mmu_hash_ops); @@ -427,11 +424,9 @@ repeat: break; cond_resched(); -#ifdef CONFIG_DEBUG_PAGEALLOC - if (debug_pagealloc_enabled() && + if (debug_pagealloc_enabled_or_kfence() && (paddr >> PAGE_SHIFT) < linear_map_hash_count) linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80; -#endif /* CONFIG_DEBUG_PAGEALLOC */ } return ret < 0 ? ret : 0; } @@ -778,7 +773,7 @@ static void __init htab_init_page_sizes(void) bool aligned = true; init_hpte_page_sizes(); - if (!debug_pagealloc_enabled()) { + if (!debug_pagealloc_enabled_or_kfence()) { /* * Pick a size for the linear mapping. Currently, we only * support 16M, 1M and 4K which is the default @@ -1066,8 +1061,7 @@ static void __init htab_initialize(void) prot = pgprot_val(PAGE_KERNEL); -#ifdef CONFIG_DEBUG_PAGEALLOC - if (debug_pagealloc_enabled()) { + if (debug_pagealloc_enabled_or_kfence()) { linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; linear_map_hash_slots = memblock_alloc_try_nid( linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT, @@ -1076,7 +1070,6 @@ static void __init htab_initialize(void) panic("%s: Failed to allocate %lu bytes max_addr=%pa\n", __func__, linear_map_hash_count, &ppc64_rma_size); } -#endif /* CONFIG_DEBUG_PAGEALLOC */ /* create bolted the linear mapping in the hash table */ for_each_mem_range(i, &base, &end) { @@ -1781,7 +1774,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, * * This must always be called with the pte lock held. */ -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, +void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { /* @@ -1791,9 +1784,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, unsigned long trap; bool is_exec; - if (radix_enabled()) - return; - /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ if (!pte_young(*ptep) || address >= TASK_SIZE) return; @@ -1990,7 +1980,9 @@ repeat: return slot; } -#ifdef CONFIG_DEBUG_PAGEALLOC +#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) +static DEFINE_SPINLOCK(linear_map_hash_lock); + static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) { unsigned long hash; @@ -2005,6 +1997,9 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) if (!vsid) return; + if (linear_map_hash_slots[lmi] & 0x80) + return; + ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode, HPTE_V_BOLTED, mmu_linear_psize, mmu_kernel_ssize); @@ -2024,7 +2019,10 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); spin_lock(&linear_map_hash_lock); - BUG_ON(!(linear_map_hash_slots[lmi] & 0x80)); + if (!(linear_map_hash_slots[lmi] & 0x80)) { + spin_unlock(&linear_map_hash_lock); + return; + } hidx = linear_map_hash_slots[lmi] & 0x7f; linear_map_hash_slots[lmi] = 0; spin_unlock(&linear_map_hash_lock); @@ -2055,7 +2053,7 @@ void hash__kernel_map_pages(struct page *page, int numpages, int enable) } local_irq_restore(flags); } -#endif /* CONFIG_DEBUG_PAGEALLOC */ +#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_KFENCE */ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 7b9966402b25..f6151a589298 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -553,8 +553,15 @@ EXPORT_SYMBOL_GPL(memremap_compat_align); pgprot_t vm_get_page_prot(unsigned long vm_flags) { - unsigned long prot = pgprot_val(protection_map[vm_flags & - (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]); + unsigned long prot; + + /* Radix supports execute-only, but protection_map maps X -> RX */ + if (radix_enabled() && ((vm_flags & VM_ACCESS_FLAGS) == VM_EXEC)) { + prot = pgprot_val(PAGE_EXECONLY); + } else { + prot = pgprot_val(protection_map[vm_flags & + (VM_ACCESS_FLAGS | VM_SHARED)]); + } if (vm_flags & VM_SAO) prot |= _PAGE_SAO; diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index e712f80fe189..cac727b01799 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -30,9 +30,12 @@ #include <asm/trace.h> #include <asm/uaccess.h> #include <asm/ultravisor.h> +#include <asm/set_memory.h> #include <trace/events/thp.h> +#include <mm/mmu_decl.h> + unsigned int mmu_base_pid; unsigned long radix_mem_block_size __ro_after_init; @@ -228,7 +231,7 @@ void radix__mark_rodata_ro(void) unsigned long start, end; start = (unsigned long)_stext; - end = (unsigned long)__init_begin; + end = (unsigned long)__end_rodata; radix__change_memory_range(start, end, _PAGE_WRITE); } @@ -259,21 +262,24 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e static unsigned long next_boundary(unsigned long addr, unsigned long end) { #ifdef CONFIG_STRICT_KERNEL_RWX - if (addr < __pa_symbol(__init_begin)) - return __pa_symbol(__init_begin); + if (addr < __pa_symbol(__srwx_boundary)) + return __pa_symbol(__srwx_boundary); #endif return end; } static int __meminit create_physical_mapping(unsigned long start, unsigned long end, - unsigned long max_mapping_size, int nid, pgprot_t _prot) { unsigned long vaddr, addr, mapping_size = 0; bool prev_exec, exec = false; pgprot_t prot; int psize; + unsigned long max_mapping_size = radix_mem_block_size; + + if (debug_pagealloc_enabled_or_kfence()) + max_mapping_size = PAGE_SIZE; start = ALIGN(start, PAGE_SIZE); end = ALIGN_DOWN(end, PAGE_SIZE); @@ -352,7 +358,6 @@ static void __init radix_init_pgtable(void) } WARN_ON(create_physical_mapping(start, end, - radix_mem_block_size, -1, PAGE_KERNEL)); } @@ -850,7 +855,7 @@ int __meminit radix__create_section_mapping(unsigned long start, } return create_physical_mapping(__pa(start), __pa(end), - radix_mem_block_size, nid, prot); + nid, prot); } int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) @@ -896,10 +901,17 @@ void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long #endif #endif -#ifdef CONFIG_DEBUG_PAGEALLOC +#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) void radix__kernel_map_pages(struct page *page, int numpages, int enable) { - pr_warn_once("DEBUG_PAGEALLOC not supported in radix mode\n"); + unsigned long addr; + + addr = (unsigned long)page_address(page); + + if (enable) + set_memory_p(addr, numpages); + else + set_memory_np(addr, numpages); } #endif diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c index 60c6ea16a972..d73b3b4176e8 100644 --- a/arch/powerpc/mm/book3s64/subpage_prot.c +++ b/arch/powerpc/mm/book3s64/subpage_prot.c @@ -149,24 +149,15 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, unsigned long len) { struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, addr); /* * We don't try too hard, we just mark all the vma in that range * VM_NOHUGEPAGE and split them. */ - vma = find_vma(mm, addr); - /* - * If the range is in unmapped range, just return - */ - if (vma && ((addr + len) <= vma->vm_start)) - return; - - while (vma) { - if (vma->vm_start >= (addr + len)) - break; + for_each_vma_range(vmi, vma, addr + len) { vma->vm_flags |= VM_NOHUGEPAGE; walk_page_vma(vma, &subpage_walk_ops, NULL); - vma = vma->vm_next; } } #else diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 014005428687..2bef19cc1b98 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -270,7 +270,11 @@ static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma return false; } - if (unlikely(!vma_is_accessible(vma))) + /* + * Check for a read fault. This could be caused by a read on an + * inaccessible page (i.e. PROT_NONE), or a Radix MMU execute-only page. + */ + if (unlikely(!(vma->vm_flags & VM_READ))) return true; /* * We should ideally do the vma pkey access check here. But in the @@ -367,7 +371,22 @@ static void sanity_check_fault(bool is_write, bool is_user, #elif defined(CONFIG_PPC_8xx) #define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G) #elif defined(CONFIG_PPC64) -#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_64S) +static int page_fault_is_bad(unsigned long err) +{ + unsigned long flag = DSISR_BAD_FAULT_64S; + + /* + * PAPR+ v2.11 § 14.15.3.4.1 (unreleased) + * If byte 0, bit 3 of pi-attribute-specifier-type in + * ibm,pi-features property is defined, ignore the DSI error + * which is caused by the paste instruction on the + * suspended NX window. + */ + if (mmu_has_feature(MMU_FTR_NX_DSI)) + flag &= ~DSISR_BAD_COPYPASTE; + + return err & flag; +} #else #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S) #endif diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index bc84a594ca62..5852a86d990d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -392,7 +392,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, * single hugepage, but all of them point to * the same kmem cache that holds the hugepte. */ - more = addr + (1 << hugepd_shift(*(hugepd_t *)pmd)); + more = addr + (1UL << hugepd_shift(*(hugepd_t *)pmd)); if (more > next) next = more; @@ -434,7 +434,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, * single hugepage, but all of them point to * the same kmem cache that holds the hugepte. */ - more = addr + (1 << hugepd_shift(*(hugepd_t *)pud)); + more = addr + (1UL << hugepd_shift(*(hugepd_t *)pud)); if (more > next) next = more; @@ -496,7 +496,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, * for a single hugepage, but all of them point to the * same kmem cache that holds the hugepte. */ - more = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); + more = addr + (1UL << hugepd_shift(*(hugepd_t *)pgd)); if (more > next) next = more; @@ -623,7 +623,7 @@ static int __init hugetlbpage_init(void) if (pdshift > shift) { if (!IS_ENABLED(CONFIG_PPC_8xx)) pgtable_cache_add(pdshift - shift); - } else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || + } else if (IS_ENABLED(CONFIG_PPC_E500) || IS_ENABLED(CONFIG_PPC_8xx)) { pgtable_cache_add(PTE_T_ORDER); } diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 62d9af6606cd..d4cc3749e621 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -82,28 +82,15 @@ void __init MMU_init(void) if (ppc_md.progress) ppc_md.progress("MMU:enter", 0x111); - /* - * Reserve gigantic pages for hugetlb. This MUST occur before - * lowmem_end_addr is initialized below. - */ - if (memblock.memory.cnt > 1) { -#ifndef CONFIG_WII - memblock_enforce_memory_limit(memblock.memory.regions[0].size); - pr_warn("Only using first contiguous memory region\n"); -#else - wii_memory_fixups(); -#endif - } - total_lowmem = total_memory = memblock_end_of_DRAM() - memstart_addr; lowmem_end_addr = memstart_addr + total_lowmem; -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_85xx /* Freescale Book-E parts expect lowmem to be mapped by fixed TLB * entries, so we need to adjust lowmem to match the amount we can map * in the fixed entries */ adjust_total_lowmem(); -#endif /* CONFIG_FSL_BOOKE */ +#endif /* CONFIG_PPC_85xx */ if (total_lowmem > __max_low_memory) { total_lowmem = __max_low_memory; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 01772e79fd93..84d171953ba4 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -302,13 +302,13 @@ void __init mem_init(void) for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT; struct page *page = pfn_to_page(pfn); - if (!memblock_is_reserved(paddr)) + if (memblock_is_memory(paddr) && !memblock_is_reserved(paddr)) free_highmem_page(page); } } #endif /* CONFIG_HIGHMEM */ -#if defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_SMP) +#if defined(CONFIG_PPC_E500) && !defined(CONFIG_SMP) /* * If smp is enabled, next_tlbcam_idx is initialized in the cpu up * functions.... do it here for the non-smp case. diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 229c72e49198..bd9784f77f2e 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -38,7 +38,7 @@ static inline void _tlbil_pid(unsigned int pid) #else /* CONFIG_40x || CONFIG_PPC_8xx */ extern void _tlbil_all(void); extern void _tlbil_pid(unsigned int pid); -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 extern void _tlbil_pid_noind(unsigned int pid); #else #define _tlbil_pid_noind(pid) _tlbil_pid(pid) @@ -55,7 +55,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid, asm volatile ("tlbie %0; sync" : : "r" (address) : "memory"); trace_tlbie(0, 0, address, pid, 0, 0, 0); } -#elif defined(CONFIG_PPC_BOOK3E) +#elif defined(CONFIG_PPC_BOOK3E_64) extern void _tlbil_va(unsigned long address, unsigned int pid, unsigned int tsize, unsigned int ind); #else @@ -67,7 +67,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid, } #endif /* CONFIG_PPC_8xx */ -#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_PPC_47x) +#if defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_47x) extern void _tlbivax_bcast(unsigned long address, unsigned int pid, unsigned int tsize, unsigned int ind); #else @@ -92,28 +92,16 @@ extern void mapin_ram(void); extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot); -extern unsigned int rtas_data, rtas_size; - -struct hash_pte; extern u8 early_hash[]; #endif /* CONFIG_PPC32 */ extern unsigned long __max_low_memory; -extern phys_addr_t __initial_memory_limit_addr; extern phys_addr_t total_memory; extern phys_addr_t total_lowmem; extern phys_addr_t memstart_addr; extern phys_addr_t lowmem_end_addr; -#ifdef CONFIG_WII -extern unsigned long wii_hole_start; -extern unsigned long wii_hole_size; - -extern unsigned long wii_mmu_mapin_mem2(unsigned long top); -extern void wii_memory_fixups(void); -#endif - /* ...and now those things that may be slightly different between processor * architectures. -- Dan */ @@ -123,11 +111,9 @@ void MMU_init_hw_patch(void); unsigned long mmu_mapin_ram(unsigned long base, unsigned long top); #endif -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun, bool init); -extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, - phys_addr_t phys); #ifdef CONFIG_PPC32 extern void adjust_total_lowmem(void); extern int switch_to_as1(void); @@ -160,9 +146,9 @@ struct tlbcam { extern struct tlbcam TLBCAM[NUM_TLBCAMS]; #endif -#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx) +#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_85xx) || defined(CONFIG_PPC_8xx) /* 6xx have BATS */ -/* FSL_BOOKE have TLBCAM */ +/* PPC_85xx have TLBCAM */ /* 8xx have LTLB */ phys_addr_t v_block_mapped(unsigned long va); unsigned long p_block_mapped(phys_addr_t pa); @@ -171,7 +157,7 @@ static inline phys_addr_t v_block_mapped(unsigned long va) { return 0; } static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; } #endif -#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_FSL_BOOK3E) +#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_E500) void mmu_mark_initmem_nx(void); void mmu_mark_rodata_ro(void); #else diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile index b467a25ee155..f3894e79d5f7 100644 --- a/arch/powerpc/mm/nohash/Makefile +++ b/arch/powerpc/mm/nohash/Makefile @@ -7,13 +7,13 @@ obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o obj-$(CONFIG_40x) += 40x.o obj-$(CONFIG_44x) += 44x.o obj-$(CONFIG_PPC_8xx) += 8xx.o -obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_book3e.o +obj-$(CONFIG_PPC_E500) += e500.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_booke.o ifdef CONFIG_HUGETLB_PAGE -obj-$(CONFIG_PPC_FSL_BOOK3E) += book3e_hugetlbpage.o +obj-$(CONFIG_PPC_E500) += e500_hugetlbpage.o endif # Disable kcov instrumentation on sensitive code # This is necessary for booting with kcov enabled on book3e machines KCOV_INSTRUMENT_tlb.o := n -KCOV_INSTRUMENT_fsl_book3e.o := n +KCOV_INSTRUMENT_e500.o := n diff --git a/arch/powerpc/mm/nohash/fsl_book3e.c b/arch/powerpc/mm/nohash/e500.c index b8ae6c08c06f..40a4e69ae1a9 100644 --- a/arch/powerpc/mm/nohash/fsl_book3e.c +++ b/arch/powerpc/mm/nohash/e500.c @@ -59,7 +59,7 @@ static struct { phys_addr_t phys; } tlbcam_addrs[NUM_TLBCAMS]; -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_85xx /* * Return PA for this VA if it is mapped by a CAM, or 0 */ @@ -135,8 +135,8 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, tlbcam_addrs[index].phys = phys; } -unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, - phys_addr_t phys) +static unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, + phys_addr_t phys) { unsigned int camsize = __ilog2(ram); unsigned int align = __ffs(virt | phys); diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/e500_hugetlbpage.c index c7d4b317a823..c7d4b317a823 100644 --- a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c +++ b/arch/powerpc/mm/nohash/e500_hugetlbpage.c diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 5e7ccb48b79c..2c15c86c7015 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -49,8 +49,7 @@ * other sizes not listed here. The .ind field is only used on MMUs that have * indirect page table entries. */ -#if defined(CONFIG_PPC_BOOK3E_MMU) || defined(CONFIG_PPC_8xx) -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { [MMU_PAGE_4K] = { .shift = 12, @@ -81,7 +80,20 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { .enc = BOOK3E_PAGESZ_1GB, }, }; -#elif defined(CONFIG_PPC_8xx) + +static inline int mmu_get_tsize(int psize) +{ + return mmu_psize_defs[psize].enc; +} +#else +static inline int mmu_get_tsize(int psize) +{ + /* This isn't used on !Book3E for now */ + return 0; +} +#endif + +#ifdef CONFIG_PPC_8xx struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { [MMU_PAGE_4K] = { .shift = 12, @@ -96,53 +108,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { .shift = 23, }, }; -#else -struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { - [MMU_PAGE_4K] = { - .shift = 12, - .ind = 20, - .enc = BOOK3E_PAGESZ_4K, - }, - [MMU_PAGE_16K] = { - .shift = 14, - .enc = BOOK3E_PAGESZ_16K, - }, - [MMU_PAGE_64K] = { - .shift = 16, - .ind = 28, - .enc = BOOK3E_PAGESZ_64K, - }, - [MMU_PAGE_1M] = { - .shift = 20, - .enc = BOOK3E_PAGESZ_1M, - }, - [MMU_PAGE_16M] = { - .shift = 24, - .ind = 36, - .enc = BOOK3E_PAGESZ_16M, - }, - [MMU_PAGE_256M] = { - .shift = 28, - .enc = BOOK3E_PAGESZ_256M, - }, - [MMU_PAGE_1G] = { - .shift = 30, - .enc = BOOK3E_PAGESZ_1GB, - }, -}; -#endif /* CONFIG_FSL_BOOKE */ - -static inline int mmu_get_tsize(int psize) -{ - return mmu_psize_defs[psize].enc; -} -#else -static inline int mmu_get_tsize(int psize) -{ - /* This isn't used on !Book3E for now */ - return 0; -} -#endif /* CONFIG_PPC_BOOK3E_MMU */ +#endif /* The variables below are currently only used on 64-bit Book3E * though this will probably be made common with other nohash @@ -166,7 +132,7 @@ int extlb_level_exc; #endif /* CONFIG_PPC64 */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 /* next_tlbcam_idx is used to round-robin tlbcam entry assignment */ DEFINE_PER_CPU(int, next_tlbcam_idx); EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx); @@ -441,7 +407,7 @@ static void __init setup_page_sizes(void) unsigned int eptcfg; int i, psize; -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 unsigned int mmucfg = mfspr(SPRN_MMUCFG); int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E); @@ -584,7 +550,7 @@ static void __init setup_mmu_htw(void) patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e); patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e); break; -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 case PPC_HTW_E6500: extlb_level_exc = EX_TLB_SIZE; patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e); @@ -627,7 +593,7 @@ static void early_init_this_mmu(void) } mtspr(SPRN_MAS4, mas4); -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { unsigned int num_cams; bool map = true; @@ -680,7 +646,7 @@ static void __init early_init_mmu_global(void) /* Look for HW tablewalk support */ setup_mmu_htw(); -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { if (book3e_htw_mode == PPC_HTW_NONE) { extlb_level_exc = EX_TLB_SIZE; @@ -701,7 +667,7 @@ static void __init early_init_mmu_global(void) static void __init early_mmu_set_memory_limit(void) { -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { /* * Limit memory so we dont have linear faults. @@ -750,7 +716,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, * We crop it to the size of the first MEMBLOCK to * avoid going over total available memory just in case... */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { unsigned long linear_sz; unsigned int num_cams; diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index d62b613a0d5d..e1199608ff4d 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -221,7 +221,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_476_DD2) blr #endif /* CONFIG_PPC_47x */ -#elif defined(CONFIG_FSL_BOOKE) +#elif defined(CONFIG_PPC_85xx) /* * FSL BookE implementations. * @@ -294,7 +294,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX) isync 1: wrtee r10 blr -#elif defined(CONFIG_PPC_BOOK3E) +#elif defined(CONFIG_PPC_BOOK3E_64) /* * New Book3E (>= 2.06) implementation * @@ -364,7 +364,7 @@ _GLOBAL(_tlbivax_bcast) #error Unsupported processor type ! #endif -#if defined(CONFIG_PPC_FSL_BOOK3E) +#if defined(CONFIG_PPC_E500) /* * extern void loadcam_entry(unsigned int index) * diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 68ffbfdba894..76cf456d7976 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -61,7 +61,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) ld r14,PACAPGD(r13) std r15,EX_TLB_R15(r12) std r10,EX_TLB_CR(r12) -#ifdef CONFIG_PPC_FSL_BOOK3E START_BTB_FLUSH_SECTION mfspr r11, SPRN_SRR1 andi. r10,r11,MSR_PR @@ -70,14 +69,11 @@ START_BTB_FLUSH_SECTION 1: END_BTB_FLUSH_SECTION std r7,EX_TLB_R7(r12) -#endif .endm .macro tlb_epilog_bolted ld r14,EX_TLB_CR(r12) -#ifdef CONFIG_PPC_FSL_BOOK3E ld r7,EX_TLB_R7(r12) -#endif ld r10,EX_TLB_R10(r12) ld r11,EX_TLB_R11(r12) ld r13,EX_TLB_R13(r12) @@ -248,7 +244,6 @@ itlb_miss_fault_bolted: beq tlb_miss_user_bolted b itlb_miss_kernel_bolted -#ifdef CONFIG_PPC_FSL_BOOK3E /* * TLB miss handling for e6500 and derivatives, using hardware tablewalk. * @@ -515,7 +510,6 @@ dtlb_miss_fault_e6500: itlb_miss_fault_e6500: tlb_epilog_bolted b exc_instruction_storage_book3e -#endif /* CONFIG_PPC_FSL_BOOK3E */ /********************************************************************** * * @@ -1124,8 +1118,8 @@ tlb_load_linear: * we only use 1G pages for now. That might have to be changed in a * final implementation, especially when dealing with hypervisors */ - ld r11,PACATOC(r13) - ld r11,linear_map_top@got(r11) + __LOAD_PACA_TOC(r11) + LOAD_REG_ADDR_ALTTOC(r11, r11, linear_map_top) ld r10,0(r11) tovirt(10,10) cmpld cr0,r16,r10 diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 0801b2ce9b7d..b44ce71917d7 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1160,6 +1160,9 @@ void __init mem_topology_setup(void) { int cpu; + max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; + min_low_pfn = MEMORY_START >> PAGE_SHIFT; + /* * Linux/mm assumes node 0 to be online at boot. However this is not * true on PowerPC, where node 0 is similar to any other node, it @@ -1204,9 +1207,6 @@ void __init initmem_init(void) { int nid; - max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; - max_pfn = max_low_pfn; - memblock_dump_all(); for_each_online_node(nid) { diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 3ac73f9fb5d5..5c02fd08d61e 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -158,10 +158,11 @@ void mark_rodata_ro(void) } /* - * mark .text and .rodata as read only. Use __init_begin rather than - * __end_rodata to cover NOTES and EXCEPTION_TABLE. + * mark text and rodata as read only. __end_rodata is set by + * powerpc's linker script and includes tables and data + * requiring relocation which are not put in RO_DATA. */ - numpages = PFN_UP((unsigned long)__init_begin) - + numpages = PFN_UP((unsigned long)__end_rodata) - PFN_DOWN((unsigned long)_stext); set_memory_ro((unsigned long)_stext, numpages); diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile index b533caaf0910..dc896d2874f3 100644 --- a/arch/powerpc/mm/ptdump/Makefile +++ b/arch/powerpc/mm/ptdump/Makefile @@ -4,7 +4,7 @@ obj-y += ptdump.o obj-$(CONFIG_4xx) += shared.o obj-$(CONFIG_PPC_8xx) += 8xx.o -obj-$(CONFIG_PPC_BOOK3E_MMU) += shared.o +obj-$(CONFIG_PPC_E500) += shared.o obj-$(CONFIG_PPC_BOOK3S_32) += shared.o obj-$(CONFIG_PPC_BOOK3S_64) += book3s64.o diff --git a/arch/powerpc/perf/bhrb.S b/arch/powerpc/perf/bhrb.S index 1aa3259716b8..47ba05d5ae76 100644 --- a/arch/powerpc/perf/bhrb.S +++ b/arch/powerpc/perf/bhrb.S @@ -21,7 +21,7 @@ _GLOBAL(read_bhrb) cmpldi r3,31 bgt 1f - ld r4,bhrb_table@got(r2) + LOAD_REG_ADDR(r4, bhrb_table) sldi r3,r3,3 add r3,r4,r3 mtctr r3 diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index b83c47b7947f..ea8cfe3806dc 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -19,7 +19,7 @@ #include "callchain.h" #ifdef CONFIG_PPC64 -#include "../kernel/ppc32.h" +#include <asm/syscalls_32.h> #else /* CONFIG_PPC64 */ #define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 13919eb96931..942aa830e110 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2131,6 +2131,23 @@ static int power_pmu_event_init(struct perf_event *event) if (has_branch_stack(event)) { u64 bhrb_filter = -1; + /* + * Currently no PMU supports having multiple branch filters + * at the same time. Branch filters are set via MMCRA IFM[32:33] + * bits for Power8 and above. Return EOPNOTSUPP when multiple + * branch filters are requested in the event attr. + * + * When opening event via perf_event_open(), branch_sample_type + * gets adjusted in perf_copy_attr(). Kernel will automatically + * adjust the branch_sample_type based on the event modifier + * settings to include PERF_SAMPLE_BRANCH_PLM_ALL. Hence drop + * the check for PERF_SAMPLE_BRANCH_PLM_ALL. + */ + if (hweight64(event->attr.branch_sample_type & ~PERF_SAMPLE_BRANCH_PLM_ALL) > 1) { + local_irq_restore(irq_flags); + return -EOPNOTSUPP; + } + if (ppmu->bhrb_filter_map) bhrb_filter = ppmu->bhrb_filter_map( event->attr.branch_sample_type); @@ -2297,16 +2314,20 @@ static void record_and_restart(struct perf_event *event, unsigned long val, cpuhw = this_cpu_ptr(&cpu_hw_events); power_pmu_bhrb_read(event, cpuhw); data.br_stack = &cpuhw->bhrb_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; } if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC && - ppmu->get_mem_data_src) + ppmu->get_mem_data_src) { ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs); + data.sample_flags |= PERF_SAMPLE_DATA_SRC; + } if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE && - ppmu->get_mem_weight) + ppmu->get_mem_weight) { ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type); - + data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); } else if (period) { diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index d7976ab40d38..d517aba94d1b 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -240,8 +240,10 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) ct = of_get_child_count(pmu_events); /* Get the event prefix */ - if (of_property_read_string(node, "events-prefix", &prefix)) + if (of_property_read_string(node, "events-prefix", &prefix)) { + of_node_put(pmu_events); return 0; + } /* Get a global unit and scale data if available */ if (of_property_read_string(node, "scale", &g_scale)) @@ -255,8 +257,10 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) /* Allocate memory for the events */ pmu->events = kcalloc(ct, sizeof(struct imc_events), GFP_KERNEL); - if (!pmu->events) + if (!pmu->events) { + of_node_put(pmu_events); return -ENOMEM; + } ct = 0; /* Parse the events and update the struct */ @@ -266,6 +270,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) ct++; } + of_node_put(pmu_events); + /* Allocate memory for attribute group */ attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL); if (!attr_group) { diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index 20cc8f80b086..7c91ac5a5241 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -140,6 +140,8 @@ static void __init ppc47x_init_irq(void) ppc_md.get_irq = mpic_get_irq; } else panic("Unrecognized top level interrupt controller"); + + of_node_put(np); } #ifdef CONFIG_SMP diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c index 0652c7e69225..42abeba4f698 100644 --- a/arch/powerpc/platforms/512x/clock-commonclk.c +++ b/arch/powerpc/platforms/512x/clock-commonclk.c @@ -950,7 +950,7 @@ static void __init mpc5121_clk_register_of_provider(struct device_node *np) */ static void __init mpc5121_clk_provide_migration_support(void) { - + struct device_node *np; /* * pre-enable those clock items which are not yet appropriately * acquired by their peripheral driver @@ -970,7 +970,9 @@ static void __init mpc5121_clk_provide_migration_support(void) * unused and so it gets disabled */ clk_prepare_enable(clks[MPC512x_CLK_PSC3_MCLK]);/* serial console */ - if (of_find_compatible_node(NULL, "pci", "fsl,mpc5121-pci")) + np = of_find_compatible_node(NULL, "pci", "fsl,mpc5121-pci"); + of_node_put(np); + if (np) clk_prepare_enable(clks[MPC512x_CLK_PCI]); } @@ -1208,6 +1210,8 @@ int __init mpc5121_clk_init(void) /* register as an OF clock provider */ mpc5121_clk_register_of_provider(clk_np); + of_node_put(clk_np); + /* * unbreak not yet adjusted peripheral drivers during migration * towards fully operational common clock support, and allow diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index ee367ff3ec8a..33a35fff11b5 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -174,6 +174,8 @@ static void __init media5200_init_irq(void) goto out; pr_debug("%s: allocated irqhost\n", __func__); + of_node_put(fpga_np); + irq_set_handler_data(cascade_virq, &media5200_irq); irq_set_chained_handler(cascade_virq, media5200_irq_cascade); @@ -181,6 +183,7 @@ static void __init media5200_init_irq(void) out: pr_err("Could not find Media5200 FPGA; PCI interrupts will not work\n"); + of_node_put(fpga_np); } /* diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index bb8caa5071f8..e12cb44e717f 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -162,6 +162,8 @@ static struct spi_board_info mpc832x_spi_boardinfo = { static int __init mpc832x_spi_init(void) { + struct device_node *np; + par_io_config_pin(3, 0, 3, 0, 1, 0); /* SPI1 MOSI, I/O */ par_io_config_pin(3, 1, 3, 0, 1, 0); /* SPI1 MISO, I/O */ par_io_config_pin(3, 2, 3, 0, 1, 0); /* SPI1 CLK, I/O */ @@ -175,7 +177,9 @@ static int __init mpc832x_spi_init(void) * Don't bother with legacy stuff when device tree contains * mmc-spi-slot node. */ - if (of_find_compatible_node(NULL, NULL, "mmc-spi-slot")) + np = of_find_compatible_node(NULL, NULL, "mmc-spi-slot"); + of_node_put(np); + if (np) return 0; return fsl_spi_init(&mpc832x_spi_boardinfo, 1, mpc83xx_spi_cs_control); } diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index be16eba0f704..b92cb2b4d54d 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 menuconfig FSL_SOC_BOOKE bool "Freescale Book-E Machine Type" - depends on PPC_85xx || PPC_BOOK3E + depends on PPC_E500 select FSL_SOC select PPC_UDBG_16550 select MPIC @@ -241,8 +241,6 @@ endif # PPC32 config PPC_QEMU_E500 bool "QEMU generic e500 platform" select DEFAULT_UIMAGE - select E500 - select PPC_E500MC if PPC64 help This option enables support for running as a QEMU guest using QEMU's generic e500 machine. This is not required if you're @@ -258,7 +256,6 @@ config PPC_QEMU_E500 config CORENET_GENERIC bool "Freescale CoreNet Generic" select DEFAULT_UIMAGE - select E500 select PPC_E500MC select PHYS_64BIT select SWIOTLB diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 28d6b36f1ccd..2c539de2d629 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -200,9 +200,5 @@ define_machine(corenet_generic) { #endif .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, -#ifdef CONFIG_PPC64 - .power_save = book3e_idle, -#else .power_save = e500_idle, -#endif }; diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c index 8e827376d97b..e3e8f18825a1 100644 --- a/arch/powerpc/platforms/85xx/ge_imp3a.c +++ b/arch/powerpc/platforms/85xx/ge_imp3a.c @@ -89,8 +89,10 @@ static void __init ge_imp3a_pci_assign_primary(void) of_device_is_compatible(np, "fsl,mpc8548-pcie") || of_device_is_compatible(np, "fsl,p2020-pcie")) { of_address_to_resource(np, 0, &rsrc); - if ((rsrc.start & 0xfffff) == 0x9000) - fsl_pci_primary = np; + if ((rsrc.start & 0xfffff) == 0x9000) { + of_node_put(fsl_pci_primary); + fsl_pci_primary = of_node_get(np); + } } } #endif diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c index bdf9d42f8521..a22f02b0fc77 100644 --- a/arch/powerpc/platforms/85xx/ksi8560.c +++ b/arch/powerpc/platforms/85xx/ksi8560.c @@ -133,6 +133,8 @@ static void __init ksi8560_setup_arch(void) else printk(KERN_ERR "Can't find CPLD in device tree\n"); + of_node_put(cpld); + if (ppc_md.progress) ppc_md.progress("ksi8560_setup_arch()", 0); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 48f3acfece0b..0b8f2101c5fb 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -159,6 +159,7 @@ static void __init mpc85xx_cds_pci_irq_fixup(struct pci_dev *dev) else dev->irq = 10; pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); + break; default: break; } diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index 64109ad6736c..1639e222cc33 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -68,9 +68,5 @@ define_machine(qemu_e500) { .get_irq = mpic_get_coreint_irq, .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, -#ifdef CONFIG_PPC64 - .power_save = book3e_idle, -#else .power_save = e500_idle, -#endif }; diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index 98ae64075193..e14d1b74d4e4 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -71,6 +71,7 @@ static int gpio_halt_probe(struct platform_device *pdev) { enum of_gpio_flags flags; struct device_node *node = pdev->dev.of_node; + struct device_node *child_node; int gpio, err, irq; int trigger; @@ -78,26 +79,29 @@ static int gpio_halt_probe(struct platform_device *pdev) return -ENODEV; /* If there's no matching child, this isn't really an error */ - halt_node = of_find_matching_node(node, child_match); - if (!halt_node) + child_node = of_find_matching_node(node, child_match); + if (!child_node) return 0; /* Technically we could just read the first one, but punish * DT writers for invalid form. */ - if (of_gpio_count(halt_node) != 1) - return -EINVAL; + if (of_gpio_count(child_node) != 1) { + err = -EINVAL; + goto err_put; + } /* Get the gpio number relative to the dynamic base. */ - gpio = of_get_gpio_flags(halt_node, 0, &flags); - if (!gpio_is_valid(gpio)) - return -EINVAL; + gpio = of_get_gpio_flags(child_node, 0, &flags); + if (!gpio_is_valid(gpio)) { + err = -EINVAL; + goto err_put; + } err = gpio_request(gpio, "gpio-halt"); if (err) { printk(KERN_ERR "gpio-halt: error requesting GPIO %d.\n", gpio); - halt_node = NULL; - return err; + goto err_put; } trigger = (flags == OF_GPIO_ACTIVE_LOW); @@ -105,15 +109,14 @@ static int gpio_halt_probe(struct platform_device *pdev) gpio_direction_output(gpio, !trigger); /* Now get the IRQ which tells us when the power button is hit */ - irq = irq_of_parse_and_map(halt_node, 0); + irq = irq_of_parse_and_map(child_node, 0); err = request_irq(irq, gpio_halt_irq, IRQF_TRIGGER_RISING | - IRQF_TRIGGER_FALLING, "gpio-halt", halt_node); + IRQF_TRIGGER_FALLING, "gpio-halt", child_node); if (err) { printk(KERN_ERR "gpio-halt: error requesting IRQ %d for " "GPIO %d.\n", irq, gpio); gpio_free(gpio); - halt_node = NULL; - return err; + goto err_put; } /* Register our halt function */ @@ -123,7 +126,12 @@ static int gpio_halt_probe(struct platform_device *pdev) printk(KERN_INFO "gpio-halt: registered GPIO %d (%d trigger, %d" " irq).\n", gpio, trigger, irq); + halt_node = child_node; return 0; + +err_put: + of_node_put(child_node); + return err; } static int gpio_halt_remove(struct platform_device *pdev) @@ -139,6 +147,7 @@ static int gpio_halt_remove(struct platform_device *pdev) gpio_free(gpio); + of_node_put(halt_node); halt_node = NULL; } diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c index 3725d51248df..ffcfd17a5fa3 100644 --- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -105,6 +105,9 @@ static void __init init_ioports(void) if (dnode == NULL) return; prop = of_find_property(dnode, "ethernet1", &len); + + of_node_put(dnode); + if (prop == NULL) return; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 5185d942b455..0c4eed9aea80 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -33,7 +33,7 @@ config PPC_BOOK3S_32 config PPC_85xx bool "Freescale 85xx" - select E500 + select PPC_E500 config PPC_8xx bool "Freescale 8xx" @@ -107,7 +107,8 @@ config PPC_BOOK3S_64 config PPC_BOOK3E_64 bool "Embedded processors" - select PPC_FSL_BOOK3E + select PPC_E500 + select PPC_E500MC select PPC_FPU # Make it a choice ? select PPC_SMP_MUXED_IPI select PPC_DOORBELL @@ -125,7 +126,7 @@ choice If unsure, select Generic. config GENERIC_CPU - bool "Generic (POWER4 and above)" + bool "Generic (POWER5 and PowerPC 970 and above)" depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN select PPC_64S_HASH_MMU @@ -144,8 +145,8 @@ config CELL_CPU depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN select PPC_64S_HASH_MMU -config POWER5_CPU - bool "POWER5" +config PPC_970_CPU + bool "PowerPC 970 (including PowerPC G5)" depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN select PPC_64S_HASH_MMU @@ -171,13 +172,18 @@ config POWER9_CPU depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER +config POWER10_CPU + bool "POWER10" + depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER + config E5500_CPU bool "Freescale e5500" - depends on PPC64 && E500 + depends on PPC64 && PPC_E500 config E6500_CPU bool "Freescale e6500" - depends on PPC64 && E500 + depends on PPC64 && PPC_E500 config 405_CPU bool "40x family" @@ -234,11 +240,12 @@ config TARGET_CPU string depends on TARGET_CPU_BOOL default "cell" if CELL_CPU - default "power5" if POWER5_CPU + default "970" if PPC_970_CPU default "power6" if POWER6_CPU default "power7" if POWER7_CPU default "power8" if POWER8_CPU default "power9" if POWER9_CPU + default "power10" if POWER10_CPU default "405" if 405_CPU default "440" if 440_CPU default "464" if 464_CPU @@ -255,20 +262,19 @@ config PPC_BOOK3S def_bool y depends on PPC_BOOK3S_32 || PPC_BOOK3S_64 -config PPC_BOOK3E - def_bool y - depends on PPC_BOOK3E_64 - -config E500 +config PPC_E500 select FSL_EMB_PERFMON - select PPC_FSL_BOOK3E bool + select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 + select PPC_SMP_MUXED_IPI + select PPC_DOORBELL + select PPC_KUEP config PPC_E500MC bool "e500mc Support" select PPC_FPU select COMMON_CLK - depends on E500 + depends on PPC_E500 help This must be enabled for running on e500mc (and derivatives such as e5500/e6500), and must be disabled for running on @@ -291,7 +297,7 @@ config PPC_FPU config FSL_EMB_PERFMON bool "Freescale Embedded Perfmon" - depends on E500 || PPC_83xx + depends on PPC_E500 || PPC_83xx help This is the Performance Monitor support found on the e500 core and some e300 cores (c3 and c4). Select this only if your @@ -304,7 +310,7 @@ config FSL_EMB_PERF_EVENT config FSL_EMB_PERF_EVENT_E500 bool - depends on FSL_EMB_PERF_EVENT && E500 + depends on FSL_EMB_PERF_EVENT && PPC_E500 default y config 4xx @@ -314,7 +320,7 @@ config 4xx config BOOKE bool - depends on E500 || 44x || PPC_BOOK3E + depends on PPC_E500 || 44x default y config BOOKE_OR_40x @@ -322,29 +328,14 @@ config BOOKE_OR_40x depends on BOOKE || 40x default y -config FSL_BOOKE - bool - depends on E500 && PPC32 - default y - -# this is for common code between PPC32 & PPC64 FSL BOOKE -config PPC_FSL_BOOK3E - bool - select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 - imply FSL_EMB_PERFMON - select PPC_SMP_MUXED_IPI - select PPC_DOORBELL - select PPC_KUEP - default y if FSL_BOOKE - config PTE_64BIT bool - depends on 44x || E500 || PPC_86xx + depends on 44x || PPC_E500 || PPC_86xx default y if PHYS_64BIT config PHYS_64BIT - bool 'Large physical address support' if E500 || PPC_86xx - depends on (44x || E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx + bool 'Large physical address support' if PPC_E500 || PPC_86xx + depends on (44x || PPC_E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx select PHYS_ADDR_T_64BIT help This option enables kernel support for larger than 32-bit physical @@ -391,7 +382,7 @@ config VSX config SPE_POSSIBLE def_bool y - depends on E500 && !PPC_E500MC + depends on PPC_E500 && !PPC_E500MC config SPE bool "SPE Support" @@ -481,10 +472,6 @@ config PPC_MMU_NOHASH def_bool y depends on !PPC_BOOK3S -config PPC_BOOK3E_MMU - def_bool y - depends on FSL_BOOKE || PPC_BOOK3E - config PPC_HAVE_PMU_SUPPORT bool @@ -506,7 +493,7 @@ config FORCE_SMP select SMP config SMP - depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE || PPC_47x + depends on PPC_BOOK3S || PPC_E500 || PPC_47x select GENERIC_IRQ_MIGRATION bool "Symmetric multi-processing support" if !FORCE_SMP help diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index c0799fb26b6d..40f5ae5e1238 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -431,7 +431,7 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) * The window may be inactive due to lost credit (Ex: core * removal with DLPAR). If the window is active again when * the credit is available, map the new paste address at the - * the window virtual address. + * window virtual address. */ if (txwin->status == VAS_WIN_ACTIVE) { paste_addr = cp_inst->coproc->vops->paste_addr(txwin); diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index 316e533afc00..fb4023f9ea6b 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -182,9 +182,16 @@ static struct device_node *__init cbe_get_be_node(int cpu_id) if (WARN_ON_ONCE(!cpu_handle)) return np; - for (i=0; i<len; i++) - if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL)) + for (i = 0; i < len; i++) { + struct device_node *ch_np = of_find_node_by_phandle(cpu_handle[i]); + struct device_node *ci_np = of_get_cpu_node(cpu_id, NULL); + + of_node_put(ch_np); + of_node_put(ci_np); + + if (ch_np == ci_np) return np; + } } return NULL; @@ -193,21 +200,30 @@ static struct device_node *__init cbe_get_be_node(int cpu_id) static void __init cbe_fill_regs_map(struct cbe_regs_map *map) { if(map->be_node) { - struct device_node *be, *np; + struct device_node *be, *np, *parent_np; be = map->be_node; - for_each_node_by_type(np, "pervasive") - if (of_get_parent(np) == be) + for_each_node_by_type(np, "pervasive") { + parent_np = of_get_parent(np); + if (parent_np == be) map->pmd_regs = of_iomap(np, 0); + of_node_put(parent_np); + } - for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller") - if (of_get_parent(np) == be) + for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller") { + parent_np = of_get_parent(np); + if (parent_np == be) map->iic_regs = of_iomap(np, 2); + of_node_put(parent_np); + } - for_each_node_by_type(np, "mic-tm") - if (of_get_parent(np) == be) + for_each_node_by_type(np, "mic-tm") { + parent_np = of_get_parent(np); + if (parent_np == be) map->mic_tm_regs = of_iomap(np, 0); + of_node_put(parent_np); + } } else { struct device_node *cpu; /* That hack must die die die ! */ @@ -261,7 +277,8 @@ void __init cbe_regs_init(void) of_node_put(cpu); return; } - map->cpu_node = cpu; + of_node_put(map->cpu_node); + map->cpu_node = of_node_get(cpu); for_each_possible_cpu(i) { struct cbe_thread_map *thread = &cbe_thread_map[i]; diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 0ca3efeef293..8c7133039566 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -720,8 +720,10 @@ static int __init cell_iommu_init_disabled(void) cell_disable_iommus(); /* If we have no Axon, we set up the spider DMA magic offset */ - if (of_find_node_by_name(NULL, "axon") == NULL) + np = of_find_node_by_name(NULL, "axon"); + if (!np) cell_dma_nommu_offset = SPIDER_DMA_OFFSET; + of_node_put(np); /* Now we need to check to see where the memory is mapped * in PCI space. We assume that all busses use the same dma diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 52de014983c9..47eaf75349f2 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -167,6 +167,8 @@ static int __init cell_publish_devices(void) of_platform_device_create(np, NULL, NULL); } + of_node_put(root); + /* There is no device for the MIC memory controller, thus we create * a platform device for it to attach the EDAC driver to. */ diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index fe0d8797a00a..e780c14c5733 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -34,15 +34,15 @@ * mbind, mq_open, ipc, ... */ -static void *spu_syscall_table[] = { +static const syscall_fn spu_syscall_table[] = { #define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry) -#define __SYSCALL(nr, entry) [nr] = entry, +#define __SYSCALL(nr, entry) [nr] = (void *) entry, #include <asm/syscall_table_spu.h> }; long spu_sys_callback(struct spu_syscall_block *s) { - long (*syscall)(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6); + syscall_fn syscall; if (s->nr_ret >= ARRAY_SIZE(spu_syscall_table)) { pr_debug("%s: invalid syscall #%lld", __func__, s->nr_ret); diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index ae09c5a91b40..f1ac4c742069 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -488,6 +488,8 @@ static void __init init_affinity_node(int cbe) avoid_ph = vic_dn->phandle; } + of_node_put(vic_dn); + list_add_tail(&spu->aff_list, &last_spu->aff_list); last_spu = spu; break; diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index af048b6dd30a..84958487f696 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -333,7 +333,6 @@ void spufs_stop_callback(struct spu *spu, int irq); void spufs_mfc_callback(struct spu *spu); void spufs_dma_callback(struct spu *spu, int type); -extern struct spu_coredump_calls spufs_coredump_calls; struct spufs_coredump_reader { char *name; ssize_t (*dump)(struct spu_context *ctx, struct coredump_params *cprm); @@ -341,7 +340,6 @@ struct spufs_coredump_reader { size_t size; }; extern const struct spufs_coredump_reader spufs_coredump_read[]; -extern int spufs_coredump_num_notes; extern int spu_init_csa(struct spu_state *csa); extern void spu_fini_csa(struct spu_state *csa); diff --git a/arch/powerpc/platforms/chrp/chrp.h b/arch/powerpc/platforms/chrp/chrp.h index a5a7c338caf9..6ff4631d9db4 100644 --- a/arch/powerpc/platforms/chrp/chrp.h +++ b/arch/powerpc/platforms/chrp/chrp.h @@ -9,4 +9,3 @@ extern int chrp_set_rtc_time(struct rtc_time *); extern long chrp_time_init(void); extern void chrp_find_bridges(void); -extern void chrp_event_scan(unsigned long); diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index 78f2378d9223..bebc5a972694 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -123,6 +123,8 @@ static void __init holly_init_pci(void) if (np) tsi108_setup_pci(np, HOLLY_PCI_CFG_PHYS, 1); + of_node_put(np); + ppc_md.pci_exclude_device = holly_exclude_device; if (ppc_md.progress) ppc_md.progress("tsi108: resources set", 0x100); @@ -184,6 +186,9 @@ static void __init holly_init_IRQ(void) tsi108_pci_int_init(cascade_node); irq_set_handler_data(cascade_pci_irq, mpic); irq_set_chained_handler(cascade_pci_irq, tsi108_irq_cascade); + + of_node_put(tsi_pci); + of_node_put(cascade_node); #endif /* Configure MPIC outputs to CPU0 */ tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0); @@ -210,6 +215,7 @@ static void __noreturn holly_restart(char *cmd) if (bridge) { prop = of_get_property(bridge, "reg", &size); addr = of_translate_address(bridge, prop); + of_node_put(bridge); } addr += (TSI108_PB_OFFSET + 0x414); diff --git a/arch/powerpc/platforms/embedded6xx/ls_uart.c b/arch/powerpc/platforms/embedded6xx/ls_uart.c index 0133e175a0fc..4ecbc55b37c0 100644 --- a/arch/powerpc/platforms/embedded6xx/ls_uart.c +++ b/arch/powerpc/platforms/embedded6xx/ls_uart.c @@ -124,6 +124,8 @@ static int __init ls_uarts_init(void) avr_clock = *(u32*)of_get_property(avr, "clock-frequency", &len); phys_addr = ((u32*)of_get_property(avr, "reg", &len))[0]; + of_node_put(avr); + if (!avr_clock || !phys_addr) return -EINVAL; diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index 8b2b42210356..ddf0c652af80 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -135,6 +135,9 @@ static void __init mpc7448_hpc2_init_IRQ(void) tsi108_pci_int_init(cascade_node); irq_set_handler_data(cascade_pci_irq, mpic); irq_set_chained_handler(cascade_pci_irq, tsi108_irq_cascade); + + of_node_put(tsi_pci); + of_node_put(cascade_node); #endif /* Configure MPIC outputs to CPU0 */ tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0); diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 9e03ff8f631c..f4e654a9d4ff 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -15,8 +15,6 @@ #include <linux/seq_file.h> #include <linux/of_address.h> #include <linux/of_platform.h> -#include <linux/memblock.h> -#include <mm/mmu_decl.h> #include <asm/io.h> #include <asm/machdep.h> @@ -49,19 +47,6 @@ static void __iomem *hw_ctrl; static void __iomem *hw_gpio; -static int __init page_aligned(unsigned long x) -{ - return !(x & (PAGE_SIZE-1)); -} - -void __init wii_memory_fixups(void) -{ - struct memblock_region *p = memblock.memory.regions; - - BUG_ON(memblock.memory.cnt != 2); - BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base)); -} - static void __noreturn wii_spin(void) { local_irq_disable(); diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c index 823e219ef8ee..91606411d2e0 100644 --- a/arch/powerpc/platforms/maple/time.c +++ b/arch/powerpc/platforms/maple/time.c @@ -153,6 +153,7 @@ time64_t __init maple_get_boot_time(void) maple_rtc_addr); } bail: + of_node_put(rtcs); if (maple_rtc_addr == 0) { maple_rtc_addr = RTC_PORT(0); /* legacy address */ printk(KERN_INFO "Maple: No device node for RTC, assuming " diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c index f859ada29074..9e9a7e46288a 100644 --- a/arch/powerpc/platforms/pasemi/misc.c +++ b/arch/powerpc/platforms/pasemi/misc.c @@ -36,8 +36,7 @@ static int __init find_i2c_driver(struct device_node *node, for (i = 0; i < ARRAY_SIZE(i2c_devices); i++) { if (!of_device_is_compatible(node, i2c_devices[i].of_device)) continue; - if (strlcpy(info->type, i2c_devices[i].i2c_type, - I2C_NAME_SIZE) >= I2C_NAME_SIZE) + if (strscpy(info->type, i2c_devices[i].i2c_type, I2C_NAME_SIZE) < 0) return -ENOMEM; return 0; } diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c index 55f0160910bf..f27d31414737 100644 --- a/arch/powerpc/platforms/pasemi/pci.c +++ b/arch/powerpc/platforms/pasemi/pci.c @@ -270,18 +270,12 @@ static int __init pas_add_bridge(struct device_node *dev) void __init pas_pci_init(void) { - struct device_node *np, *root; + struct device_node *np; int res; - root = of_find_node_by_path("/"); - if (!root) { - pr_crit("pas_pci_init: can't find root of device tree\n"); - return; - } - pci_set_flags(PCI_SCAN_ALL_PCIE_DEVS); - np = of_find_compatible_node(root, NULL, "pasemi,rootbus"); + np = of_find_compatible_node(of_root, NULL, "pasemi,rootbus"); if (np) { res = pas_add_bridge(np); of_node_put(np); diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 5cc958adba13..0382d20b5619 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -2632,31 +2632,31 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ if (!macio_chips[i].of_node) break; if (macio_chips[i].of_node == node) - return; + goto out_put; } if (i >= MAX_MACIO_CHIPS) { printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n"); printk(KERN_ERR "pmac_feature: %pOF skipped\n", node); - return; + goto out_put; } addrp = of_get_pci_address(node, 0, &size, NULL); if (addrp == NULL) { printk(KERN_ERR "pmac_feature: %pOF: can't find base !\n", node); - return; + goto out_put; } addr = of_translate_address(node, addrp); if (addr == 0) { printk(KERN_ERR "pmac_feature: %pOF, can't translate base !\n", node); - return; + goto out_put; } base = ioremap(addr, (unsigned long)size); if (!base) { printk(KERN_ERR "pmac_feature: %pOF, can't map mac-io chip !\n", node); - return; + goto out_put; } if (type == macio_keylargo || type == macio_keylargo2) { const u32 *did = of_get_property(node, "device-id", NULL); @@ -2677,6 +2677,11 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ macio_chips[i].rev = *revp; printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n", macio_names[type], macio_chips[i].rev, macio_chips[i].base); + + return; + +out_put: + of_node_put(node); } static int __init diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index c1c430c66dc9..40f3aa432fba 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -627,6 +627,7 @@ static void __init kw_i2c_probe(void) if (parent == NULL) continue; chans = parent->name[0] == 'u' ? 2 : 1; + of_node_put(parent); for (i = 0; i < chans; i++) kw_i2c_add(host, np, np, i); } else { diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index 9c2947a3edd5..085e0ad20eba 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -136,6 +136,8 @@ static void __init macio_gpio_init_one(struct macio_chip *macio) for_each_child_of_node(gparent, gp) pmf_do_functions(gp, NULL, 0, PMF_FLAGS_ON_INIT, NULL); + of_node_put(gparent); + /* Note: We do not at this point implement the "at sleep" or "at wake" * functions. I yet to find any for GPIOs anyway */ diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index d9df45741ece..5b26a9012d2e 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -711,6 +711,7 @@ static void __init smp_core99_setup(int ncpus) printk(KERN_INFO "Processor timebase sync using" " platform function\n"); } + of_node_put(cpus); } #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c index 734df5a32f99..1b7c39e841ee 100644 --- a/arch/powerpc/platforms/powermac/udbg_scc.c +++ b/arch/powerpc/platforms/powermac/udbg_scc.c @@ -81,10 +81,14 @@ void __init udbg_scc_init(int force_scc) if (path != NULL) stdout = of_find_node_by_path(path); for_each_child_of_node(escc, ch) { - if (ch == stdout) + if (ch == stdout) { + of_node_put(ch_def); ch_def = of_node_get(ch); - if (of_node_name_eq(ch, "ch-a")) + } + if (of_node_name_eq(ch, "ch-a")) { + of_node_put(ch_a); ch_a = of_node_get(ch); + } } if (ch_def == NULL && !force_scc) goto bail; diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 6f94b808dd39..841cb7f31f4f 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -1411,7 +1411,7 @@ static int __init pnv_parse_cpuidle_dt(void) goto out; } for (i = 0; i < nr_idle_states; i++) - strlcpy(pnv_idle_states[i].name, temp_string[i], + strscpy(pnv_idle_states[i].name, temp_string[i], PNV_IDLE_NAME_LEN); nr_pnv_idle_states = nr_idle_states; rc = 0; @@ -1419,6 +1419,7 @@ out: kfree(temp_u32); kfree(temp_u64); kfree(temp_string); + of_node_put(np); return rc; } diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index 27c936075031..629067781cec 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -478,10 +478,8 @@ EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release); int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) { struct spa_data *data = (struct spa_data *) platform_data; - int rc; - rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle); - return rc; + return opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle); } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache); diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index adcb1a1a2bfe..bb7657115f1d 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -348,6 +348,8 @@ static int __init create_opalcore(void) if (!dn || ret) pr_warn("WARNING: Failed to read OPAL base & entry values\n"); + of_node_put(dn); + /* Use count to keep track of the program headers */ count = 0; diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c index 64506b46e77b..7bfe4cbeb35a 100644 --- a/arch/powerpc/platforms/powernv/opal-powercap.c +++ b/arch/powerpc/platforms/powernv/opal-powercap.c @@ -153,7 +153,7 @@ void __init opal_powercap_init(void) pcaps = kcalloc(of_get_child_count(powercap), sizeof(*pcaps), GFP_KERNEL); if (!pcaps) - return; + goto out_put_powercap; powercap_kobj = kobject_create_and_add("powercap", opal_kobj); if (!powercap_kobj) { @@ -226,6 +226,7 @@ void __init opal_powercap_init(void) } i++; } + of_node_put(powercap); return; @@ -236,6 +237,9 @@ out_pcaps_pattrs: kfree(pcaps[i].pg.name); } kobject_put(powercap_kobj); + of_node_put(node); out_pcaps: kfree(pcaps); +out_put_powercap: + of_node_put(powercap); } diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c index 69d7e75950d1..6441e17b6996 100644 --- a/arch/powerpc/platforms/powernv/opal-psr.c +++ b/arch/powerpc/platforms/powernv/opal-psr.c @@ -135,7 +135,7 @@ void __init opal_psr_init(void) psr_attrs = kcalloc(of_get_child_count(psr), sizeof(*psr_attrs), GFP_KERNEL); if (!psr_attrs) - return; + goto out_put_psr; psr_kobj = kobject_create_and_add("psr", opal_kobj); if (!psr_kobj) { @@ -162,10 +162,14 @@ void __init opal_psr_init(void) } i++; } + of_node_put(psr); return; out_kobj: + of_node_put(node); kobject_put(psr_kobj); out: kfree(psr_attrs); +out_put_psr: + of_node_put(psr); } diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c index 8fba7d25ae56..9944376b115c 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c +++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c @@ -170,7 +170,7 @@ void __init opal_sensor_groups_init(void) sgs = kcalloc(of_get_child_count(sg), sizeof(*sgs), GFP_KERNEL); if (!sgs) - return; + goto out_sg_put; sg_kobj = kobject_create_and_add("sensor_groups", opal_kobj); if (!sg_kobj) { @@ -222,6 +222,7 @@ void __init opal_sensor_groups_init(void) } i++; } + of_node_put(sg); return; @@ -231,6 +232,9 @@ out_sgs_sgattrs: kfree(sgs[i].sg.attrs); } kobject_put(sg_kobj); + of_node_put(node); out_sgs: kfree(sgs); +out_sg_put: + of_node_put(sg); } diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index e5acc33b3b20..0ed95f753416 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -57,7 +57,7 @@ opal_return: .long 0xa64b7b7d /* mthsrr1 r11 */ .long 0x2402004c /* hrfid */ #endif - ld r2,PACATOC(r13) + LOAD_PACA_TOC() ld r0,PPC_LR_STKOFF(r1) mtlr r0 blr diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 55a8fbfdb5b2..cdf3838f08d3 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -892,6 +892,7 @@ static void opal_export_attrs(void) kobj = kobject_create_and_add("exports", opal_kobj); if (!kobj) { pr_warn("kobject_create_and_add() of exports failed\n"); + of_node_put(np); return; } @@ -952,6 +953,8 @@ static void __init opal_imc_init_dev(void) np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT); if (np) of_platform_device_create(np, NULL, NULL); + + of_node_put(np); } static int kopald(void *unused) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9de9b2fb163d..5c144c05cbfd 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -67,7 +67,7 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, vaf.va = &args; if (pe->flags & PNV_IODA_PE_DEV) - strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); + strscpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) sprintf(pfix, "%04x:%02x ", pci_domain_nr(pe->pbus), pe->pbus->number); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index dac545aa0308..61ab2d38ff4b 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -17,6 +17,7 @@ #include <linux/console.h> #include <linux/delay.h> #include <linux/irq.h> +#include <linux/seq_buf.h> #include <linux/seq_file.h> #include <linux/of.h> #include <linux/of_fdt.h> @@ -207,8 +208,29 @@ static void __init pnv_setup_arch(void) pnv_rng_init(); } +static void __init pnv_add_hw_description(void) +{ + struct device_node *dn; + const char *s; + + dn = of_find_node_by_path("/ibm,opal/firmware"); + if (!dn) + return; + + if (of_property_read_string(dn, "version", &s) == 0 || + of_property_read_string(dn, "git-id", &s) == 0) + seq_buf_printf(&ppc_hw_desc, "opal:%s ", s); + + if (of_property_read_string(dn, "mi-version", &s) == 0) + seq_buf_printf(&ppc_hw_desc, "mi:%s ", s); + + of_node_put(dn); +} + static void __init pnv_init(void) { + pnv_add_hw_description(); + /* * Initialize the LPC bus now so that legacy serial * ports can be found on it diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index fb6499977f99..a3b4d99567cb 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -23,13 +23,21 @@ config PPC_PSERIES select SWIOTLB default y +config PARAVIRT + bool + config PARAVIRT_SPINLOCKS bool +config PARAVIRT_TIME_ACCOUNTING + select PARAVIRT + bool + config PPC_SPLPAR bool "Support for shared-processor logical partitions" depends on PPC_PSERIES select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS + select PARAVIRT_TIME_ACCOUNTING if VIRT_CPU_ACCOUNTING_GEN default y help Enabling this option will make the kernel run more efficiently diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 14e143b946a3..92310202bdd7 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -7,7 +7,7 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o rng.o \ pci.o pci_dlpar.o eeh_pseries.o msi.o \ - papr_platform_attributes.o + papr_platform_attributes.o dtl.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KEXEC_CORE) += kexec.o obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o @@ -19,7 +19,6 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_HVCS) += hvcserver.o obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o obj-$(CONFIG_CMM) += cmm.o -obj-$(CONFIG_DTL) += dtl.o obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_IBMVIO) += vio.o diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 352af5b14a0f..3f1cdccebc9c 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -18,6 +18,7 @@ #include <asm/plpar_wrappers.h> #include <asm/machdep.h> +#ifdef CONFIG_DTL struct dtl { struct dtl_entry *buf; int cpu; @@ -37,6 +38,15 @@ static u8 dtl_event_mask = DTL_LOG_ALL; static int dtl_buf_entries = N_DISPATCH_LOG; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + +/* + * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls + * reading from the dispatch trace log. If other code wants to consume + * DTL entries, it can set this pointer to a function that will get + * called once for each DTL entry that gets processed. + */ +static void (*dtl_consumer)(struct dtl_entry *entry, u64 index); + struct dtl_ring { u64 write_index; struct dtl_entry *write_ptr; @@ -355,3 +365,81 @@ static int dtl_init(void) return 0; } machine_arch_initcall(pseries, dtl_init); +#endif /* CONFIG_DTL */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +/* + * Scan the dispatch trace log and count up the stolen time. + * Should be called with interrupts disabled. + */ +static notrace u64 scan_dispatch_log(u64 stop_tb) +{ + u64 i = local_paca->dtl_ridx; + struct dtl_entry *dtl = local_paca->dtl_curr; + struct dtl_entry *dtl_end = local_paca->dispatch_log_end; + struct lppaca *vpa = local_paca->lppaca_ptr; + u64 tb_delta; + u64 stolen = 0; + u64 dtb; + + if (!dtl) + return 0; + + if (i == be64_to_cpu(vpa->dtl_idx)) + return 0; + while (i < be64_to_cpu(vpa->dtl_idx)) { + dtb = be64_to_cpu(dtl->timebase); + tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) + + be32_to_cpu(dtl->ready_to_enqueue_time); + barrier(); + if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) { + /* buffer has overflowed */ + i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG; + dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); + continue; + } + if (dtb > stop_tb) + break; +#ifdef CONFIG_DTL + if (dtl_consumer) + dtl_consumer(dtl, i); +#endif + stolen += tb_delta; + ++i; + ++dtl; + if (dtl == dtl_end) + dtl = local_paca->dispatch_log; + } + local_paca->dtl_ridx = i; + local_paca->dtl_curr = dtl; + return stolen; +} + +/* + * Accumulate stolen time by scanning the dispatch trace log. + * Called on entry from user mode. + */ +void notrace pseries_accumulate_stolen_time(void) +{ + u64 sst, ust; + struct cpu_accounting_data *acct = &local_paca->accounting; + + sst = scan_dispatch_log(acct->starttime_user); + ust = scan_dispatch_log(acct->starttime); + acct->stime -= sst; + acct->utime -= ust; + acct->steal_time += ust + sst; +} + +u64 pseries_calculate_stolen_time(u64 stop_tb) +{ + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return 0; + + if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) + return scan_dispatch_log(stop_tb); + + return 0; +} + +#endif diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 0f8cd8b06432..e0a7ac5db15d 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -619,17 +619,21 @@ static ssize_t dlpar_cpu_add(u32 drc_index) static unsigned int pseries_cpuhp_cache_use_count(const struct device_node *cachedn) { unsigned int use_count = 0; - struct device_node *dn; + struct device_node *dn, *tn; WARN_ON(!of_node_is_type(cachedn, "cache")); for_each_of_cpu_node(dn) { - if (of_find_next_cache_node(dn) == cachedn) + tn = of_find_next_cache_node(dn); + of_node_put(tn); + if (tn == cachedn) use_count++; } for_each_node_by_type(dn, "cache") { - if (of_find_next_cache_node(dn) == cachedn) + tn = of_find_next_cache_node(dn); + of_node_put(tn); + if (tn == cachedn) use_count++; } @@ -649,10 +653,13 @@ static int pseries_cpuhp_detach_nodes(struct device_node *cpudn) dn = cpudn; while ((dn = of_find_next_cache_node(dn))) { - if (pseries_cpuhp_cache_use_count(dn) > 1) + if (pseries_cpuhp_cache_use_count(dn) > 1) { + of_node_put(dn); break; + } ret = of_changeset_detach_node(&cs, dn); + of_node_put(dn); if (ret) goto out; } diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index ab9fc6506861..762eb15d3bd4 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -16,7 +16,7 @@ #ifdef CONFIG_TRACEPOINTS #ifndef CONFIG_JUMP_LABEL - .section ".toc","aw" + .data .globl hcall_tracepoint_refcount hcall_tracepoint_refcount: @@ -88,7 +88,7 @@ hcall_tracepoint_refcount: BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r12,hcall_tracepoint_refcount@toc(r2); \ + LOAD_REG_ADDR(r12, hcall_tracepoint_refcount) ; \ std r12,32(r1); \ cmpdi r12,0; \ bne- LABEL; \ diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c index 96e18d3b2fcf..d48c9c7ce10f 100644 --- a/arch/powerpc/platforms/pseries/hvcserver.c +++ b/arch/powerpc/platforms/pseries/hvcserver.c @@ -176,7 +176,7 @@ int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head, = (unsigned int)last_p_partition_ID; /* copy the Null-term char too */ - strlcpy(&next_partner_info->location_code[0], + strscpy(&next_partner_info->location_code[0], (char *)&pi_buff[2], sizeof(next_partner_info->location_code)); diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 7ee3ed7d6cc2..a870cada7acd 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -152,7 +152,11 @@ static const struct dma_map_ops ibmebus_dma_ops = { static int ibmebus_match_path(struct device *dev, const void *data) { struct device_node *dn = to_platform_device(dev)->dev.of_node; - return (of_find_node_by_path(data) == dn); + struct device_node *tn = of_find_node_by_path(data); + + of_node_put(tn); + + return (tn == dn); } static int ibmebus_match_node(struct device *dev, const void *data) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index e6c117fb6491..97ef6499e501 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -660,6 +660,17 @@ static int __init vcpudispatch_stats_procfs_init(void) } machine_device_initcall(pseries, vcpudispatch_stats_procfs_init); + +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING +u64 pseries_paravirt_steal_clock(int cpu) +{ + struct lppaca *lppaca = &lppaca_of(cpu); + + return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + + be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb)); +} +#endif + #endif /* CONFIG_PPC_SPLPAR */ void vpa_init(int cpu) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 3d36a8955eaf..634fac5db3f9 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -216,7 +216,7 @@ static int update_dt_node(struct device_node *dn, s32 scope) nprops = be32_to_cpu(upwa->nprops); /* On the first call to ibm,update-properties for a node the - * the first property value descriptor contains an empty + * first property value descriptor contains an empty * property name, the property value length encoded as u32, * and the property value is the node path being updated. */ @@ -740,11 +740,19 @@ static int pseries_migrate_partition(u64 handle) #ifdef CONFIG_PPC_WATCHDOG factor = nmi_wd_lpm_factor; #endif + /* + * When the migration is initiated, the hypervisor changes VAS + * mappings to prepare before OS gets the notification and + * closes all VAS windows. NX generates continuous faults during + * this time and the user space can not differentiate these + * faults from the migration event. So reduce this time window + * by closing VAS windows at the beginning of this function. + */ + vas_migration_handler(VAS_SUSPEND); + ret = wait_for_vasi_session_suspending(handle); if (ret) - return ret; - - vas_migration_handler(VAS_SUSPEND); + goto out; if (factor) watchdog_nmi_set_timeout_pct(factor); @@ -765,6 +773,7 @@ static int pseries_migrate_partition(u64 handle) if (factor) watchdog_nmi_set_timeout_pct(0); +out: vas_migration_handler(VAS_RESUME); return ret; diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index cad7a0c93117..599bd2c78514 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/notifier.h> #include <linux/proc_fs.h> +#include <linux/security.h> #include <linux/slab.h> #include <linux/of.h> @@ -361,6 +362,10 @@ static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t coun char *kbuf; char *tmp; + rv = security_locked_down(LOCKDOWN_DEVICE_TREE); + if (rv) + return rv; + kbuf = memdup_user_nul(buf, count); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 489f4c4df468..8ef3270515a9 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -41,6 +41,7 @@ #include <linux/of_pci.h> #include <linux/memblock.h> #include <linux/swiotlb.h> +#include <linux/seq_buf.h> #include <asm/mmu.h> #include <asm/processor.h> @@ -80,6 +81,20 @@ DEFINE_STATIC_KEY_FALSE(shared_processor); EXPORT_SYMBOL(shared_processor); +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; + +static bool steal_acc = true; +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} + +early_param("no-steal-acc", parse_no_stealacc); +#endif + int CMO_PrPSP = -1; int CMO_SecPSP = -1; unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); @@ -834,6 +849,11 @@ static void __init pSeries_setup_arch(void) if (lppaca_shared_proc(get_lppaca())) { static_branch_enable(&shared_processor); pv_spinlocks_init(); +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING + static_key_slow_inc(¶virt_steal_enabled); + if (steal_acc) + static_key_slow_inc(¶virt_steal_rq_enabled); +#endif } ppc_md.power_save = pseries_lpar_idle; @@ -992,6 +1012,33 @@ static void __init pSeries_cmo_feature_init(void) pr_debug(" <- fw_cmo_feature_init()\n"); } +static void __init pseries_add_hw_description(void) +{ + struct device_node *dn; + const char *s; + + dn = of_find_node_by_path("/openprom"); + if (dn) { + if (of_property_read_string(dn, "model", &s) == 0) + seq_buf_printf(&ppc_hw_desc, "of:%s ", s); + + of_node_put(dn); + } + + dn = of_find_node_by_path("/hypervisor"); + if (dn) { + if (of_property_read_string(dn, "compatible", &s) == 0) + seq_buf_printf(&ppc_hw_desc, "hv:%s ", s); + + of_node_put(dn); + return; + } + + if (of_property_read_bool(of_root, "ibm,powervm-partition") || + of_property_read_bool(of_root, "ibm,fw-net-version")) + seq_buf_printf(&ppc_hw_desc, "hv:phyp "); +} + /* * Early initialization. Relocation is on but do not reference unbolted pages */ @@ -999,6 +1046,8 @@ static void __init pseries_init(void) { pr_debug(" -> pseries_init()\n"); + pseries_add_hw_description(); + #ifdef CONFIG_HVC_CONSOLE if (firmware_has_feature(FW_FEATURE_LPAR)) hvc_vio_init_early(); diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 7e6e6dd2e33e..0e0524cbe20c 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -333,7 +333,7 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * So no unpacking needs to be done. */ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, - VPHN_FLAG_VCPU, smp_processor_id()); + VPHN_FLAG_VCPU, hard_smp_processor_id()); if (rc != H_SUCCESS) { pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); goto out; @@ -501,14 +501,10 @@ static const struct vas_user_win_ops vops_pseries = { int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, const char *name) { - int rc; - if (!copypaste_feat) return -ENOTSUPP; - rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries); - - return rc; + return vas_register_coproc_api(mod, cop_type, name, &vops_pseries); } EXPORT_SYMBOL_GPL(vas_register_api_pseries); diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index ef9a5999fa93..73c2d70706c0 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -209,8 +209,10 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) dev_err(&pdev->dev, "node %pOF has an invalid fsl,msi phandle %u\n", hose->dn, np->phandle); + of_node_put(np); return -EINVAL; } + of_node_put(np); } msi_for_each_desc(entry, &pdev->dev, MSI_DESC_NOTASSOCIATED) { diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index af6c8ca824d3..974d3db6faab 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -181,6 +181,7 @@ static int setup_one_atmu(struct ccsr_pci __iomem *pci, static bool is_kdump(void) { struct device_node *node; + bool ret; node = of_find_node_by_type(NULL, "memory"); if (!node) { @@ -188,7 +189,10 @@ static bool is_kdump(void) return false; } - return of_property_read_bool(node, "linux,usable-memory"); + ret = of_property_read_bool(node, "linux,usable-memory"); + of_node_put(node); + + return ret; } /* atmu setup for fsl pci/pcie controller */ @@ -939,7 +943,7 @@ u64 fsl_pci_immrbar_base(struct pci_controller *hose) return 0; } -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 static int mcheck_handle_load(struct pt_regs *regs, u32 inst) { unsigned int rd, ra, rb, d; @@ -1142,7 +1146,6 @@ void __init fsl_pci_assign_primary(void) for_each_matching_node(np, pci_ids) { if (of_device_is_available(np)) { fsl_pci_primary = np; - of_node_put(np); return; } } diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 4647c6074f3b..c8f044d62fe2 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -98,7 +98,7 @@ resource_size_t rio_law_start; struct fsl_rio_dbell *dbell; struct fsl_rio_pw *pw; -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 int fsl_rio_mcheck_exception(struct pt_regs *regs) { const struct exception_table_entry *entry; diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index 698fefaaa6dd..a439e33eae06 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -121,6 +121,7 @@ static unsigned int mpic_msgr_number_of_blocks(void) count += 1; } + of_node_put(aliases); } return count; @@ -144,12 +145,18 @@ static int mpic_msgr_block_number(struct device_node *node) for (index = 0; index < number_of_blocks; ++index) { struct property *prop; + struct device_node *tn; snprintf(buf, sizeof(buf), "mpic-msgr-block%d", index); prop = of_find_property(aliases, buf, NULL); - if (node == of_find_node_by_path(prop->value)) + tn = of_find_node_by_path(prop->value); + if (node == tn) { + of_node_put(tn); break; + } + of_node_put(tn); } + of_node_put(aliases); return index == number_of_blocks ? -1 : index; } diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 9e7007f9aca5..f8320f8e5bc7 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -36,8 +36,8 @@ static void ics_rtas_unmask_irq(struct irq_data *d) server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0); - call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq, - server, DEFAULT_PRIORITY); + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server, + DEFAULT_PRIORITY); if (call_status != 0) { printk(KERN_ERR "%s: ibm_set_xive irq %u server %x returned %d\n", @@ -46,7 +46,7 @@ static void ics_rtas_unmask_irq(struct irq_data *d) } /* Now unmask the interrupt (often a no-op) */ - call_status = rtas_call_reentrant(ibm_int_on, 1, 1, NULL, hw_irq); + call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq); if (call_status != 0) { printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n", __func__, hw_irq, call_status); @@ -68,7 +68,7 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq) if (hw_irq == XICS_IPI) return; - call_status = rtas_call_reentrant(ibm_int_off, 1, 1, NULL, hw_irq); + call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq); if (call_status != 0) { printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n", __func__, hw_irq, call_status); @@ -76,8 +76,8 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq) } /* Have to set XIVE to 0xff to be able to remove a slot */ - call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq, - xics_default_server, 0xff); + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, + xics_default_server, 0xff); if (call_status != 0) { printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n", __func__, hw_irq, call_status); @@ -108,7 +108,7 @@ static int ics_rtas_set_affinity(struct irq_data *d, if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) return -1; - status = rtas_call_reentrant(ibm_get_xive, 1, 3, xics_status, hw_irq); + status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq); if (status) { printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", @@ -126,8 +126,8 @@ static int ics_rtas_set_affinity(struct irq_data *d, pr_debug("%s: irq %d [hw 0x%x] server: 0x%x\n", __func__, d->irq, hw_irq, irq_server); - status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, - hw_irq, irq_server, xics_status[1]); + status = rtas_call(ibm_set_xive, 3, 1, NULL, + hw_irq, irq_server, xics_status[1]); if (status) { printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", @@ -158,7 +158,7 @@ static int ics_rtas_check(struct ics *ics, unsigned int hw_irq) return -EINVAL; /* Check if RTAS knows about this interrupt */ - rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, hw_irq); + rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq); if (rc) return -ENXIO; @@ -174,7 +174,7 @@ static long ics_rtas_get_server(struct ics *ics, unsigned long vec) { int rc, status[2]; - rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, vec); + rc = rtas_call(ibm_get_xive, 1, 3, status, vec); if (rc) return -1; return status[0]; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 61b9f98dfd4a..a289cb97c1d7 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -783,7 +783,7 @@ static int xive_irq_set_type(struct irq_data *d, unsigned int flow_type) * the corresponding descriptor bits mind you but those will in turn * affect the resend function when re-enabling an edge interrupt. * - * Set set the default to edge as explained in map(). + * Set the default to edge as explained in map(). */ if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE) flow_type = IRQ_TYPE_EDGE_RISING; diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index d25d8c692909..3925825954bc 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -579,12 +579,12 @@ bool __init xive_native_init(void) /* Resource 1 is HV window */ if (of_address_to_resource(np, 1, &r)) { pr_err("Failed to get thread mgmnt area resource\n"); - return false; + goto err_put; } tima = ioremap(r.start, resource_size(&r)); if (!tima) { pr_err("Failed to map thread mgmnt area\n"); - return false; + goto err_put; } /* Read number of priorities */ @@ -612,7 +612,7 @@ bool __init xive_native_init(void) /* Resource 2 is OS window */ if (of_address_to_resource(np, 2, &r)) { pr_err("Failed to get thread mgmnt area resource\n"); - return false; + goto err_put; } xive_tima_os = r.start; @@ -624,7 +624,7 @@ bool __init xive_native_init(void) rc = opal_xive_reset(OPAL_XIVE_MODE_EXPL); if (rc) { pr_err("Switch to exploitation mode failed with error %lld\n", rc); - return false; + goto err_put; } /* Setup some dummy HV pool VPs */ @@ -634,10 +634,15 @@ bool __init xive_native_init(void) if (!xive_core_init(np, &xive_native_ops, tima, TM_QW3_HV_PHYS, max_prio)) { opal_xive_reset(OPAL_XIVE_MODE_EMU); - return false; + goto err_put; } + of_node_put(np); pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); return true; + +err_put: + of_node_put(np); + return false; } static bool xive_native_provision_pages(void) diff --git a/arch/powerpc/xmon/ppc.h b/arch/powerpc/xmon/ppc.h index d00f33dcf192..1d98b8dd134e 100644 --- a/arch/powerpc/xmon/ppc.h +++ b/arch/powerpc/xmon/ppc.h @@ -435,8 +435,6 @@ struct powerpc_macro extern const struct powerpc_macro powerpc_macros[]; extern const int powerpc_num_macros; -extern ppc_cpu_t ppc_parse_cpu (ppc_cpu_t, ppc_cpu_t *, const char *); - static inline long ppc_optional_operand_value (const struct powerpc_operand *operand) { diff --git a/arch/powerpc/xmon/spr_access.S b/arch/powerpc/xmon/spr_access.S index 720a52afdd58..c308ddf268fb 100644 --- a/arch/powerpc/xmon/spr_access.S +++ b/arch/powerpc/xmon/spr_access.S @@ -4,12 +4,12 @@ /* unsigned long xmon_mfspr(sprn, default_value) */ _GLOBAL(xmon_mfspr) - PPC_LL r5, .Lmfspr_table@got(r2) + LOAD_REG_ADDR(r5, .Lmfspr_table) b xmon_mxspr /* void xmon_mtspr(sprn, new_value) */ _GLOBAL(xmon_mtspr) - PPC_LL r5, .Lmtspr_table@got(r2) + LOAD_REG_ADDR(r5, .Lmtspr_table) b xmon_mxspr /* diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 26ef3388c24c..f51c882bf902 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -195,7 +195,7 @@ static int do_spu_cmd(void); #ifdef CONFIG_44x static void dump_tlb_44x(void); #endif -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 static void dump_tlb_book3e(void); #endif @@ -288,11 +288,11 @@ Commands:\n\ t print backtrace\n\ x exit monitor and recover\n\ X exit monitor and don't recover\n" -#if defined(CONFIG_PPC64) && !defined(CONFIG_PPC_BOOK3E) +#if defined(CONFIG_PPC_BOOK3S_64) " u dump segment table or SLB\n" #elif defined(CONFIG_PPC_BOOK3S_32) " u dump segment registers\n" -#elif defined(CONFIG_44x) || defined(CONFIG_PPC_BOOK3E) +#elif defined(CONFIG_44x) || defined(CONFIG_PPC_BOOK3E_64) " u dump TLB\n" #endif " U show uptime information\n" @@ -1166,7 +1166,7 @@ cmds(struct pt_regs *excp) case 'u': dump_tlb_44x(); break; -#elif defined(CONFIG_PPC_BOOK3E) +#elif defined(CONFIG_PPC_BOOK3E_64) case 'u': dump_tlb_book3e(); break; @@ -2686,7 +2686,7 @@ static void dump_one_paca(int cpu) DUMP(p, rfi_flush_fallback_area, "%-*px"); #endif DUMP(p, dscr_default, "%#-*llx"); -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 DUMP(p, pgd, "%-*px"); DUMP(p, kernel_pgd, "%-*px"); DUMP(p, tcd_ptr, "%-*px"); @@ -2701,7 +2701,7 @@ static void dump_one_paca(int cpu) DUMP(p, canary, "%#-*lx"); #endif DUMP(p, saved_r1, "%#-*llx"); -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 DUMP(p, trap_save, "%#-*x"); #endif DUMP(p, irq_soft_mask, "%#-*x"); @@ -3823,7 +3823,7 @@ static void dump_tlb_44x(void) } #endif /* CONFIG_44x */ -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 static void dump_tlb_book3e(void) { u32 mmucfg, pidmask, lpidmask; @@ -3965,7 +3965,7 @@ static void dump_tlb_book3e(void) } } } -#endif /* CONFIG_PPC_BOOK3E */ +#endif /* CONFIG_PPC_BOOK3E_64 */ static void xmon_init(int enable) { diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 59d18881f35b..6b48a3ae9843 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -45,6 +45,7 @@ config RISCV select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_GENERAL_HUGETLB select ARCH_WANT_HUGE_PMD_SHARE if 64BIT + select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU select BUILDTIME_TABLE_SORT if MMU select CLONE_BACKWARDS @@ -52,7 +53,7 @@ config RISCV select COMMON_CLK select CPU_PM if CPU_IDLE select EDAC_SUPPORT - select GENERIC_ARCH_TOPOLOGY if SMP + select GENERIC_ARCH_TOPOLOGY select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_EARLY_IOREMAP @@ -69,6 +70,7 @@ config RISCV select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL if MMU && 64BIT select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO + select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL @@ -103,6 +105,7 @@ config RISCV select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_STACKPROTECTOR @@ -227,6 +230,9 @@ config RISCV_DMA_NONCOHERENT select ARCH_HAS_SETUP_DMA_OPS select DMA_DIRECT_REMAP +config AS_HAS_INSN + def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero) + source "arch/riscv/Kconfig.socs" source "arch/riscv/Kconfig.erratas" @@ -309,10 +315,13 @@ config SMP If you don't know what to do here, say N. config NR_CPUS - int "Maximum number of CPUs (2-32)" - range 2 32 + int "Maximum number of CPUs (2-512)" depends on SMP - default "8" + range 2 512 if !SBI_V01 + range 2 32 if SBI_V01 && 32BIT + range 2 64 if SBI_V01 && 64BIT + default "32" if 32BIT + default "64" if 64BIT config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 3fa8ef336822..1c8ec656e916 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -37,6 +37,7 @@ else endif ifeq ($(CONFIG_LD_IS_LLD),y) +ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 150000; echo $$?),0) KBUILD_CFLAGS += -mno-relax KBUILD_AFLAGS += -mno-relax ifndef CONFIG_AS_IS_LLVM @@ -44,6 +45,7 @@ ifndef CONFIG_AS_IS_LLVM KBUILD_AFLAGS += -Wa,-mno-relax endif endif +endif # ISA string setting riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima @@ -110,8 +112,6 @@ else KBUILD_IMAGE := $(boot)/Image.gz endif -head-y := arch/riscv/kernel/head.o - libs-y += arch/riscv/lib/ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a @@ -136,10 +136,14 @@ ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy) KBUILD_IMAGE := $(boot)/loader.bin else +ifeq ($(CONFIG_EFI_ZBOOT),) KBUILD_IMAGE := $(boot)/Image.gz +else +KBUILD_IMAGE := $(boot)/vmlinuz.efi +endif endif endif -BOOT_TARGETS := Image Image.gz loader loader.bin xipImage +BOOT_TARGETS := Image Image.gz loader loader.bin xipImage vmlinuz.efi all: $(notdir $(KBUILD_IMAGE)) diff --git a/arch/riscv/boot/.gitignore b/arch/riscv/boot/.gitignore index 0cea9f7fa9d5..e1bc507e8cb2 100644 --- a/arch/riscv/boot/.gitignore +++ b/arch/riscv/boot/.gitignore @@ -4,4 +4,5 @@ Image.* loader loader.lds loader.bin +vmlinuz* xipImage diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index becd0621071c..d1a49adcb1d7 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -58,3 +58,9 @@ $(obj)/Image.lzo: $(obj)/Image FORCE $(obj)/loader.bin: $(obj)/loader FORCE $(call if_changed,objcopy) + +EFI_ZBOOT_PAYLOAD := Image +EFI_ZBOOT_BFD_TARGET := elf$(BITS)-littleriscv +EFI_ZBOOT_MACH_TYPE := RISCV$(BITS) + +include $(srctree)/drivers/firmware/efi/libstub/Makefile.zboot diff --git a/arch/riscv/boot/dts/microchip/Makefile b/arch/riscv/boot/dts/microchip/Makefile index 39aae7b04f1c..7427a20934f3 100644 --- a/arch/riscv/boot/dts/microchip/Makefile +++ b/arch/riscv/boot/dts/microchip/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += mpfs-icicle-kit.dtb +dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += mpfs-m100pfsevp.dtb dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += mpfs-polarberry.dtb +dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += mpfs-sev-kit.dtb obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y)) diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi index 0d28858b83f2..24b1cfb9a73e 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi @@ -2,20 +2,21 @@ /* Copyright (c) 2020-2021 Microchip Technology Inc */ / { - compatible = "microchip,mpfs-icicle-reference-rtlv2203", "microchip,mpfs"; + compatible = "microchip,mpfs-icicle-reference-rtlv2210", "microchip,mpfs-icicle-kit", + "microchip,mpfs"; - core_pwm0: pwm@41000000 { + core_pwm0: pwm@40000000 { compatible = "microchip,corepwm-rtl-v4"; - reg = <0x0 0x41000000 0x0 0xF0>; + reg = <0x0 0x40000000 0x0 0xF0>; microchip,sync-update-mask = /bits/ 32 <0>; #pwm-cells = <2>; clocks = <&fabric_clk3>; status = "disabled"; }; - i2c2: i2c@44000000 { + i2c2: i2c@40000200 { compatible = "microchip,corei2c-rtl-v7"; - reg = <0x0 0x44000000 0x0 0x1000>; + reg = <0x0 0x40000200 0x0 0x100>; #address-cells = <1>; #size-cells = <0>; clocks = <&fabric_clk3>; @@ -28,7 +29,7 @@ fabric_clk3: fabric-clk3 { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <62500000>; + clock-frequency = <50000000>; }; fabric_clk1: fabric-clk1 { @@ -36,4 +37,34 @@ #clock-cells = <0>; clock-frequency = <125000000>; }; + + pcie: pcie@3000000000 { + compatible = "microchip,pcie-host-1.0"; + #address-cells = <0x3>; + #interrupt-cells = <0x1>; + #size-cells = <0x2>; + device_type = "pci"; + reg = <0x30 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; + reg-names = "cfg", "apb"; + bus-range = <0x0 0x7f>; + interrupt-parent = <&plic>; + interrupts = <119>; + interrupt-map = <0 0 0 1 &pcie_intc 0>, + <0 0 0 2 &pcie_intc 1>, + <0 0 0 3 &pcie_intc 2>, + <0 0 0 4 &pcie_intc 3>; + interrupt-map-mask = <0 0 0 7>; + clocks = <&fabric_clk1>, <&fabric_clk3>; + clock-names = "fic1", "fic3"; + ranges = <0x3000000 0x0 0x8000000 0x30 0x8000000 0x0 0x80000000>; + dma-ranges = <0x02000000 0x0 0x00000000 0x0 0x00000000 0x1 0x00000000>; + msi-parent = <&pcie>; + msi-controller; + status = "disabled"; + pcie_intc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + }; + }; }; diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts index f3f87ed2007f..ec7b7c2a3ce2 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts @@ -11,7 +11,8 @@ / { model = "Microchip PolarFire-SoC Icicle Kit"; - compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs"; + compatible = "microchip,mpfs-icicle-reference-rtlv2210", "microchip,mpfs-icicle-kit", + "microchip,mpfs"; aliases { ethernet0 = &mac1; @@ -32,15 +33,26 @@ ddrc_cache_lo: memory@80000000 { device_type = "memory"; - reg = <0x0 0x80000000 0x0 0x2e000000>; + reg = <0x0 0x80000000 0x0 0x40000000>; status = "okay"; }; ddrc_cache_hi: memory@1000000000 { device_type = "memory"; - reg = <0x10 0x0 0x0 0x40000000>; + reg = <0x10 0x40000000 0x0 0x40000000>; status = "okay"; }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + hss_payload: region@BFC00000 { + reg = <0x0 0xBFC00000 0x0 0x400000>; + no-map; + }; + }; }; &core_pwm0 { diff --git a/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi new file mode 100644 index 000000000000..7b9ee13b6a3a --- /dev/null +++ b/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2022 Microchip Technology Inc */ + +/ { + fabric_clk3: fabric-clk3 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <62500000>; + }; + + fabric_clk1: fabric-clk1 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <125000000>; + }; + + pcie: pcie@2000000000 { + compatible = "microchip,pcie-host-1.0"; + #address-cells = <0x3>; + #interrupt-cells = <0x1>; + #size-cells = <0x2>; + device_type = "pci"; + reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; + reg-names = "cfg", "apb"; + bus-range = <0x0 0x7f>; + interrupt-parent = <&plic>; + interrupts = <119>; + interrupt-map = <0 0 0 1 &pcie_intc 0>, + <0 0 0 2 &pcie_intc 1>, + <0 0 0 3 &pcie_intc 2>, + <0 0 0 4 &pcie_intc 3>; + interrupt-map-mask = <0 0 0 7>; + clocks = <&fabric_clk1>, <&fabric_clk1>, <&fabric_clk3>; + clock-names = "fic0", "fic1", "fic3"; + ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; + msi-parent = <&pcie>; + msi-controller; + status = "disabled"; + pcie_intc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + }; + }; +}; diff --git a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts new file mode 100644 index 000000000000..184cb36a175e --- /dev/null +++ b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Original all-in-one devicetree: + * Copyright (C) 2021-2022 - Wolfgang Grandegger <wg@aries-embedded.de> + * Rewritten to use includes: + * Copyright (C) 2022 - Conor Dooley <conor.dooley@microchip.com> + */ +/dts-v1/; + +#include "mpfs.dtsi" +#include "mpfs-m100pfs-fabric.dtsi" + +/* Clock frequency (in Hz) of the rtcclk */ +#define MTIMER_FREQ 1000000 + +/ { + model = "Aries Embedded M100PFEVPS"; + compatible = "aries,m100pfsevp", "microchip,mpfs"; + + aliases { + ethernet0 = &mac0; + ethernet1 = &mac1; + serial0 = &mmuart0; + serial1 = &mmuart1; + serial2 = &mmuart2; + serial3 = &mmuart3; + serial4 = &mmuart4; + gpio0 = &gpio0; + gpio1 = &gpio2; + }; + + chosen { + stdout-path = "serial1:115200n8"; + }; + + cpus { + timebase-frequency = <MTIMER_FREQ>; + }; + + ddrc_cache_lo: memory@80000000 { + device_type = "memory"; + reg = <0x0 0x80000000 0x0 0x40000000>; + }; + ddrc_cache_hi: memory@1040000000 { + device_type = "memory"; + reg = <0x10 0x40000000 0x0 0x40000000>; + }; +}; + +&can0 { + status = "okay"; +}; + +&i2c0 { + status = "okay"; +}; + +&i2c1 { + status = "okay"; +}; + +&gpio0 { + interrupts = <13>, <14>, <15>, <16>, + <17>, <18>, <19>, <20>, + <21>, <22>, <23>, <24>, + <25>, <26>; + ngpios = <14>; + status = "okay"; + + pmic-irq-hog { + gpio-hog; + gpios = <13 0>; + input; + }; + + /* Set to low for eMMC, high for SD-card */ + mmc-sel-hog { + gpio-hog; + gpios = <12 0>; + output-high; + }; +}; + +&gpio2 { + interrupts = <13>, <14>, <15>, <16>, + <17>, <18>, <19>, <20>, + <21>, <22>, <23>, <24>, + <25>, <26>, <27>, <28>, + <29>, <30>, <31>, <32>, + <33>, <34>, <35>, <36>, + <37>, <38>, <39>, <40>, + <41>, <42>, <43>, <44>; + status = "okay"; +}; + +&mac0 { + status = "okay"; + phy-mode = "gmii"; + phy-handle = <&phy0>; + phy0: ethernet-phy@0 { + reg = <0>; + }; +}; + +&mac1 { + status = "okay"; + phy-mode = "gmii"; + phy-handle = <&phy1>; + phy1: ethernet-phy@0 { + reg = <0>; + }; +}; + +&mbox { + status = "okay"; +}; + +&mmc { + max-frequency = <50000000>; + bus-width = <4>; + cap-mmc-highspeed; + cap-sd-highspeed; + no-1-8-v; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + sd-uhs-sdr104; + disable-wp; + status = "okay"; +}; + +&mmuart1 { + status = "okay"; +}; + +&mmuart2 { + status = "okay"; +}; + +&mmuart3 { + status = "okay"; +}; + +&mmuart4 { + status = "okay"; +}; + +&pcie { + status = "okay"; +}; + +&qspi { + status = "okay"; +}; + +&refclk { + clock-frequency = <125000000>; +}; + +&rtc { + status = "okay"; +}; + +&spi0 { + status = "okay"; +}; + +&spi1 { + status = "okay"; +}; + +&syscontroller { + status = "okay"; +}; + +&usb { + status = "okay"; + dr_mode = "host"; +}; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi index 49380c428ec9..67303bc0e451 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi @@ -13,4 +13,33 @@ #clock-cells = <0>; clock-frequency = <125000000>; }; + + pcie: pcie@2000000000 { + compatible = "microchip,pcie-host-1.0"; + #address-cells = <0x3>; + #interrupt-cells = <0x1>; + #size-cells = <0x2>; + device_type = "pci"; + reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; + reg-names = "cfg", "apb"; + bus-range = <0x0 0x7f>; + interrupt-parent = <&plic>; + interrupts = <119>; + interrupt-map = <0 0 0 1 &pcie_intc 0>, + <0 0 0 2 &pcie_intc 1>, + <0 0 0 3 &pcie_intc 2>, + <0 0 0 4 &pcie_intc 3>; + interrupt-map-mask = <0 0 0 7>; + clocks = <&fabric_clk1>, <&fabric_clk1>, <&fabric_clk3>; + clock-names = "fic0", "fic1", "fic3"; + ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; + msi-parent = <&pcie>; + msi-controller; + status = "disabled"; + pcie_intc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + }; + }; }; diff --git a/arch/riscv/boot/dts/microchip/mpfs-sev-kit-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-sev-kit-fabric.dtsi new file mode 100644 index 000000000000..8545baf4d129 --- /dev/null +++ b/arch/riscv/boot/dts/microchip/mpfs-sev-kit-fabric.dtsi @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2022 Microchip Technology Inc */ + +/ { + fabric_clk3: fabric-clk3 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <0>; + }; + + fabric_clk1: fabric-clk1 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <125000000>; + }; + + pcie: pcie@2000000000 { + compatible = "microchip,pcie-host-1.0"; + #address-cells = <0x3>; + #interrupt-cells = <0x1>; + #size-cells = <0x2>; + device_type = "pci"; + reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; + reg-names = "cfg", "apb"; + bus-range = <0x0 0x7f>; + interrupt-parent = <&plic>; + interrupts = <119>; + interrupt-map = <0 0 0 1 &pcie_intc 0>, + <0 0 0 2 &pcie_intc 1>, + <0 0 0 3 &pcie_intc 2>, + <0 0 0 4 &pcie_intc 3>; + interrupt-map-mask = <0 0 0 7>; + clocks = <&fabric_clk1>, <&fabric_clk1>, <&fabric_clk3>; + clock-names = "fic0", "fic1", "fic3"; + ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; + msi-parent = <&pcie>; + msi-controller; + status = "disabled"; + pcie_intc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + }; + }; +}; diff --git a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts new file mode 100644 index 000000000000..013cb666c72d --- /dev/null +++ b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2022 Microchip Technology Inc */ + +/dts-v1/; + +#include "mpfs.dtsi" +#include "mpfs-sev-kit-fabric.dtsi" + +/* Clock frequency (in Hz) of the rtcclk */ +#define MTIMER_FREQ 1000000 + +/ { + #address-cells = <2>; + #size-cells = <2>; + model = "Microchip PolarFire-SoC SEV Kit"; + compatible = "microchip,mpfs-sev-kit", "microchip,mpfs"; + + aliases { + ethernet0 = &mac1; + serial0 = &mmuart0; + serial1 = &mmuart1; + serial2 = &mmuart2; + serial3 = &mmuart3; + serial4 = &mmuart4; + }; + + chosen { + stdout-path = "serial1:115200n8"; + }; + + cpus { + timebase-frequency = <MTIMER_FREQ>; + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + fabricbuf0ddrc: buffer@80000000 { + compatible = "shared-dma-pool"; + reg = <0x0 0x80000000 0x0 0x2000000>; + }; + + fabricbuf1ddrnc: buffer@c4000000 { + compatible = "shared-dma-pool"; + reg = <0x0 0xc4000000 0x0 0x4000000>; + }; + + fabricbuf2ddrncwcb: buffer@d4000000 { + compatible = "shared-dma-pool"; + reg = <0x0 0xd4000000 0x0 0x4000000>; + }; + }; + + ddrc_cache: memory@1000000000 { + device_type = "memory"; + reg = <0x10 0x0 0x0 0x76000000>; + }; +}; + +&i2c0 { + status = "okay"; +}; + +&gpio2 { + interrupts = <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>; + status = "okay"; +}; + +&mac0 { + status = "okay"; + phy-mode = "sgmii"; + phy-handle = <&phy0>; + phy1: ethernet-phy@9 { + reg = <9>; + }; + phy0: ethernet-phy@8 { + reg = <8>; + }; +}; + +&mac1 { + status = "okay"; + phy-mode = "sgmii"; + phy-handle = <&phy1>; +}; + +&mbox { + status = "okay"; +}; + +&mmc { + status = "okay"; + bus-width = <4>; + disable-wp; + cap-sd-highspeed; + cap-mmc-highspeed; + mmc-ddr-1_8v; + mmc-hs200-1_8v; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + sd-uhs-sdr104; +}; + +&mmuart1 { + status = "okay"; +}; + +&mmuart2 { + status = "okay"; +}; + +&mmuart3 { + status = "okay"; +}; + +&mmuart4 { + status = "okay"; +}; + +&refclk { + clock-frequency = <125000000>; +}; + +&rtc { + status = "okay"; +}; + +&syscontroller { + status = "okay"; +}; + +&usb { + status = "okay"; + dr_mode = "otg"; +}; diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index 6d9d455fa160..8f463399a568 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -330,7 +330,7 @@ }; qspi: spi@21000000 { - compatible = "microchip,mpfs-qspi"; + compatible = "microchip,mpfs-qspi", "microchip,coreqspi-rtl-v2"; #address-cells = <1>; #size-cells = <0>; reg = <0x0 0x21000000 0x0 0x1000>; @@ -464,35 +464,6 @@ status = "disabled"; }; - pcie: pcie@2000000000 { - compatible = "microchip,pcie-host-1.0"; - #address-cells = <0x3>; - #interrupt-cells = <0x1>; - #size-cells = <0x2>; - device_type = "pci"; - reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; - reg-names = "cfg", "apb"; - bus-range = <0x0 0x7f>; - interrupt-parent = <&plic>; - interrupts = <119>; - interrupt-map = <0 0 0 1 &pcie_intc 0>, - <0 0 0 2 &pcie_intc 1>, - <0 0 0 3 &pcie_intc 2>, - <0 0 0 4 &pcie_intc 3>; - interrupt-map-mask = <0 0 0 7>; - clocks = <&fabric_clk1>, <&fabric_clk1>, <&fabric_clk3>; - clock-names = "fic0", "fic1", "fic3"; - ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; - msi-parent = <&pcie>; - msi-controller; - status = "disabled"; - pcie_intc: interrupt-controller { - #address-cells = <0>; - #interrupt-cells = <1>; - interrupt-controller; - }; - }; - mbox: mailbox@37020000 { compatible = "microchip,mpfs-mailbox"; reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318C 0x0 0x40>; diff --git a/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts b/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts index c9af67f7a0d2..f7a230110512 100644 --- a/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts +++ b/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts @@ -8,7 +8,7 @@ #include "jh7100.dtsi" #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/leds/common.h> -#include <dt-bindings/pinctrl/pinctrl-starfive.h> +#include <dt-bindings/pinctrl/pinctrl-starfive-jh7100.h> / { model = "BeagleV Starlight Beta"; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index aed332a9d4ea..05fd5fcf24f9 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -166,6 +166,9 @@ CONFIG_BTRFS_FS=m CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y CONFIG_OVERLAY_FS=m +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index 96648c176f37..21546937db39 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -17,6 +17,9 @@ static bool errata_probe_pbmt(unsigned int stage, unsigned long arch_id, unsigned long impid) { + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_PBMT)) + return false; + if (arch_id != 0 || impid != 0) return false; @@ -30,7 +33,9 @@ static bool errata_probe_pbmt(unsigned int stage, static bool errata_probe_cmo(unsigned int stage, unsigned long arch_id, unsigned long impid) { -#ifdef CONFIG_ERRATA_THEAD_CMO + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_CMO)) + return false; + if (arch_id != 0 || impid != 0) return false; @@ -40,9 +45,6 @@ static bool errata_probe_cmo(unsigned int stage, riscv_cbom_block_size = L1_CACHE_BYTES; riscv_noncoherent_supported(); return true; -#else - return false; -#endif } static u32 thead_errata_probe(unsigned int stage, @@ -51,10 +53,10 @@ static u32 thead_errata_probe(unsigned int stage, u32 cpu_req_errata = 0; if (errata_probe_pbmt(stage, archid, impid)) - cpu_req_errata |= (1U << ERRATA_THEAD_PBMT); + cpu_req_errata |= BIT(ERRATA_THEAD_PBMT); if (errata_probe_cmo(stage, archid, impid)) - cpu_req_errata |= (1U << ERRATA_THEAD_CMO); + cpu_req_errata |= BIT(ERRATA_THEAD_CMO); return cpu_req_errata; } diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 273ece6b622f..8a5c246b0a21 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -55,6 +55,8 @@ static inline void riscv_init_cbom_blocksize(void) { } #ifdef CONFIG_RISCV_DMA_NONCOHERENT void riscv_noncoherent_supported(void); +#else +static inline void riscv_noncoherent_supported(void) {} #endif /* diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index 14fc7342490b..e7acffdf21d2 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -99,6 +99,10 @@ do { \ get_cache_size(2, CACHE_TYPE_UNIFIED)); \ NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, \ get_cache_geometry(2, CACHE_TYPE_UNIFIED)); \ + NEW_AUX_ENT(AT_L3_CACHESIZE, \ + get_cache_size(3, CACHE_TYPE_UNIFIED)); \ + NEW_AUX_ENT(AT_L3_CACHEGEOMETRY, \ + get_cache_geometry(3, CACHE_TYPE_UNIFIED)); \ } while (0) #define ARCH_HAS_SETUP_ADDITIONAL_PAGES struct linux_binprm; diff --git a/arch/riscv/include/asm/gpr-num.h b/arch/riscv/include/asm/gpr-num.h index dfee2829fc7c..efeb5edf8a3a 100644 --- a/arch/riscv/include/asm/gpr-num.h +++ b/arch/riscv/include/asm/gpr-num.h @@ -3,6 +3,11 @@ #define __ASM_GPR_NUM_H #ifdef __ASSEMBLY__ + + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + .equ .L__gpr_num_x\num, \num + .endr + .equ .L__gpr_num_zero, 0 .equ .L__gpr_num_ra, 1 .equ .L__gpr_num_sp, 2 @@ -39,6 +44,9 @@ #else /* __ASSEMBLY__ */ #define __DEFINE_ASM_GPR_NUMS \ +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \ +" .equ .L__gpr_num_x\\num, \\num\n" \ +" .endr\n" \ " .equ .L__gpr_num_zero, 0\n" \ " .equ .L__gpr_num_ra, 1\n" \ " .equ .L__gpr_num_sp, 2\n" \ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 6f59ec64175e..b22525290073 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -58,6 +58,7 @@ enum riscv_isa_ext_id { RISCV_ISA_EXT_ZICBOM, RISCV_ISA_EXT_ZIHINTPAUSE, RISCV_ISA_EXT_SSTC, + RISCV_ISA_EXT_SVINVAL, RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX, }; @@ -69,6 +70,7 @@ enum riscv_isa_ext_id { enum riscv_isa_ext_key { RISCV_ISA_EXT_KEY_FPU, /* For 'F' and 'D' */ RISCV_ISA_EXT_KEY_ZIHINTPAUSE, + RISCV_ISA_EXT_KEY_SVINVAL, RISCV_ISA_EXT_KEY_MAX, }; @@ -90,6 +92,8 @@ static __always_inline int riscv_isa_ext2key(int num) return RISCV_ISA_EXT_KEY_FPU; case RISCV_ISA_EXT_ZIHINTPAUSE: return RISCV_ISA_EXT_KEY_ZIHINTPAUSE; + case RISCV_ISA_EXT_SVINVAL: + return RISCV_ISA_EXT_KEY_SVINVAL; default: return -EINVAL; } diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h new file mode 100644 index 000000000000..16044affa57c --- /dev/null +++ b/arch/riscv/include/asm/insn-def.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_INSN_DEF_H +#define __ASM_INSN_DEF_H + +#include <asm/asm.h> + +#define INSN_R_FUNC7_SHIFT 25 +#define INSN_R_RS2_SHIFT 20 +#define INSN_R_RS1_SHIFT 15 +#define INSN_R_FUNC3_SHIFT 12 +#define INSN_R_RD_SHIFT 7 +#define INSN_R_OPCODE_SHIFT 0 + +#ifdef __ASSEMBLY__ + +#ifdef CONFIG_AS_HAS_INSN + + .macro insn_r, opcode, func3, func7, rd, rs1, rs2 + .insn r \opcode, \func3, \func7, \rd, \rs1, \rs2 + .endm + +#else + +#include <asm/gpr-num.h> + + .macro insn_r, opcode, func3, func7, rd, rs1, rs2 + .4byte ((\opcode << INSN_R_OPCODE_SHIFT) | \ + (\func3 << INSN_R_FUNC3_SHIFT) | \ + (\func7 << INSN_R_FUNC7_SHIFT) | \ + (.L__gpr_num_\rd << INSN_R_RD_SHIFT) | \ + (.L__gpr_num_\rs1 << INSN_R_RS1_SHIFT) | \ + (.L__gpr_num_\rs2 << INSN_R_RS2_SHIFT)) + .endm + +#endif + +#define __INSN_R(...) insn_r __VA_ARGS__ + +#else /* ! __ASSEMBLY__ */ + +#ifdef CONFIG_AS_HAS_INSN + +#define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \ + ".insn r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" + +#else + +#include <linux/stringify.h> +#include <asm/gpr-num.h> + +#define DEFINE_INSN_R \ + __DEFINE_ASM_GPR_NUMS \ +" .macro insn_r, opcode, func3, func7, rd, rs1, rs2\n" \ +" .4byte ((\\opcode << " __stringify(INSN_R_OPCODE_SHIFT) ") |" \ +" (\\func3 << " __stringify(INSN_R_FUNC3_SHIFT) ") |" \ +" (\\func7 << " __stringify(INSN_R_FUNC7_SHIFT) ") |" \ +" (.L__gpr_num_\\rd << " __stringify(INSN_R_RD_SHIFT) ") |" \ +" (.L__gpr_num_\\rs1 << " __stringify(INSN_R_RS1_SHIFT) ") |" \ +" (.L__gpr_num_\\rs2 << " __stringify(INSN_R_RS2_SHIFT) "))\n" \ +" .endm\n" + +#define UNDEFINE_INSN_R \ +" .purgem insn_r\n" + +#define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \ + DEFINE_INSN_R \ + "insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \ + UNDEFINE_INSN_R + +#endif + +#endif /* ! __ASSEMBLY__ */ + +#define INSN_R(opcode, func3, func7, rd, rs1, rs2) \ + __INSN_R(RV_##opcode, RV_##func3, RV_##func7, \ + RV_##rd, RV_##rs1, RV_##rs2) + +#define RV_OPCODE(v) __ASM_STR(v) +#define RV_FUNC3(v) __ASM_STR(v) +#define RV_FUNC7(v) __ASM_STR(v) +#define RV_RD(v) __ASM_STR(v) +#define RV_RS1(v) __ASM_STR(v) +#define RV_RS2(v) __ASM_STR(v) +#define __RV_REG(v) __ASM_STR(x ## v) +#define RV___RD(v) __RV_REG(v) +#define RV___RS1(v) __RV_REG(v) +#define RV___RS2(v) __RV_REG(v) + +#define RV_OPCODE_SYSTEM RV_OPCODE(115) + +#define HFENCE_VVMA(vaddr, asid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ + __RD(0), RS1(vaddr), RS2(asid)) + +#define HFENCE_GVMA(gaddr, vmid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(49), \ + __RD(0), RS1(gaddr), RS2(vmid)) + +#define HLVX_HU(dest, addr) \ + INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(50), \ + RD(dest), RS1(addr), __RS2(3)) + +#define HLV_W(dest, addr) \ + INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(52), \ + RD(dest), RS1(addr), __RS2(0)) + +#ifdef CONFIG_64BIT +#define HLV_D(dest, addr) \ + INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(54), \ + RD(dest), RS1(addr), __RS2(0)) +#else +#define HLV_D(dest, addr) \ + __ASM_STR(.error "hlv.d requires 64-bit support") +#endif + +#define SINVAL_VMA(vaddr, asid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(11), \ + __RD(0), RS1(vaddr), RS2(asid)) + +#define SFENCE_W_INVAL() \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(12), \ + __RD(0), __RS1(0), __RS2(0)) + +#define SFENCE_INVAL_IR() \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(12), \ + __RD(0), __RS1(0), __RS2(1)) + +#define HINVAL_VVMA(vaddr, asid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(19), \ + __RD(0), RS1(vaddr), RS2(asid)) + +#define HINVAL_GVMA(gaddr, vmid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(51), \ + __RD(0), RS1(gaddr), RS2(vmid)) + +#endif /* __ASM_INSN_DEF_H */ diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 69605a474270..92080a227937 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -101,9 +101,9 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar(addr)) __io_reads_ins(ins, u8, b, __io_pbr(), __io_par(addr)) __io_reads_ins(ins, u16, w, __io_pbr(), __io_par(addr)) __io_reads_ins(ins, u32, l, __io_pbr(), __io_par(addr)) -#define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count) -#define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count) -#define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count) +#define insb(addr, buffer, count) __insb(PCI_IOBASE + (addr), buffer, count) +#define insw(addr, buffer, count) __insw(PCI_IOBASE + (addr), buffer, count) +#define insl(addr, buffer, count) __insl(PCI_IOBASE + (addr), buffer, count) __io_writes_outs(writes, u8, b, __io_bw(), __io_aw()) __io_writes_outs(writes, u16, w, __io_bw(), __io_aw()) @@ -115,22 +115,22 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw()) __io_writes_outs(outs, u8, b, __io_pbw(), __io_paw()) __io_writes_outs(outs, u16, w, __io_pbw(), __io_paw()) __io_writes_outs(outs, u32, l, __io_pbw(), __io_paw()) -#define outsb(addr, buffer, count) __outsb((void __iomem *)(long)addr, buffer, count) -#define outsw(addr, buffer, count) __outsw((void __iomem *)(long)addr, buffer, count) -#define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count) +#define outsb(addr, buffer, count) __outsb(PCI_IOBASE + (addr), buffer, count) +#define outsw(addr, buffer, count) __outsw(PCI_IOBASE + (addr), buffer, count) +#define outsl(addr, buffer, count) __outsl(PCI_IOBASE + (addr), buffer, count) #ifdef CONFIG_64BIT __io_reads_ins(reads, u64, q, __io_br(), __io_ar(addr)) #define readsq(addr, buffer, count) __readsq(addr, buffer, count) __io_reads_ins(ins, u64, q, __io_pbr(), __io_par(addr)) -#define insq(addr, buffer, count) __insq((void __iomem *)addr, buffer, count) +#define insq(addr, buffer, count) __insq(PCI_IOBASE + (addr), buffer, count) __io_writes_outs(writes, u64, q, __io_bw(), __io_aw()) #define writesq(addr, buffer, count) __writesq(addr, buffer, count) __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw()) -#define outsq(addr, buffer, count) __outsq((void __iomem *)addr, buffer, count) +#define outsq(addr, buffer, count) __outsq(PCI_IOBASE + (addr), buffer, count) #endif #include <asm-generic/io.h> diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 60c517e4d576..dbbf43d52623 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -67,6 +67,7 @@ struct kvm_vcpu_stat { u64 mmio_exit_kernel; u64 csr_exit_user; u64 csr_exit_kernel; + u64 signal_exits; u64 exits; }; diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h index 26a446a34057..d4e3e600beef 100644 --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h @@ -11,8 +11,8 @@ #define KVM_SBI_IMPID 3 -#define KVM_SBI_VERSION_MAJOR 0 -#define KVM_SBI_VERSION_MINOR 3 +#define KVM_SBI_VERSION_MAJOR 1 +#define KVM_SBI_VERSION_MINOR 0 struct kvm_vcpu_sbi_extension { unsigned long extid_start; diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index cedcf8ea3c76..0099dc116168 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -16,7 +16,6 @@ typedef struct { atomic_long_t id; #endif void *vdso; - void *vdso_info; #ifdef CONFIG_SMP /* A local icache flush is needed before user execution can resume. */ cpumask_t icache_stale_mask; diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 19eedd4af4cd..94a0590c6971 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -65,11 +65,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, extern void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp); -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *dead_task) -{ -} - extern unsigned long __get_wchan(struct task_struct *p); diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index 32c73ba1d531..fb187a33ce58 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -30,8 +30,10 @@ #define AT_L1D_CACHEGEOMETRY 43 #define AT_L2_CACHESIZE 44 #define AT_L2_CACHEGEOMETRY 45 +#define AT_L3_CACHESIZE 46 +#define AT_L3_CACHEGEOMETRY 47 /* entries in ARCH_DLINFO */ -#define AT_VECTOR_SIZE_ARCH 7 +#define AT_VECTOR_SIZE_ARCH 9 #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 7351417afd62..8985ff234c01 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -48,6 +48,7 @@ struct kvm_sregs { /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ struct kvm_riscv_config { unsigned long isa; + unsigned long zicbom_block_size; }; /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ @@ -98,6 +99,9 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_M, KVM_RISCV_ISA_EXT_SVPBMT, KVM_RISCV_ISA_EXT_SSTC, + KVM_RISCV_ISA_EXT_SVINVAL, + KVM_RISCV_ISA_EXT_ZIHINTPAUSE, + KVM_RISCV_ISA_EXT_ZICBOM, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 33bb60a354cd..db6e4b1294ba 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -28,9 +28,9 @@ KASAN_SANITIZE_cpufeature.o := n endif endif -extra-y += head.o extra-y += vmlinux.lds +obj-y += head.o obj-y += soc.o obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o obj-y += cpu.o diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index 0be8a2403212..fa427bdcf773 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -3,10 +3,13 @@ * Copyright (C) 2012 Regents of the University of California */ +#include <linux/cpu.h> #include <linux/init.h> #include <linux/seq_file.h> #include <linux/of.h> +#include <asm/csr.h> #include <asm/hwcap.h> +#include <asm/sbi.h> #include <asm/smp.h> #include <asm/pgtable.h> @@ -68,6 +71,50 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) } #ifdef CONFIG_PROC_FS + +struct riscv_cpuinfo { + unsigned long mvendorid; + unsigned long marchid; + unsigned long mimpid; +}; +static DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); + +static int riscv_cpuinfo_starting(unsigned int cpu) +{ + struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); + +#if IS_ENABLED(CONFIG_RISCV_SBI) + ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); + ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); + ci->mimpid = sbi_spec_is_0_1() ? 0 : sbi_get_mimpid(); +#elif IS_ENABLED(CONFIG_RISCV_M_MODE) + ci->mvendorid = csr_read(CSR_MVENDORID); + ci->marchid = csr_read(CSR_MARCHID); + ci->mimpid = csr_read(CSR_MIMPID); +#else + ci->mvendorid = 0; + ci->marchid = 0; + ci->mimpid = 0; +#endif + + return 0; +} + +static int __init riscv_cpuinfo_init(void) +{ + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "riscv/cpuinfo:starting", + riscv_cpuinfo_starting, NULL); + if (ret < 0) { + pr_err("cpuinfo: failed to register hotplug callbacks.\n"); + return ret; + } + + return 0; +} +device_initcall(riscv_cpuinfo_init); + #define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \ { \ .uprop = #UPROP, \ @@ -92,10 +139,11 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) */ static struct riscv_isa_ext_data isa_ext_arr[] = { __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), + __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), + __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), - __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX), }; @@ -185,6 +233,7 @@ static int c_show(struct seq_file *m, void *v) { unsigned long cpu_id = (unsigned long)v - 1; struct device_node *node = of_get_cpu_node(cpu_id, NULL); + struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id); const char *compat, *isa; seq_printf(m, "processor\t: %lu\n", cpu_id); @@ -195,6 +244,9 @@ static int c_show(struct seq_file *m, void *v) if (!of_property_read_string(node, "compatible", &compat) && strcmp(compat, "riscv")) seq_printf(m, "uarch\t\t: %s\n", compat); + seq_printf(m, "mvendorid\t: 0x%lx\n", ci->mvendorid); + seq_printf(m, "marchid\t\t: 0x%lx\n", ci->marchid); + seq_printf(m, "mimpid\t\t: 0x%lx\n", ci->mimpid); seq_puts(m, "\n"); of_node_put(node); diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 3b5583db9d80..694267d1fe81 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -204,6 +204,7 @@ void __init riscv_fill_hwcap(void) SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM); SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE); SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC); + SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL); } #undef SET_ISA_EXT_MAP } @@ -253,35 +254,28 @@ void __init riscv_fill_hwcap(void) #ifdef CONFIG_RISCV_ALTERNATIVE static bool __init_or_module cpufeature_probe_svpbmt(unsigned int stage) { -#ifdef CONFIG_RISCV_ISA_SVPBMT - switch (stage) { - case RISCV_ALTERNATIVES_EARLY_BOOT: + if (!IS_ENABLED(CONFIG_RISCV_ISA_SVPBMT)) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return false; - default: - return riscv_isa_extension_available(NULL, SVPBMT); - } -#endif - return false; + return riscv_isa_extension_available(NULL, SVPBMT); } static bool __init_or_module cpufeature_probe_zicbom(unsigned int stage) { -#ifdef CONFIG_RISCV_ISA_ZICBOM - switch (stage) { - case RISCV_ALTERNATIVES_EARLY_BOOT: + if (!IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM)) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + if (!riscv_isa_extension_available(NULL, ZICBOM)) return false; - default: - if (riscv_isa_extension_available(NULL, ZICBOM)) { - riscv_noncoherent_supported(); - return true; - } else { - return false; - } - } -#endif - return false; + riscv_noncoherent_supported(); + return true; } /* @@ -296,10 +290,10 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage) u32 cpu_req_feature = 0; if (cpufeature_probe_svpbmt(stage)) - cpu_req_feature |= (1U << CPUFEATURE_SVPBMT); + cpu_req_feature |= BIT(CPUFEATURE_SVPBMT); if (cpufeature_probe_zicbom(stage)) - cpu_req_feature |= (1U << CPUFEATURE_ZICBOM); + cpu_req_feature |= BIT(CPUFEATURE_ZICBOM); return cpu_req_feature; } diff --git a/arch/riscv/kernel/image-vars.h b/arch/riscv/kernel/image-vars.h index 71a76a623257..d6e5f739905e 100644 --- a/arch/riscv/kernel/image-vars.h +++ b/arch/riscv/kernel/image-vars.h @@ -25,21 +25,12 @@ */ __efistub_memcmp = memcmp; __efistub_memchr = memchr; -__efistub_memcpy = memcpy; -__efistub_memmove = memmove; -__efistub_memset = memset; __efistub_strlen = strlen; __efistub_strnlen = strnlen; __efistub_strcmp = strcmp; __efistub_strncmp = strncmp; __efistub_strrchr = strrchr; -#ifdef CONFIG_KASAN -__efistub___memcpy = memcpy; -__efistub___memmove = memmove; -__efistub___memset = memset; -#endif - __efistub__start = _start; __efistub__start_kernel = _start_kernel; __efistub__end = _end; diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index ceb9ebab6558..b0c63e8e867e 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -105,7 +105,7 @@ static int __init compat_mode_detect(void) csr_write(CSR_STATUS, tmp); pr_info("riscv: ELF compat mode %s", - compat_mode_supported ? "supported" : "failed"); + compat_mode_supported ? "supported" : "unsupported"); return 0; } diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 2dfc463b86bb..ad76bb59b059 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -252,10 +252,10 @@ static void __init parse_dtb(void) pr_info("Machine model: %s\n", name); dump_stack_set_arch_desc("%s (DT)", name); } - return; + } else { + pr_err("No DTB passed to the kernel\n"); } - pr_err("No DTB passed to the kernel\n"); #ifdef CONFIG_CMDLINE_FORCE strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); pr_info("Forcing kernel command line to: %s\n", boot_command_line); diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index a752c7b41683..3373df413c88 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -49,6 +49,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) unsigned int curr_cpuid; curr_cpuid = smp_processor_id(); + store_cpu_topology(curr_cpuid); numa_store_cpu_info(curr_cpuid); numa_add_cpu(curr_cpuid); @@ -162,9 +163,9 @@ asmlinkage __visible void smp_callin(void) mmgrab(mm); current->active_mm = mm; + store_cpu_topology(curr_cpuid); notify_cpu_starting(curr_cpuid); numa_add_cpu(curr_cpuid); - update_siblings_masks(curr_cpuid); set_cpu_online(curr_cpuid, 1); /* diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 571556bb9261..5d3f2fbeb33c 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -18,9 +18,6 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) return -EINVAL; - if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ))) - return -EINVAL; - return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> (PAGE_SHIFT - page_shift_offset)); } diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 635e6ec26938..f3e96d60a2ff 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -33,6 +33,7 @@ void die(struct pt_regs *regs, const char *str) { static int die_counter; int ret; + long cause; oops_enter(); @@ -42,11 +43,13 @@ void die(struct pt_regs *regs, const char *str) pr_emerg("%s [#%d]\n", str, ++die_counter); print_modules(); - show_regs(regs); + if (regs) + show_regs(regs); - ret = notify_die(DIE_OOPS, str, regs, 0, regs->cause, SIGSEGV); + cause = regs ? regs->cause : -1; + ret = notify_die(DIE_OOPS, str, regs, 0, cause, SIGSEGV); - if (regs && kexec_should_crash(current)) + if (kexec_should_crash(current)) crash_kexec(regs); bust_spinlocks(0); diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 69b05b6c181b..123d05255fcf 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -60,6 +60,11 @@ struct __vdso_info { struct vm_special_mapping *cm; }; +static struct __vdso_info vdso_info; +#ifdef CONFIG_COMPAT +static struct __vdso_info compat_vdso_info; +#endif + static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { @@ -114,15 +119,19 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) { struct mm_struct *mm = task->mm; struct vm_area_struct *vma; - struct __vdso_info *vdso_info = mm->context.vdso_info; + VMA_ITERATOR(vmi, mm, 0); mmap_read_lock(mm); - for (vma = mm->mmap; vma; vma = vma->vm_next) { + for_each_vma(vmi, vma) { unsigned long size = vma->vm_end - vma->vm_start; - if (vma_is_special_mapping(vma, vdso_info->dm)) + if (vma_is_special_mapping(vma, vdso_info.dm)) zap_page_range(vma, vma->vm_start, size); +#ifdef CONFIG_COMPAT + if (vma_is_special_mapping(vma, compat_vdso_info.dm)) + zap_page_range(vma, vma->vm_start, size); +#endif } mmap_read_unlock(mm); @@ -264,7 +273,6 @@ static int __setup_additional_pages(struct mm_struct *mm, vdso_base += VVAR_SIZE; mm->context.vdso = (void *)vdso_base; - mm->context.vdso_info = (void *)vdso_info; ret = _install_special_mapping(mm, vdso_base, vdso_text_len, diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index f5a342fa1b1d..f36a737d5f96 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -24,6 +24,7 @@ config KVM select PREEMPT_NOTIFIERS select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT + select KVM_XFER_TO_GUEST_WORK select HAVE_KVM_VCPU_ASYNC_IOCTL select HAVE_KVM_EVENTFD select SRCU diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 1549205fe5fe..df2d8716851f 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -122,7 +122,7 @@ void kvm_arch_exit(void) { } -static int riscv_kvm_init(void) +static int __init riscv_kvm_init(void) { return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); } diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c index 1a76d0b1907d..309d79b3e5cd 100644 --- a/arch/riscv/kvm/tlb.c +++ b/arch/riscv/kvm/tlb.c @@ -12,22 +12,11 @@ #include <linux/kvm_host.h> #include <asm/cacheflush.h> #include <asm/csr.h> +#include <asm/hwcap.h> +#include <asm/insn-def.h> -/* - * Instruction encoding of hfence.gvma is: - * HFENCE.GVMA rs1, rs2 - * HFENCE.GVMA zero, rs2 - * HFENCE.GVMA rs1 - * HFENCE.GVMA - * - * rs1!=zero and rs2!=zero ==> HFENCE.GVMA rs1, rs2 - * rs1==zero and rs2!=zero ==> HFENCE.GVMA zero, rs2 - * rs1!=zero and rs2==zero ==> HFENCE.GVMA rs1 - * rs1==zero and rs2==zero ==> HFENCE.GVMA - * - * Instruction encoding of HFENCE.GVMA is: - * 0110001 rs2(5) rs1(5) 000 00000 1110011 - */ +#define has_svinval() \ + static_branch_unlikely(&riscv_isa_ext_keys[RISCV_ISA_EXT_KEY_SVINVAL]) void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, gpa_t gpa, gpa_t gpsz, @@ -40,32 +29,22 @@ void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, return; } - for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) { - /* - * rs1 = a0 (GPA >> 2) - * rs2 = a1 (VMID) - * HFENCE.GVMA a0, a1 - * 0110001 01011 01010 000 00000 1110011 - */ - asm volatile ("srli a0, %0, 2\n" - "add a1, %1, zero\n" - ".word 0x62b50073\n" - :: "r" (pos), "r" (vmid) - : "a0", "a1", "memory"); + if (has_svinval()) { + asm volatile (SFENCE_W_INVAL() ::: "memory"); + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) + asm volatile (HINVAL_GVMA(%0, %1) + : : "r" (pos >> 2), "r" (vmid) : "memory"); + asm volatile (SFENCE_INVAL_IR() ::: "memory"); + } else { + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) + asm volatile (HFENCE_GVMA(%0, %1) + : : "r" (pos >> 2), "r" (vmid) : "memory"); } } void kvm_riscv_local_hfence_gvma_vmid_all(unsigned long vmid) { - /* - * rs1 = zero - * rs2 = a0 (VMID) - * HFENCE.GVMA zero, a0 - * 0110001 01010 00000 000 00000 1110011 - */ - asm volatile ("add a0, %0, zero\n" - ".word 0x62a00073\n" - :: "r" (vmid) : "a0", "memory"); + asm volatile(HFENCE_GVMA(zero, %0) : : "r" (vmid) : "memory"); } void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz, @@ -78,46 +57,24 @@ void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz, return; } - for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) { - /* - * rs1 = a0 (GPA >> 2) - * rs2 = zero - * HFENCE.GVMA a0 - * 0110001 00000 01010 000 00000 1110011 - */ - asm volatile ("srli a0, %0, 2\n" - ".word 0x62050073\n" - :: "r" (pos) : "a0", "memory"); + if (has_svinval()) { + asm volatile (SFENCE_W_INVAL() ::: "memory"); + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) + asm volatile(HINVAL_GVMA(%0, zero) + : : "r" (pos >> 2) : "memory"); + asm volatile (SFENCE_INVAL_IR() ::: "memory"); + } else { + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) + asm volatile(HFENCE_GVMA(%0, zero) + : : "r" (pos >> 2) : "memory"); } } void kvm_riscv_local_hfence_gvma_all(void) { - /* - * rs1 = zero - * rs2 = zero - * HFENCE.GVMA - * 0110001 00000 00000 000 00000 1110011 - */ - asm volatile (".word 0x62000073" ::: "memory"); + asm volatile(HFENCE_GVMA(zero, zero) : : : "memory"); } -/* - * Instruction encoding of hfence.gvma is: - * HFENCE.VVMA rs1, rs2 - * HFENCE.VVMA zero, rs2 - * HFENCE.VVMA rs1 - * HFENCE.VVMA - * - * rs1!=zero and rs2!=zero ==> HFENCE.VVMA rs1, rs2 - * rs1==zero and rs2!=zero ==> HFENCE.VVMA zero, rs2 - * rs1!=zero and rs2==zero ==> HFENCE.VVMA rs1 - * rs1==zero and rs2==zero ==> HFENCE.VVMA - * - * Instruction encoding of HFENCE.VVMA is: - * 0010001 rs2(5) rs1(5) 000 00000 1110011 - */ - void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid, unsigned long asid, unsigned long gva, @@ -133,18 +90,16 @@ void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid, hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); - for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) { - /* - * rs1 = a0 (GVA) - * rs2 = a1 (ASID) - * HFENCE.VVMA a0, a1 - * 0010001 01011 01010 000 00000 1110011 - */ - asm volatile ("add a0, %0, zero\n" - "add a1, %1, zero\n" - ".word 0x22b50073\n" - :: "r" (pos), "r" (asid) - : "a0", "a1", "memory"); + if (has_svinval()) { + asm volatile (SFENCE_W_INVAL() ::: "memory"); + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) + asm volatile(HINVAL_VVMA(%0, %1) + : : "r" (pos), "r" (asid) : "memory"); + asm volatile (SFENCE_INVAL_IR() ::: "memory"); + } else { + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) + asm volatile(HFENCE_VVMA(%0, %1) + : : "r" (pos), "r" (asid) : "memory"); } csr_write(CSR_HGATP, hgatp); @@ -157,15 +112,7 @@ void kvm_riscv_local_hfence_vvma_asid_all(unsigned long vmid, hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); - /* - * rs1 = zero - * rs2 = a0 (ASID) - * HFENCE.VVMA zero, a0 - * 0010001 01010 00000 000 00000 1110011 - */ - asm volatile ("add a0, %0, zero\n" - ".word 0x22a00073\n" - :: "r" (asid) : "a0", "memory"); + asm volatile(HFENCE_VVMA(zero, %0) : : "r" (asid) : "memory"); csr_write(CSR_HGATP, hgatp); } @@ -183,16 +130,16 @@ void kvm_riscv_local_hfence_vvma_gva(unsigned long vmid, hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); - for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) { - /* - * rs1 = a0 (GVA) - * rs2 = zero - * HFENCE.VVMA a0 - * 0010001 00000 01010 000 00000 1110011 - */ - asm volatile ("add a0, %0, zero\n" - ".word 0x22050073\n" - :: "r" (pos) : "a0", "memory"); + if (has_svinval()) { + asm volatile (SFENCE_W_INVAL() ::: "memory"); + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) + asm volatile(HINVAL_VVMA(%0, zero) + : : "r" (pos) : "memory"); + asm volatile (SFENCE_INVAL_IR() ::: "memory"); + } else { + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) + asm volatile(HFENCE_VVMA(%0, zero) + : : "r" (pos) : "memory"); } csr_write(CSR_HGATP, hgatp); @@ -204,13 +151,7 @@ void kvm_riscv_local_hfence_vvma_all(unsigned long vmid) hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); - /* - * rs1 = zero - * rs2 = zero - * HFENCE.VVMA - * 0010001 00000 00000 000 00000 1110011 - */ - asm volatile (".word 0x22000073" ::: "memory"); + asm volatile(HFENCE_VVMA(zero, zero) : : : "memory"); csr_write(CSR_HGATP, hgatp); } diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index d0f08d5b4282..a032c4f0d600 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -7,6 +7,7 @@ */ #include <linux/bitops.h> +#include <linux/entry-kvm.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/kdebug.h> @@ -18,6 +19,7 @@ #include <linux/fs.h> #include <linux/kvm_host.h> #include <asm/csr.h> +#include <asm/cacheflush.h> #include <asm/hwcap.h> const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { @@ -28,6 +30,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, mmio_exit_kernel), STATS_DESC_COUNTER(VCPU, csr_exit_user), STATS_DESC_COUNTER(VCPU, csr_exit_kernel), + STATS_DESC_COUNTER(VCPU, signal_exits), STATS_DESC_COUNTER(VCPU, exits) }; @@ -42,17 +45,23 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { #define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0) +#define KVM_ISA_EXT_ARR(ext) [KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext + /* Mapping between KVM ISA Extension ID & Host ISA extension ID */ static const unsigned long kvm_isa_ext_arr[] = { - RISCV_ISA_EXT_a, - RISCV_ISA_EXT_c, - RISCV_ISA_EXT_d, - RISCV_ISA_EXT_f, - RISCV_ISA_EXT_h, - RISCV_ISA_EXT_i, - RISCV_ISA_EXT_m, - RISCV_ISA_EXT_SVPBMT, - RISCV_ISA_EXT_SSTC, + [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a, + [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c, + [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d, + [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f, + [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h, + [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, + [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, + + KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVINVAL), + KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(ZIHINTPAUSE), + KVM_ISA_EXT_ARR(ZICBOM), }; static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext) @@ -87,6 +96,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_M: case KVM_RISCV_ISA_EXT_SSTC: + case KVM_RISCV_ISA_EXT_SVINVAL: + case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: return false; default: break; @@ -254,6 +265,11 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu, case KVM_REG_RISCV_CONFIG_REG(isa): reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK; break; + case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): + if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + return -EINVAL; + reg_val = riscv_cbom_block_size; + break; default: return -EINVAL; } @@ -311,6 +327,8 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu, return -EOPNOTSUPP; } break; + case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): + return -EOPNOTSUPP; default: return -EINVAL; } @@ -784,11 +802,15 @@ static void kvm_riscv_vcpu_update_config(const unsigned long *isa) { u64 henvcfg = 0; - if (__riscv_isa_extension_available(isa, RISCV_ISA_EXT_SVPBMT)) + if (riscv_isa_extension_available(isa, SVPBMT)) henvcfg |= ENVCFG_PBMTE; - if (__riscv_isa_extension_available(isa, RISCV_ISA_EXT_SSTC)) + if (riscv_isa_extension_available(isa, SSTC)) henvcfg |= ENVCFG_STCE; + + if (riscv_isa_extension_available(isa, ZICBOM)) + henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE); + csr_write(CSR_HENVCFG, henvcfg); #ifdef CONFIG_32BIT csr_write(CSR_HENVCFGH, henvcfg >> 32); @@ -958,7 +980,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) run->exit_reason = KVM_EXIT_UNKNOWN; while (ret > 0) { /* Check conditions before entering the guest */ - cond_resched(); + ret = xfer_to_guest_mode_handle_work(vcpu); + if (!ret) + ret = 1; kvm_riscv_gstage_vmid_update(vcpu); @@ -967,15 +991,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) local_irq_disable(); /* - * Exit if we have a signal pending so that we can deliver - * the signal to user space. - */ - if (signal_pending(current)) { - ret = -EINTR; - run->exit_reason = KVM_EXIT_INTR; - } - - /* * Ensure we set mode to IN_GUEST_MODE after we disable * interrupts and before the final VCPU requests check. * See the comment in kvm_vcpu_exiting_guest_mode() and @@ -997,7 +1012,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (ret <= 0 || kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) || - kvm_request_pending(vcpu)) { + kvm_request_pending(vcpu) || + xfer_to_guest_mode_work_pending()) { vcpu->mode = OUTSIDE_GUEST_MODE; local_irq_enable(); kvm_vcpu_srcu_read_lock(vcpu); diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c index d5c36386878a..c9f741ab26f5 100644 --- a/arch/riscv/kvm/vcpu_exit.c +++ b/arch/riscv/kvm/vcpu_exit.c @@ -8,6 +8,7 @@ #include <linux/kvm_host.h> #include <asm/csr.h> +#include <asm/insn-def.h> static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_cpu_trap *trap) @@ -62,11 +63,7 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu, { register unsigned long taddr asm("a0") = (unsigned long)trap; register unsigned long ttmp asm("a1"); - register unsigned long val asm("t0"); - register unsigned long tmp asm("t1"); - register unsigned long addr asm("t2") = guest_addr; - unsigned long flags; - unsigned long old_stvec, old_hstatus; + unsigned long flags, val, tmp, old_stvec, old_hstatus; local_irq_save(flags); @@ -82,29 +79,19 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu, ".option push\n" ".option norvc\n" "add %[ttmp], %[taddr], 0\n" - /* - * HLVX.HU %[val], (%[addr]) - * HLVX.HU t0, (t2) - * 0110010 00011 00111 100 00101 1110011 - */ - ".word 0x6433c2f3\n" + HLVX_HU(%[val], %[addr]) "andi %[tmp], %[val], 3\n" "addi %[tmp], %[tmp], -3\n" "bne %[tmp], zero, 2f\n" "addi %[addr], %[addr], 2\n" - /* - * HLVX.HU %[tmp], (%[addr]) - * HLVX.HU t1, (t2) - * 0110010 00011 00111 100 00110 1110011 - */ - ".word 0x6433c373\n" + HLVX_HU(%[tmp], %[addr]) "sll %[tmp], %[tmp], 16\n" "add %[val], %[val], %[tmp]\n" "2:\n" ".option pop" : [val] "=&r" (val), [tmp] "=&r" (tmp), [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp), - [addr] "+&r" (addr) : : "memory"); + [addr] "+&r" (guest_addr) : : "memory"); if (trap->scause == EXC_LOAD_PAGE_FAULT) trap->scause = EXC_INST_PAGE_FAULT; @@ -121,24 +108,14 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu, ".option norvc\n" "add %[ttmp], %[taddr], 0\n" #ifdef CONFIG_64BIT - /* - * HLV.D %[val], (%[addr]) - * HLV.D t0, (t2) - * 0110110 00000 00111 100 00101 1110011 - */ - ".word 0x6c03c2f3\n" + HLV_D(%[val], %[addr]) #else - /* - * HLV.W %[val], (%[addr]) - * HLV.W t0, (t2) - * 0110100 00000 00111 100 00101 1110011 - */ - ".word 0x6803c2f3\n" + HLV_W(%[val], %[addr]) #endif ".option pop" : [val] "=&r" (val), [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp) - : [addr] "r" (addr) : "memory"); + : [addr] "r" (guest_addr) : "memory"); } csr_write(CSR_STVEC, old_stvec); diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c index 7eb90a47b571..0bb52761a3f7 100644 --- a/arch/riscv/kvm/vcpu_insn.c +++ b/arch/riscv/kvm/vcpu_insn.c @@ -191,7 +191,6 @@ void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu) kvm_vcpu_srcu_read_unlock(vcpu); kvm_vcpu_halt(vcpu); kvm_vcpu_srcu_read_lock(vcpu); - kvm_clear_request(KVM_REQ_UNHALT, vcpu); } } diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index e3f9bdf47c5f..b0add983530a 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -13,6 +13,8 @@ #include <asm/cacheflush.h> unsigned int riscv_cbom_block_size; +EXPORT_SYMBOL_GPL(riscv_cbom_block_size); + static bool noncoherent_supported; void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index f2fbd1400b7c..d86f7cebd4a7 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -184,7 +184,8 @@ static inline bool access_error(unsigned long cause, struct vm_area_struct *vma) } break; case EXC_LOAD_PAGE_FAULT: - if (!(vma->vm_flags & VM_READ)) { + /* Write implies read */ + if (!(vma->vm_flags & (VM_READ | VM_WRITE))) { return true; } break; diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 4cb5d17e7ead..de6d8b2ea4d8 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -119,8 +119,6 @@ export KBUILD_CFLAGS_DECOMPRESSOR OBJCOPYFLAGS := -O binary -head-y := arch/s390/kernel/head64.o - libs-y += arch/s390/lib/ drivers-y += drivers/s390/ diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index bc48fe82d949..6e7f01ca53e6 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -10,11 +10,14 @@ #include <asm/sclp.h> #include <asm/diag.h> #include <asm/uv.h> +#include <asm/abs_lowcore.h> #include "decompressor.h" #include "boot.h" #include "uv.h" unsigned long __bootdata_preserved(__kaslr_offset); +unsigned long __bootdata_preserved(__abs_lowcore); +unsigned long __bootdata_preserved(__memcpy_real_area); unsigned long __bootdata(__amode31_base); unsigned long __bootdata_preserved(VMALLOC_START); unsigned long __bootdata_preserved(VMALLOC_END); @@ -180,7 +183,10 @@ static void setup_kernel_memory_layout(void) /* force vmalloc and modules below kasan shadow */ vmax = min(vmax, KASAN_SHADOW_START); #endif - MODULES_END = vmax; + __memcpy_real_area = round_down(vmax - PAGE_SIZE, PAGE_SIZE); + __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, + sizeof(struct lowcore)); + MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE); MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; diff --git a/arch/s390/boot/version.c b/arch/s390/boot/version.c index d32e58bdda6a..fd32f038777f 100644 --- a/arch/s390/boot/version.c +++ b/arch/s390/boot/version.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <generated/utsversion.h> #include <generated/utsrelease.h> #include <generated/compile.h> #include "boot.h" diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig new file mode 100644 index 000000000000..06ee706b0d78 --- /dev/null +++ b/arch/s390/crypto/Kconfig @@ -0,0 +1,135 @@ +# SPDX-License-Identifier: GPL-2.0 + +menu "Accelerated Cryptographic Algorithms for CPU (s390)" + +config CRYPTO_CRC32_S390 + tristate "CRC32c and CRC32" + depends on S390 + select CRYPTO_HASH + select CRC32 + help + CRC32c and CRC32 CRC algorithms + + Architecture: s390 + + It is available with IBM z13 or later. + +config CRYPTO_SHA512_S390 + tristate "Hash functions: SHA-384 and SHA-512" + depends on S390 + select CRYPTO_HASH + help + SHA-384 and SHA-512 secure hash algorithms (FIPS 180) + + Architecture: s390 + + It is available as of z10. + +config CRYPTO_SHA1_S390 + tristate "Hash functions: SHA-1" + depends on S390 + select CRYPTO_HASH + help + SHA-1 secure hash algorithm (FIPS 180) + + Architecture: s390 + + It is available as of z990. + +config CRYPTO_SHA256_S390 + tristate "Hash functions: SHA-224 and SHA-256" + depends on S390 + select CRYPTO_HASH + help + SHA-224 and SHA-256 secure hash algorithms (FIPS 180) + + Architecture: s390 + + It is available as of z9. + +config CRYPTO_SHA3_256_S390 + tristate "Hash functions: SHA3-224 and SHA3-256" + depends on S390 + select CRYPTO_HASH + help + SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202) + + Architecture: s390 + + It is available as of z14. + +config CRYPTO_SHA3_512_S390 + tristate "Hash functions: SHA3-384 and SHA3-512" + depends on S390 + select CRYPTO_HASH + help + SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202) + + Architecture: s390 + + It is available as of z14. + +config CRYPTO_GHASH_S390 + tristate "Hash functions: GHASH" + depends on S390 + select CRYPTO_HASH + help + GCM GHASH hash function (NIST SP800-38D) + + Architecture: s390 + + It is available as of z196. + +config CRYPTO_AES_S390 + tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS, GCM" + depends on S390 + select CRYPTO_ALGAPI + select CRYPTO_SKCIPHER + help + Block cipher: AES cipher algorithms (FIPS 197) + AEAD cipher: AES with GCM + Length-preserving ciphers: AES with ECB, CBC, XTS, and CTR modes + + Architecture: s390 + + As of z9 the ECB and CBC modes are hardware accelerated + for 128 bit keys. + + As of z10 the ECB and CBC modes are hardware accelerated + for all AES key sizes. + + As of z196 the CTR mode is hardware accelerated for all AES + key sizes and XTS mode is hardware accelerated for 256 and + 512 bit keys. + +config CRYPTO_DES_S390 + tristate "Ciphers: DES and Triple DES EDE, modes: ECB, CBC, CTR" + depends on S390 + select CRYPTO_ALGAPI + select CRYPTO_SKCIPHER + select CRYPTO_LIB_DES + help + Block ciphers: DES (FIPS 46-2) cipher algorithm + Block ciphers: Triple DES EDE (FIPS 46-3) cipher algorithm + Length-preserving ciphers: DES with ECB, CBC, and CTR modes + Length-preserving ciphers: Triple DES EDED with ECB, CBC, and CTR modes + + Architecture: s390 + + As of z990 the ECB and CBC mode are hardware accelerated. + As of z196 the CTR mode is hardware accelerated. + +config CRYPTO_CHACHA_S390 + tristate "Ciphers: ChaCha20" + depends on S390 + select CRYPTO_SKCIPHER + select CRYPTO_LIB_CHACHA_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CHACHA + help + Length-preserving cipher: ChaCha20 stream cipher (RFC 7539) + + Architecture: s390 + + It is available as of z13. + +endmenu diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h new file mode 100644 index 000000000000..4c61b14ee928 --- /dev/null +++ b/arch/s390/include/asm/abs_lowcore.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_ABS_LOWCORE_H +#define _ASM_S390_ABS_LOWCORE_H + +#include <asm/lowcore.h> + +#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore)) + +extern unsigned long __abs_lowcore; +extern bool abs_lowcore_mapped; + +struct lowcore *get_abs_lowcore(unsigned long *flags); +void put_abs_lowcore(struct lowcore *lc, unsigned long flags); +int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc); +void abs_lowcore_unmap(int cpu); + +#endif /* _ASM_S390_ABS_LOWCORE_H */ diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index d4e90f2ba77e..bd1596810cc1 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -214,7 +214,6 @@ extern struct ccw_device *ccw_device_create_console(struct ccw_driver *); extern void ccw_device_destroy_console(struct ccw_device *); extern int ccw_device_enable_console(struct ccw_device *); extern void ccw_device_wait_idle(struct ccw_device *); -extern int ccw_device_force_console(struct ccw_device *); extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size); extern void ccw_device_dma_free(struct ccw_device *cdev, diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index 267a8f88e143..adf7d8cdac7e 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -95,7 +95,8 @@ union ctlreg0 { Interruption-Filtering Override */ unsigned long : 3; unsigned long ccc : 1; /* Cryptography counter control */ - unsigned long : 18; + unsigned long pec : 1; /* PAI extension control */ + unsigned long : 17; unsigned long : 3; unsigned long lap : 1; /* Low-address-protection control */ unsigned long : 4; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 26fe5e535728..8aa1f6530a3e 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -203,7 +203,9 @@ struct lowcore { __u8 pad_0x1400[0x1500-0x1400]; /* 0x1400 */ /* Cryptography-counter designation */ __u64 ccd; /* 0x1500 */ - __u8 pad_0x1508[0x1800-0x1508]; /* 0x1508 */ + /* AI-extension counter designation */ + __u64 aicd; /* 0x1508 */ + __u8 pad_0x1510[0x1800-0x1510]; /* 0x1510 */ /* Transaction abort diagnostic block */ struct pgm_tdb pgm_tdb; /* 0x1800 */ diff --git a/arch/s390/include/asm/maccess.h b/arch/s390/include/asm/maccess.h new file mode 100644 index 000000000000..c7fa838cf6b9 --- /dev/null +++ b/arch/s390/include/asm/maccess.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_S390_MACCESS_H +#define __ASM_S390_MACCESS_H + +#include <linux/types.h> + +struct iov_iter; + +extern unsigned long __memcpy_real_area; +void memcpy_real_init(void); +size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count); +int memcpy_real(void *dest, unsigned long src, size_t count); +#ifdef CONFIG_CRASH_DUMP +int copy_oldmem_kernel(void *dst, unsigned long src, size_t count); +#endif + +#endif /* __ASM_S390_MACCESS_H */ diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h index 85248d8fee0c..0d1c74a7a650 100644 --- a/arch/s390/include/asm/os_info.h +++ b/arch/s390/include/asm/os_info.h @@ -41,20 +41,6 @@ u32 os_info_csum(struct os_info *os_info); #ifdef CONFIG_CRASH_DUMP void *os_info_old_entry(int nr, unsigned long *size); -size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count); - -static inline int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) -{ - struct iov_iter iter; - struct kvec kvec; - - kvec.iov_base = dst; - kvec.iov_len = count; - iov_iter_kvec(&iter, WRITE, &kvec, 1, count); - if (copy_oldmem_iter(&iter, src, count) < count) - return -EFAULT; - return 0; -} #else static inline void *os_info_old_entry(int nr, unsigned long *size) { diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h index 5b7e33ac6f0b..1a8a6b15d121 100644 --- a/arch/s390/include/asm/pai.h +++ b/arch/s390/include/asm/pai.h @@ -17,7 +17,9 @@ struct qpaci_info_block { struct { u64 : 8; u64 num_cc : 8; /* # of supported crypto counters */ - u64 : 48; + u64 : 9; + u64 num_nnpa : 7; /* # of supported NNPA counters */ + u64 : 32; }; }; @@ -42,6 +44,8 @@ static inline int qpaci(struct qpaci_info_block *info) #define PAI_CRYPTO_BASE 0x1000 /* First event number */ #define PAI_CRYPTO_MAXCTR 256 /* Max # of event counters */ #define PAI_CRYPTO_KERNEL_OFFSET 2048 +#define PAI_NNPA_BASE 0x1800 /* First event number */ +#define PAI_NNPA_MAXCTR 128 /* Max # of event counters */ DECLARE_STATIC_KEY_FALSE(pai_key); diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 7b4cdadbc023..108e732d7b14 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -117,7 +117,6 @@ struct zpci_bus { struct zpci_dev { struct zpci_bus *zbus; struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ - struct list_head bus_next; struct kref kref; struct hotplug_slot hotplug_slot; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index f019df19884d..f1cb9391190d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1777,6 +1777,10 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) extern int vmem_add_mapping(unsigned long start, unsigned long size); extern void vmem_remove_mapping(unsigned long start, unsigned long size); +extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc); +extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot); +extern void vmem_unmap_4k_page(unsigned long addr); +extern pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc); extern int s390_enable_sie(void); extern int s390_enable_skey(void); extern void s390_reset_cmma(struct mm_struct *mm); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index bd66f8e34949..87be3e855bf7 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -186,9 +186,6 @@ struct pt_regs; void show_registers(struct pt_regs *regs); void show_cacheinfo(struct seq_file *m); -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *tsk) { } - /* Free guarded storage control block */ void guarded_storage_release(struct task_struct *tsk); void gs_load_bc_cb(struct pt_regs *regs); @@ -306,23 +303,6 @@ static __always_inline void __noreturn disabled_wait(void) #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL -extern int memcpy_real(void *, unsigned long, size_t); -extern void memcpy_absolute(void *, void *, size_t); - -#define put_abs_lowcore(member, x) do { \ - unsigned long __abs_address = offsetof(struct lowcore, member); \ - __typeof__(((struct lowcore *)0)->member) __tmp = (x); \ - \ - memcpy_absolute(__va(__abs_address), &__tmp, sizeof(__tmp)); \ -} while (0) - -#define get_abs_lowcore(x, member) do { \ - unsigned long __abs_address = offsetof(struct lowcore, member); \ - __typeof__(((struct lowcore *)0)->member) *__ptr = &(x); \ - \ - memcpy_absolute(__ptr, __va(__abs_address), sizeof(*__ptr)); \ -} while (0) - extern int s390_isolate_bp(void); extern int s390_isolate_bp_guest(void); diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 7f5d4763357b..73ed2781073b 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -30,7 +30,8 @@ extern void smp_emergency_stop(void); extern int smp_find_processor_id(u16 address); extern int smp_store_status(int cpu); -extern void smp_save_dump_cpus(void); +extern void smp_save_dump_ipl_cpu(void); +extern void smp_save_dump_secondary_cpus(void); extern void smp_yield_cpu(int cpu); extern void smp_cpu_set_polarization(int cpu, int val); extern int smp_cpu_get_polarization(int cpu); @@ -58,6 +59,7 @@ static inline void smp_cpus_done(unsigned int max_cpus) { } +extern int smp_reinit_ipl_cpu(void); extern int smp_rescan_cpus(void); extern void __noreturn cpu_die(void); extern void __cpu_die(unsigned int cpu); diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h deleted file mode 100644 index 46fa3020b41e..000000000000 --- a/arch/s390/include/asm/termios.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * S390 version - * - * Derived from "include/asm-i386/termios.h" - */ -#ifndef _S390_TERMIOS_H -#define _S390_TERMIOS_H - -#include <uapi/asm/termios.h> - - -/* intr=^C quit=^\ erase=del kill=^U - eof=^D vtime=\0 vmin=\1 sxtc=\0 - start=^Q stop=^S susp=^Z eol=\0 - reprint=^R discard=^U werase=^W lnext=^V - eol2=\0 -*/ -#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" - -#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2)) -#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2)) - -#include <asm-generic/termios-base.h> - -#endif /* _S390_TERMIOS_H */ diff --git a/arch/s390/include/uapi/asm/termios.h b/arch/s390/include/uapi/asm/termios.h deleted file mode 100644 index 54223169c806..000000000000 --- a/arch/s390/include/uapi/asm/termios.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * S390 version - * - * Derived from "include/asm-i386/termios.h" - */ - -#ifndef _UAPI_S390_TERMIOS_H -#define _UAPI_S390_TERMIOS_H - -#include <asm/termbits.h> -#include <asm/ioctls.h> - -struct winsize { - unsigned short ws_row; - unsigned short ws_col; - unsigned short ws_xpixel; - unsigned short ws_ypixel; -}; - -#define NCC 8 -struct termio { - unsigned short c_iflag; /* input mode flags */ - unsigned short c_oflag; /* output mode flags */ - unsigned short c_cflag; /* control mode flags */ - unsigned short c_lflag; /* local mode flags */ - unsigned char c_line; /* line discipline */ - unsigned char c_cc[NCC]; /* control characters */ -}; - -/* modem lines */ -#define TIOCM_LE 0x001 -#define TIOCM_DTR 0x002 -#define TIOCM_RTS 0x004 -#define TIOCM_ST 0x008 -#define TIOCM_SR 0x010 -#define TIOCM_CTS 0x020 -#define TIOCM_CAR 0x040 -#define TIOCM_RNG 0x080 -#define TIOCM_DSR 0x100 -#define TIOCM_CD TIOCM_CAR -#define TIOCM_RI TIOCM_RNG -#define TIOCM_OUT1 0x2000 -#define TIOCM_OUT2 0x4000 -#define TIOCM_LOOP 0x8000 - -/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ - - -#endif /* _UAPI_S390_TERMIOS_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 3cbfa9fddd9a..5e6a23299790 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -33,16 +33,16 @@ CFLAGS_stacktrace.o += -fno-optimize-sibling-calls CFLAGS_dumpstack.o += -fno-optimize-sibling-calls CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls -obj-y := traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o +obj-y := head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o -obj-y += smp.o text_amode31.o stacktrace.o +obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o -extra-y += head64.o vmlinux.lds +extra-y += vmlinux.lds obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) @@ -72,7 +72,7 @@ obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o -obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o +obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o obj-$(CONFIG_TRACEPOINTS) += trace.o obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c new file mode 100644 index 000000000000..fb92e8ed0525 --- /dev/null +++ b/arch/s390/kernel/abs_lowcore.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/pgtable.h> +#include <asm/abs_lowcore.h> + +#define ABS_LOWCORE_UNMAPPED 1 +#define ABS_LOWCORE_LAP_ON 2 +#define ABS_LOWCORE_IRQS_ON 4 + +unsigned long __bootdata_preserved(__abs_lowcore); +bool __ro_after_init abs_lowcore_mapped; + +int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) +{ + unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore)); + unsigned long phys = __pa(lc); + int rc, i; + + for (i = 0; i < LC_PAGES; i++) { + rc = __vmem_map_4k_page(addr, phys, PAGE_KERNEL, alloc); + if (rc) { + /* + * Do not unmap allocated page tables in case the + * allocation was not requested. In such a case the + * request is expected coming from an atomic context, + * while the unmap attempt might sleep. + */ + if (alloc) { + for (--i; i >= 0; i--) { + addr -= PAGE_SIZE; + vmem_unmap_4k_page(addr); + } + } + return rc; + } + addr += PAGE_SIZE; + phys += PAGE_SIZE; + } + return 0; +} + +void abs_lowcore_unmap(int cpu) +{ + unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore)); + int i; + + for (i = 0; i < LC_PAGES; i++) { + vmem_unmap_4k_page(addr); + addr += PAGE_SIZE; + } +} + +struct lowcore *get_abs_lowcore(unsigned long *flags) +{ + unsigned long irq_flags; + union ctlreg0 cr0; + int cpu; + + *flags = 0; + cpu = get_cpu(); + if (abs_lowcore_mapped) { + return ((struct lowcore *)__abs_lowcore) + cpu; + } else { + if (cpu != 0) + panic("Invalid unmapped absolute lowcore access\n"); + local_irq_save(irq_flags); + if (!irqs_disabled_flags(irq_flags)) + *flags |= ABS_LOWCORE_IRQS_ON; + __ctl_store(cr0.val, 0, 0); + if (cr0.lap) { + *flags |= ABS_LOWCORE_LAP_ON; + __ctl_clear_bit(0, 28); + } + *flags |= ABS_LOWCORE_UNMAPPED; + return lowcore_ptr[0]; + } +} + +void put_abs_lowcore(struct lowcore *lc, unsigned long flags) +{ + if (abs_lowcore_mapped) { + if (flags) + panic("Invalid mapped absolute lowcore release\n"); + } else { + if (smp_processor_id() != 0) + panic("Invalid mapped absolute lowcore access\n"); + if (!(flags & ABS_LOWCORE_UNMAPPED)) + panic("Invalid unmapped absolute lowcore release\n"); + if (flags & ABS_LOWCORE_LAP_ON) + __ctl_set_bit(0, 28); + if (flags & ABS_LOWCORE_IRQS_ON) + local_irq_enable(); + } + put_cpu(); +} diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index bad8f47fc5d6..dd74fe664ed1 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -21,6 +21,7 @@ #include <asm/elf.h> #include <asm/ipl.h> #include <asm/sclp.h> +#include <asm/maccess.h> #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) @@ -53,8 +54,6 @@ struct save_area { }; static LIST_HEAD(dump_save_areas); -static DEFINE_MUTEX(memcpy_real_mutex); -static char memcpy_real_buf[PAGE_SIZE]; /* * Allocate a save area @@ -116,27 +115,7 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs) memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128)); } -static size_t copy_to_iter_real(struct iov_iter *iter, unsigned long src, size_t count) -{ - size_t len, copied, res = 0; - - mutex_lock(&memcpy_real_mutex); - while (count) { - len = min(PAGE_SIZE, count); - if (memcpy_real(memcpy_real_buf, src, len)) - break; - copied = copy_to_iter(memcpy_real_buf, len, iter); - count -= copied; - src += copied; - res += copied; - if (copied < len) - break; - } - mutex_unlock(&memcpy_real_mutex); - return res; -} - -size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count) +static size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count) { size_t len, copied, res = 0; @@ -156,7 +135,7 @@ size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count) } else { len = count; } - copied = copy_to_iter_real(iter, src, len); + copied = memcpy_real_iter(iter, src, len); } count -= copied; src += copied; @@ -167,6 +146,19 @@ size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count) return res; } +int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) +{ + struct iov_iter iter; + struct kvec kvec; + + kvec.iov_base = dst; + kvec.iov_len = count; + iov_iter_kvec(&iter, WRITE, &kvec, 1, count); + if (copy_oldmem_iter(&iter, src, count) < count) + return -EFAULT; + return 0; +} + /* * Copy one page from "oldmem" */ diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 4331c7e6e1c0..d7a82066a638 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -250,7 +250,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area, rc->level = level; rc->buf_size = buf_size; rc->entry_size = sizeof(debug_entry_t) + buf_size; - strlcpy(rc->name, name, sizeof(rc->name)); + strscpy(rc->name, name, sizeof(rc->name)); memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *)); refcount_set(&(rc->ref_count), 0); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 432c8c987256..6030fdd6997b 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -267,7 +267,7 @@ char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; static void __init setup_boot_command_line(void) { /* copy arch command line */ - strlcpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE); + strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE); } static void __init check_image_bootable(void) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 1cc85b8ff42e..325cbf69ebbd 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -29,6 +29,7 @@ #include <asm/sclp.h> #include <asm/checksum.h> #include <asm/debug.h> +#include <asm/abs_lowcore.h> #include <asm/os_info.h> #include <asm/sections.h> #include <asm/boot_data.h> @@ -1642,12 +1643,16 @@ static struct shutdown_action __refdata dump_action = { static void dump_reipl_run(struct shutdown_trigger *trigger) { unsigned long ipib = (unsigned long) reipl_block_actual; + struct lowcore *abs_lc; + unsigned long flags; unsigned int csum; csum = (__force unsigned int) csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); - put_abs_lowcore(ipib, ipib); - put_abs_lowcore(ipib_checksum, csum); + abs_lc = get_abs_lowcore(&flags); + abs_lc->ipib = ipib; + abs_lc->ipib_checksum = csum; + put_abs_lowcore(abs_lc, flags); dump_run(trigger); } diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index ab761c008f98..4579b42286d5 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -21,6 +21,7 @@ #include <asm/elf.h> #include <asm/asm-offsets.h> #include <asm/cacheflush.h> +#include <asm/abs_lowcore.h> #include <asm/os_info.h> #include <asm/set_memory.h> #include <asm/stacktrace.h> @@ -222,13 +223,18 @@ void machine_kexec_cleanup(struct kimage *image) void arch_crash_save_vmcoreinfo(void) { + struct lowcore *abs_lc; + unsigned long flags; + VMCOREINFO_SYMBOL(lowcore_ptr); VMCOREINFO_SYMBOL(high_memory); VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31); vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); - put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note()); + abs_lc = get_abs_lowcore(&flags); + abs_lc->vmcore_info = paddr_vmcoreinfo_note(); + put_abs_lowcore(abs_lc, flags); } void machine_shutdown(void) diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 1acc2e05d70f..ec0bd9457e90 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -13,8 +13,9 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <asm/checksum.h> -#include <asm/lowcore.h> +#include <asm/abs_lowcore.h> #include <asm/os_info.h> +#include <asm/maccess.h> #include <asm/asm-offsets.h> /* @@ -57,13 +58,16 @@ void os_info_entry_add(int nr, void *ptr, u64 size) */ void __init os_info_init(void) { - void *ptr = &os_info; + struct lowcore *abs_lc; + unsigned long flags; os_info.version_major = OS_INFO_VERSION_MAJOR; os_info.version_minor = OS_INFO_VERSION_MINOR; os_info.magic = OS_INFO_MAGIC; os_info.csum = os_info_csum(&os_info); - put_abs_lowcore(os_info, __pa(ptr)); + abs_lc = get_abs_lowcore(&flags); + abs_lc->os_info = __pa(&os_info); + put_abs_lowcore(abs_lc, flags); } #ifdef CONFIG_CRASH_DUMP diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index f7dd3c849e68..f043a7ff220b 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -664,6 +664,7 @@ static int cfdiag_push_sample(struct perf_event *event, raw.frag.data = cpuhw->stop; raw.size = raw.frag.size; data.raw = &raw; + data.sample_flags |= PERF_SAMPLE_RAW; } overflow = perf_event_overflow(event, &data, ®s); diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index b38b4ae01589..6826e2a69a21 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -366,6 +366,7 @@ static int paicrypt_push_sample(void) raw.frag.data = cpump->save; raw.size = raw.frag.size; data.raw = &raw; + data.sample_flags |= PERF_SAMPLE_RAW; } overflow = perf_event_overflow(event, &data, ®s); diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c new file mode 100644 index 000000000000..d5c7c1e30c17 --- /dev/null +++ b/arch/s390/kernel/perf_pai_ext.c @@ -0,0 +1,671 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance event support - Processor Activity Instrumentation Extension + * Facility + * + * Copyright IBM Corp. 2022 + * Author(s): Thomas Richter <tmricht@linux.ibm.com> + */ +#define KMSG_COMPONENT "pai_ext" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/io.h> + +#include <asm/cpu_mcf.h> +#include <asm/ctl_reg.h> +#include <asm/pai.h> +#include <asm/debug.h> + +#define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */ +#define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */ + +static debug_info_t *paiext_dbg; +static unsigned int paiext_cnt; /* Extracted with QPACI instruction */ + +enum paiext_mode { + PAI_MODE_NONE, + PAI_MODE_SAMPLING, + PAI_MODE_COUNTER, +}; + +struct pai_userdata { + u16 num; + u64 value; +} __packed; + +/* Create the PAI extension 1 control block area. + * The PAI extension control block 1 is pointed to by lowcore + * address 0x1508 for each CPU. This control block is 512 bytes in size + * and requires a 512 byte boundary alignment. + */ +struct paiext_cb { /* PAI extension 1 control block */ + u64 header; /* Not used */ + u64 reserved1; + u64 acc; /* Addr to analytics counter control block */ + u8 reserved2[488]; +} __packed; + +struct paiext_map { + unsigned long *area; /* Area for CPU to store counters */ + struct pai_userdata *save; /* Area to store non-zero counters */ + enum paiext_mode mode; /* Type of event */ + unsigned int active_events; /* # of PAI Extension users */ + unsigned int refcnt; + struct perf_event *event; /* Perf event for sampling */ + struct paiext_cb *paiext_cb; /* PAI extension control block area */ +}; + +struct paiext_mapptr { + struct paiext_map *mapptr; +}; + +static struct paiext_root { /* Anchor to per CPU data */ + int refcnt; /* Overall active events */ + struct paiext_mapptr __percpu *mapptr; +} paiext_root; + +/* Free per CPU data when the last event is removed. */ +static void paiext_root_free(void) +{ + if (!--paiext_root.refcnt) { + free_percpu(paiext_root.mapptr); + paiext_root.mapptr = NULL; + } +} + +/* On initialization of first event also allocate per CPU data dynamically. + * Start with an array of pointers, the array size is the maximum number of + * CPUs possible, which might be larger than the number of CPUs currently + * online. + */ +static int paiext_root_alloc(void) +{ + if (++paiext_root.refcnt == 1) { + /* The memory is already zeroed. */ + paiext_root.mapptr = alloc_percpu(struct paiext_mapptr); + if (!paiext_root.mapptr) { + /* Returing without refcnt adjustment is ok. The + * error code is handled by paiext_alloc() which + * decrements refcnt when an event can not be + * created. + */ + return -ENOMEM; + } + } + return 0; +} + +/* Protects against concurrent increment of sampler and counter member + * increments at the same time and prohibits concurrent execution of + * counting and sampling events. + * Ensures that analytics counter block is deallocated only when the + * sampling and counting on that cpu is zero. + * For details see paiext_alloc(). + */ +static DEFINE_MUTEX(paiext_reserve_mutex); + +/* Free all memory allocated for event counting/sampling setup */ +static void paiext_free(struct paiext_mapptr *mp) +{ + kfree(mp->mapptr->area); + kfree(mp->mapptr->paiext_cb); + kvfree(mp->mapptr->save); + kfree(mp->mapptr); + mp->mapptr = NULL; +} + +/* Release the PMU if event is the last perf event */ +static void paiext_event_destroy(struct perf_event *event) +{ + struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); + struct paiext_map *cpump = mp->mapptr; + + mutex_lock(&paiext_reserve_mutex); + cpump->event = NULL; + if (!--cpump->refcnt) /* Last reference gone */ + paiext_free(mp); + paiext_root_free(); + mutex_unlock(&paiext_reserve_mutex); + debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__, + event->cpu, mp->mapptr); + +} + +/* Used to avoid races in checking concurrent access of counting and + * sampling for pai_extension events. + * + * Only one instance of event pai_ext/NNPA_ALL/ for sampling is + * allowed and when this event is running, no counting event is allowed. + * Several counting events are allowed in parallel, but no sampling event + * is allowed while one (or more) counting events are running. + * + * This function is called in process context and it is safe to block. + * When the event initialization functions fails, no other call back will + * be invoked. + * + * Allocate the memory for the event. + */ +static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) +{ + struct paiext_mapptr *mp; + struct paiext_map *cpump; + int rc; + + mutex_lock(&paiext_reserve_mutex); + + rc = paiext_root_alloc(); + if (rc) + goto unlock; + + mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); + cpump = mp->mapptr; + if (!cpump) { /* Paiext_map allocated? */ + rc = -ENOMEM; + cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); + if (!cpump) + goto unlock; + + /* Allocate memory for counter area and counter extraction. + * These are + * - a 512 byte block and requires 512 byte boundary alignment. + * - a 1KB byte block and requires 1KB boundary alignment. + * Only the first counting event has to allocate the area. + * + * Note: This works with commit 59bb47985c1d by default. + * Backporting this to kernels without this commit might + * need adjustment. + */ + mp->mapptr = cpump; + cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL); + cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL); + cpump->save = kvmalloc_array(paiext_cnt + 1, + sizeof(struct pai_userdata), + GFP_KERNEL); + if (!cpump->save || !cpump->area || !cpump->paiext_cb) { + paiext_free(mp); + goto unlock; + } + cpump->mode = a->sample_period ? PAI_MODE_SAMPLING + : PAI_MODE_COUNTER; + } else { + /* Multiple invocation, check whats active. + * Supported are multiple counter events or only one sampling + * event concurrently at any one time. + */ + if (cpump->mode == PAI_MODE_SAMPLING || + (cpump->mode == PAI_MODE_COUNTER && a->sample_period)) { + rc = -EBUSY; + goto unlock; + } + } + + rc = 0; + cpump->event = event; + ++cpump->refcnt; + +unlock: + if (rc) { + /* Error in allocation of event, decrement anchor. Since + * the event in not created, its destroy() function is never + * invoked. Adjust the reference counter for the anchor. + */ + paiext_root_free(); + } + mutex_unlock(&paiext_reserve_mutex); + /* If rc is non-zero, no increment of counter/sampler was done. */ + return rc; +} + +/* The PAI extension 1 control block supports up to 128 entries. Return + * the index within PAIE1_CB given the event number. Also validate event + * number. + */ +static int paiext_event_valid(struct perf_event *event) +{ + u64 cfg = event->attr.config; + + if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) { + /* Offset NNPA in paiext_cb */ + event->hw.config_base = offsetof(struct paiext_cb, acc); + return 0; + } + return -EINVAL; +} + +/* Might be called on different CPU than the one the event is intended for. */ +static int paiext_event_init(struct perf_event *event) +{ + struct perf_event_attr *a = &event->attr; + int rc; + + /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */ + if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) + return -ENOENT; + /* PAI extension event must be valid and in supported range */ + rc = paiext_event_valid(event); + if (rc) + return rc; + /* Allow only CPU wide operation, no process context for now. */ + if (event->hw.target || event->cpu == -1) + return -ENOENT; + /* Allow only event NNPA_ALL for sampling. */ + if (a->sample_period && a->config != PAI_NNPA_BASE) + return -EINVAL; + /* Prohibit exclude_user event selection */ + if (a->exclude_user) + return -EINVAL; + + rc = paiext_alloc(a, event); + if (rc) + return rc; + event->hw.last_tag = 0; + event->destroy = paiext_event_destroy; + + if (a->sample_period) { + a->sample_period = 1; + a->freq = 0; + /* Register for paicrypt_sched_task() to be called */ + event->attach_state |= PERF_ATTACH_SCHED_CB; + /* Add raw data which are the memory mapped counters */ + a->sample_type |= PERF_SAMPLE_RAW; + /* Turn off inheritance */ + a->inherit = 0; + } + + return 0; +} + +static u64 paiext_getctr(struct paiext_map *cpump, int nr) +{ + return cpump->area[nr]; +} + +/* Read the counter values. Return value from location in buffer. For event + * NNPA_ALL sum up all events. + */ +static u64 paiext_getdata(struct perf_event *event) +{ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + u64 sum = 0; + int i; + + if (event->attr.config != PAI_NNPA_BASE) + return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE); + + for (i = 1; i <= paiext_cnt; i++) + sum += paiext_getctr(cpump, i); + + return sum; +} + +static u64 paiext_getall(struct perf_event *event) +{ + return paiext_getdata(event); +} + +static void paiext_read(struct perf_event *event) +{ + u64 prev, new, delta; + + prev = local64_read(&event->hw.prev_count); + new = paiext_getall(event); + local64_set(&event->hw.prev_count, new); + delta = new - prev; + local64_add(delta, &event->count); +} + +static void paiext_start(struct perf_event *event, int flags) +{ + u64 sum; + + if (event->hw.last_tag) + return; + event->hw.last_tag = 1; + sum = paiext_getall(event); /* Get current value */ + local64_set(&event->hw.prev_count, sum); + local64_set(&event->count, 0); +} + +static int paiext_add(struct perf_event *event, int flags) +{ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + struct paiext_cb *pcb = cpump->paiext_cb; + + if (++cpump->active_events == 1) { + S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb); + pcb->acc = virt_to_phys(cpump->area) | 0x1; + /* Enable CPU instruction lookup for PAIE1 control block */ + __ctl_set_bit(0, 49); + debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", + __func__, S390_lowcore.aicd, pcb->acc); + } + if (flags & PERF_EF_START && !event->attr.sample_period) { + /* Only counting needs initial counter value */ + paiext_start(event, PERF_EF_RELOAD); + } + event->hw.state = 0; + if (event->attr.sample_period) { + cpump->event = event; + perf_sched_cb_inc(event->pmu); + } + return 0; +} + +static void paiext_stop(struct perf_event *event, int flags) +{ + paiext_read(event); + event->hw.state = PERF_HES_STOPPED; +} + +static void paiext_del(struct perf_event *event, int flags) +{ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + struct paiext_cb *pcb = cpump->paiext_cb; + + if (event->attr.sample_period) + perf_sched_cb_dec(event->pmu); + if (!event->attr.sample_period) { + /* Only counting needs to read counter */ + paiext_stop(event, PERF_EF_UPDATE); + } + if (--cpump->active_events == 0) { + /* Disable CPU instruction lookup for PAIE1 control block */ + __ctl_clear_bit(0, 49); + pcb->acc = 0; + S390_lowcore.aicd = 0; + debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", + __func__, S390_lowcore.aicd, pcb->acc); + } +} + +/* Create raw data and save it in buffer. Returns number of bytes copied. + * Saves only positive counter entries of the form + * 2 bytes: Number of counter + * 8 bytes: Value of counter + */ +static size_t paiext_copy(struct paiext_map *cpump) +{ + struct pai_userdata *userdata = cpump->save; + int i, outidx = 0; + + for (i = 1; i <= paiext_cnt; i++) { + u64 val = paiext_getctr(cpump, i); + + if (val) { + userdata[outidx].num = i; + userdata[outidx].value = val; + outidx++; + } + } + return outidx * sizeof(*userdata); +} + +/* Write sample when one or more counters values are nonzero. + * + * Note: The function paiext_sched_task() and paiext_push_sample() are not + * invoked after function paiext_del() has been called because of function + * perf_sched_cb_dec(). + * The function paiext_sched_task() and paiext_push_sample() are only + * called when sampling is active. Function perf_sched_cb_inc() + * has been invoked to install function paiext_sched_task() as call back + * to run at context switch time (see paiext_add()). + * + * This causes function perf_event_context_sched_out() and + * perf_event_context_sched_in() to check whether the PMU has installed an + * sched_task() callback. That callback is not active after paiext_del() + * returns and has deleted the event on that CPU. + */ +static int paiext_push_sample(void) +{ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + struct perf_event *event = cpump->event; + struct perf_sample_data data; + struct perf_raw_record raw; + struct pt_regs regs; + size_t rawsize; + int overflow; + + rawsize = paiext_copy(cpump); + if (!rawsize) /* No incremented counters */ + return 0; + + /* Setup perf sample */ + memset(®s, 0, sizeof(regs)); + memset(&raw, 0, sizeof(raw)); + memset(&data, 0, sizeof(data)); + perf_sample_data_init(&data, 0, event->hw.last_period); + if (event->attr.sample_type & PERF_SAMPLE_TID) { + data.tid_entry.pid = task_tgid_nr(current); + data.tid_entry.tid = task_pid_nr(current); + } + if (event->attr.sample_type & PERF_SAMPLE_TIME) + data.time = event->clock(); + if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) + data.id = event->id; + if (event->attr.sample_type & PERF_SAMPLE_CPU) + data.cpu_entry.cpu = smp_processor_id(); + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.frag.size = rawsize; + raw.frag.data = cpump->save; + raw.size = raw.frag.size; + data.raw = &raw; + } + + overflow = perf_event_overflow(event, &data, ®s); + perf_event_update_userpage(event); + /* Clear lowcore area after read */ + memset(cpump->area, 0, PAIE1_CTRBLOCK_SZ); + return overflow; +} + +/* Called on schedule-in and schedule-out. No access to event structure, + * but for sampling only event NNPA_ALL is allowed. + */ +static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + /* We started with a clean page on event installation. So read out + * results on schedule_out and if page was dirty, clear values. + */ + if (!sched_in) + paiext_push_sample(); +} + +/* Attribute definitions for pai extension1 interface. As with other CPU + * Measurement Facilities, there is one attribute per mapped counter. + * The number of mapped counters may vary per machine generation. Use + * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction + * to determine the number of mapped counters. The instructions returns + * a positive number, which is the highest number of supported counters. + * All counters less than this number are also supported, there are no + * holes. A returned number of zero means no support for mapped counters. + * + * The identification of the counter is a unique number. The chosen range + * is 0x1800 + offset in mapped kernel page. + * All CPU Measurement Facility counters identifiers must be unique and + * the numbers from 0 to 496 are already used for the CPU Measurement + * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography + * counters. + * Numbers 0xb0000, 0xbc000 and 0xbd000 are already + * used for the CPU Measurement Sampling facility. + */ +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *paiext_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group paiext_events_group = { + .name = "events", + .attrs = NULL, /* Filled in attr_event_init() */ +}; + +static struct attribute_group paiext_format_group = { + .name = "format", + .attrs = paiext_format_attr, +}; + +static const struct attribute_group *paiext_attr_groups[] = { + &paiext_events_group, + &paiext_format_group, + NULL, +}; + +/* Performance monitoring unit for mapped counters */ +static struct pmu paiext = { + .task_ctx_nr = perf_invalid_context, + .event_init = paiext_event_init, + .add = paiext_add, + .del = paiext_del, + .start = paiext_start, + .stop = paiext_stop, + .read = paiext_read, + .sched_task = paiext_sched_task, + .attr_groups = paiext_attr_groups, +}; + +/* List of symbolic PAI extension 1 NNPA counter names. */ +static const char * const paiext_ctrnames[] = { + [0] = "NNPA_ALL", + [1] = "NNPA_ADD", + [2] = "NNPA_SUB", + [3] = "NNPA_MUL", + [4] = "NNPA_DIV", + [5] = "NNPA_MIN", + [6] = "NNPA_MAX", + [7] = "NNPA_LOG", + [8] = "NNPA_EXP", + [9] = "NNPA_IBM_RESERVED_9", + [10] = "NNPA_RELU", + [11] = "NNPA_TANH", + [12] = "NNPA_SIGMOID", + [13] = "NNPA_SOFTMAX", + [14] = "NNPA_BATCHNORM", + [15] = "NNPA_MAXPOOL2D", + [16] = "NNPA_AVGPOOL2D", + [17] = "NNPA_LSTMACT", + [18] = "NNPA_GRUACT", + [19] = "NNPA_CONVOLUTION", + [20] = "NNPA_MATMUL_OP", + [21] = "NNPA_MATMUL_OP_BCAST23", + [22] = "NNPA_SMALLBATCH", + [23] = "NNPA_LARGEDIM", + [24] = "NNPA_SMALLTENSOR", + [25] = "NNPA_1MFRAME", + [26] = "NNPA_2GFRAME", + [27] = "NNPA_ACCESSEXCEPT", +}; + +static void __init attr_event_free(struct attribute **attrs, int num) +{ + struct perf_pmu_events_attr *pa; + struct device_attribute *dap; + int i; + + for (i = 0; i < num; i++) { + dap = container_of(attrs[i], struct device_attribute, attr); + pa = container_of(dap, struct perf_pmu_events_attr, attr); + kfree(pa); + } + kfree(attrs); +} + +static int __init attr_event_init_one(struct attribute **attrs, int num) +{ + struct perf_pmu_events_attr *pa; + + pa = kzalloc(sizeof(*pa), GFP_KERNEL); + if (!pa) + return -ENOMEM; + + sysfs_attr_init(&pa->attr.attr); + pa->id = PAI_NNPA_BASE + num; + pa->attr.attr.name = paiext_ctrnames[num]; + pa->attr.attr.mode = 0444; + pa->attr.show = cpumf_events_sysfs_show; + pa->attr.store = NULL; + attrs[num] = &pa->attr.attr; + return 0; +} + +/* Create PMU sysfs event attributes on the fly. */ +static int __init attr_event_init(void) +{ + struct attribute **attrs; + int ret, i; + + attrs = kmalloc_array(ARRAY_SIZE(paiext_ctrnames) + 1, sizeof(*attrs), + GFP_KERNEL); + if (!attrs) + return -ENOMEM; + for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) { + ret = attr_event_init_one(attrs, i); + if (ret) { + attr_event_free(attrs, i - 1); + return ret; + } + } + attrs[i] = NULL; + paiext_events_group.attrs = attrs; + return 0; +} + +static int __init paiext_init(void) +{ + struct qpaci_info_block ib; + int rc = -ENOMEM; + + if (!test_facility(197)) + return 0; + + qpaci(&ib); + paiext_cnt = ib.num_nnpa; + if (paiext_cnt >= PAI_NNPA_MAXCTR) + paiext_cnt = PAI_NNPA_MAXCTR; + if (!paiext_cnt) + return 0; + + rc = attr_event_init(); + if (rc) { + pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n"); + return rc; + } + + /* Setup s390dbf facility */ + paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128); + if (!paiext_dbg) { + pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n"); + rc = -ENOMEM; + goto out_init; + } + debug_register_view(paiext_dbg, &debug_sprintf_view); + + rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1); + if (rc) { + pr_err("Registration of " KMSG_COMPONENT " PMU failed with " + "rc=%i\n", rc); + goto out_pmu; + } + + return 0; + +out_pmu: + debug_unregister_view(paiext_dbg, &debug_sprintf_view); + debug_unregister(paiext_dbg); +out_init: + attr_event_free(paiext_events_group.attrs, + ARRAY_SIZE(paiext_ctrnames) + 1); + return rc; +} + +device_initcall(paiext_init); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index d5119e039d85..42af4b3aa02b 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -224,13 +224,13 @@ unsigned long __get_wchan(struct task_struct *p) unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() & ~PAGE_MASK; + sp -= prandom_u32_max(PAGE_SIZE); return sp & ~0xf; } static inline unsigned long brk_rnd(void) { - return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT; + return (get_random_u16() & BRK_RND_MASK) << PAGE_SHIFT; } unsigned long arch_randomize_brk(struct mm_struct *mm) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index bbd4bde4f65d..ab19ddb09d65 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -58,7 +58,7 @@ #include <asm/smp.h> #include <asm/mmu_context.h> #include <asm/cpcmd.h> -#include <asm/lowcore.h> +#include <asm/abs_lowcore.h> #include <asm/nmi.h> #include <asm/irq.h> #include <asm/page.h> @@ -74,6 +74,7 @@ #include <asm/alternative.h> #include <asm/nospec-branch.h> #include <asm/mem_detect.h> +#include <asm/maccess.h> #include <asm/uv.h> #include <asm/asm-offsets.h> #include "entry.h" @@ -395,6 +396,7 @@ void __init arch_call_rest_init(void) { unsigned long stack; + smp_reinit_ipl_cpu(); stack = stack_alloc(); if (!stack) panic("Couldn't allocate kernel stack"); @@ -411,8 +413,9 @@ void __init arch_call_rest_init(void) static void __init setup_lowcore_dat_off(void) { unsigned long int_psw_mask = PSW_KERNEL_BITS; + struct lowcore *abs_lc, *lc; unsigned long mcck_stack; - struct lowcore *lc; + unsigned long flags; if (IS_ENABLED(CONFIG_KASAN)) int_psw_mask |= PSW_MASK_DAT; @@ -474,12 +477,14 @@ static void __init setup_lowcore_dat_off(void) lc->restart_data = 0; lc->restart_source = -1U; - put_abs_lowcore(restart_stack, lc->restart_stack); - put_abs_lowcore(restart_fn, lc->restart_fn); - put_abs_lowcore(restart_data, lc->restart_data); - put_abs_lowcore(restart_source, lc->restart_source); - put_abs_lowcore(restart_psw, lc->restart_psw); - put_abs_lowcore(mcesad, lc->mcesad); + abs_lc = get_abs_lowcore(&flags); + abs_lc->restart_stack = lc->restart_stack; + abs_lc->restart_fn = lc->restart_fn; + abs_lc->restart_data = lc->restart_data; + abs_lc->restart_source = lc->restart_source; + abs_lc->restart_psw = lc->restart_psw; + abs_lc->mcesad = lc->mcesad; + put_abs_lowcore(abs_lc, flags); mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); if (!mcck_stack) @@ -500,8 +505,8 @@ static void __init setup_lowcore_dat_off(void) static void __init setup_lowcore_dat_on(void) { - struct lowcore *lc = lowcore_ptr[0]; - int cr; + struct lowcore *abs_lc; + unsigned long flags; __ctl_clear_bit(0, 28); S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT; @@ -510,10 +515,15 @@ static void __init setup_lowcore_dat_on(void) S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT; __ctl_set_bit(0, 28); __ctl_store(S390_lowcore.cregs_save_area, 0, 15); - put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS); - put_abs_lowcore(program_new_psw, lc->program_new_psw); - for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++) - put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]); + if (abs_lowcore_map(0, lowcore_ptr[0], true)) + panic("Couldn't setup absolute lowcore"); + abs_lowcore_mapped = true; + abs_lc = get_abs_lowcore(&flags); + abs_lc->restart_flags = RESTART_FLAG_CTLREGS; + abs_lc->program_new_psw = S390_lowcore.program_new_psw; + memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area, + sizeof(abs_lc->cregs_save_area)); + put_abs_lowcore(abs_lc, flags); } static struct resource code_resource = { @@ -1019,10 +1029,10 @@ void __init setup_arch(char **cmdline_p) reserve_crashkernel(); #ifdef CONFIG_CRASH_DUMP /* - * Be aware that smp_save_dump_cpus() triggers a system reset. + * Be aware that smp_save_dump_secondary_cpus() triggers a system reset. * Therefore CPU and device initialization should be done afterwards. */ - smp_save_dump_cpus(); + smp_save_dump_secondary_cpus(); #endif setup_resources(); @@ -1041,12 +1051,15 @@ void __init setup_arch(char **cmdline_p) * Create kernel page tables and switch to virtual addressing. */ paging_init(); - + memcpy_real_init(); /* * After paging_init created the kernel page table, the new PSWs * in lowcore can now run with DAT enabled. */ setup_lowcore_dat_on(); +#ifdef CONFIG_CRASH_DUMP + smp_save_dump_ipl_cpu(); +#endif /* Setup default console */ conmode_default(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 30c91d565933..0031325ce4bc 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -45,7 +45,7 @@ #include <asm/irq.h> #include <asm/tlbflush.h> #include <asm/vtimer.h> -#include <asm/lowcore.h> +#include <asm/abs_lowcore.h> #include <asm/sclp.h> #include <asm/debug.h> #include <asm/os_info.h> @@ -55,6 +55,7 @@ #include <asm/stacktrace.h> #include <asm/topology.h> #include <asm/vdso.h> +#include <asm/maccess.h> #include "entry.h" enum { @@ -212,10 +213,14 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->preempt_count = PREEMPT_DISABLED; if (nmi_alloc_mcesa(&lc->mcesad)) goto out; + if (abs_lowcore_map(cpu, lc, true)) + goto out_mcesa; lowcore_ptr[cpu] = lc; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc)); return 0; +out_mcesa: + nmi_free_mcesa(&lc->mcesad); out: stack_free(mcck_stack); stack_free(async_stack); @@ -237,6 +242,7 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[cpu] = NULL; + abs_lowcore_unmap(cpu); nmi_free_mcesa(&lc->mcesad); stack_free(async_stack); stack_free(mcck_stack); @@ -315,9 +321,12 @@ static void pcpu_delegate(struct pcpu *pcpu, pcpu_delegate_fn *func, void *data, unsigned long stack) { - struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; - unsigned int source_cpu = stap(); + struct lowcore *lc, *abs_lc; + unsigned int source_cpu; + unsigned long flags; + lc = lowcore_ptr[pcpu - pcpu_devices]; + source_cpu = stap(); __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); if (pcpu->address == source_cpu) { call_on_stack(2, stack, void, __pcpu_delegate, @@ -332,10 +341,12 @@ static void pcpu_delegate(struct pcpu *pcpu, lc->restart_data = (unsigned long)data; lc->restart_source = source_cpu; } else { - put_abs_lowcore(restart_stack, stack); - put_abs_lowcore(restart_fn, (unsigned long)func); - put_abs_lowcore(restart_data, (unsigned long)data); - put_abs_lowcore(restart_source, source_cpu); + abs_lc = get_abs_lowcore(&flags); + abs_lc->restart_stack = stack; + abs_lc->restart_fn = (unsigned long)func; + abs_lc->restart_data = (unsigned long)data; + abs_lc->restart_source = source_cpu; + put_abs_lowcore(abs_lc, flags); } __bpon(); asm volatile( @@ -581,6 +592,8 @@ static DEFINE_SPINLOCK(ctl_lock); void smp_ctl_set_clear_bit(int cr, int bit, bool set) { struct ec_creg_mask_parms parms = { .cr = cr, }; + struct lowcore *abs_lc; + unsigned long flags; u64 ctlreg; if (set) { @@ -591,9 +604,11 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set) parms.andval = ~(1UL << bit); } spin_lock(&ctl_lock); - get_abs_lowcore(ctlreg, cregs_save_area[cr]); + abs_lc = get_abs_lowcore(&flags); + ctlreg = abs_lc->cregs_save_area[cr]; ctlreg = (ctlreg & parms.andval) | parms.orval; - put_abs_lowcore(cregs_save_area[cr], ctlreg); + abs_lc->cregs_save_area[cr] = ctlreg; + put_abs_lowcore(abs_lc, flags); spin_unlock(&ctl_lock); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } @@ -650,35 +665,36 @@ int smp_store_status(int cpu) * This case does not exist for s390 anymore, setup_arch explicitly * deactivates the elfcorehdr= kernel parameter */ -static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr, - bool is_boot_cpu, __vector128 *vxrs) +static bool dump_available(void) { - if (is_boot_cpu) - vxrs = boot_cpu_vector_save_area; - else - __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(vxrs)); - save_area_add_vxrs(sa, vxrs); + return oldmem_data.start || is_ipl_type_dump(); } -static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr, - bool is_boot_cpu, void *regs) +void __init smp_save_dump_ipl_cpu(void) { - if (is_boot_cpu) - copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); - else - __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(regs)); + struct save_area *sa; + void *regs; + + if (!dump_available()) + return; + sa = save_area_alloc(true); + regs = memblock_alloc(512, 8); + if (!sa || !regs) + panic("could not allocate memory for boot CPU save area\n"); + copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); save_area_add_regs(sa, regs); + memblock_free(regs, 512); + if (MACHINE_HAS_VX) + save_area_add_vxrs(sa, boot_cpu_vector_save_area); } -void __init smp_save_dump_cpus(void) +void __init smp_save_dump_secondary_cpus(void) { int addr, boot_cpu_addr, max_cpu_addr; struct save_area *sa; - bool is_boot_cpu; void *page; - if (!(oldmem_data.start || is_ipl_type_dump())) - /* No previous system present, normal boot. */ + if (!dump_available()) return; /* Allocate a page as dumping area for the store status sigps */ page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); @@ -691,26 +707,20 @@ void __init smp_save_dump_cpus(void) boot_cpu_addr = stap(); max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev; for (addr = 0; addr <= max_cpu_addr; addr++) { + if (addr == boot_cpu_addr) + continue; if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) == SIGP_CC_NOT_OPERATIONAL) continue; - is_boot_cpu = (addr == boot_cpu_addr); - /* Allocate save area */ - sa = save_area_alloc(is_boot_cpu); + sa = save_area_alloc(false); if (!sa) panic("could not allocate memory for save area\n"); - if (MACHINE_HAS_VX) - /* Get the vector registers */ - smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page); - /* - * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers - * of the boot CPU are stored in the HSA. To retrieve - * these registers an SCLP request is required which is - * done by drivers/s390/char/zcore.c:init_cpu_info() - */ - if (!is_boot_cpu || oldmem_data.start) - /* Get the CPU registers */ - smp_save_cpu_regs(sa, addr, is_boot_cpu, page); + __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page)); + save_area_add_regs(sa, page); + if (MACHINE_HAS_VX) { + __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page)); + save_area_add_vxrs(sa, page); + } } memblock_free(page, PAGE_SIZE); diag_amode31_ops.diag308_reset(); @@ -1256,7 +1266,7 @@ static __always_inline void set_new_lowcore(struct lowcore *lc) : "memory", "cc"); } -static int __init smp_reinit_ipl_cpu(void) +int __init smp_reinit_ipl_cpu(void) { unsigned long async_stack, nodat_stack, mcck_stack; struct lowcore *lc, *lc_ipl; @@ -1281,6 +1291,8 @@ static int __init smp_reinit_ipl_cpu(void) __ctl_clear_bit(0, 28); /* disable lowcore protection */ S390_lowcore.mcesad = mcesad; __ctl_load(cr0, 0, 0); + if (abs_lowcore_map(0, lc, false)) + panic("Couldn't remap absolute lowcore"); lowcore_ptr[0] = lc; local_mcck_enable(); local_irq_restore(flags); @@ -1291,4 +1303,3 @@ static int __init smp_reinit_ipl_cpu(void) return 0; } -early_initcall(smp_reinit_ipl_cpu); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 5075cde77b29..3105ca5bd470 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -69,10 +69,11 @@ static struct page *find_timens_vvar_page(struct vm_area_struct *vma) int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) { struct mm_struct *mm = task->mm; + VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *vma; mmap_read_lock(mm); - for (vma = mm->mmap; vma; vma = vma->vm_next) { + for_each_vma(vmi, vma) { unsigned long size = vma->vm_end - vma->vm_start; if (!vma_is_special_mapping(vma, &vvar_mapping)) @@ -226,7 +227,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned long len) end -= len; if (end > start) { - offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); + offset = prandom_u32_max(((end - start) >> PAGE_SHIFT) + 1); addr = start + (offset << PAGE_SHIFT); } else { addr = start; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b7ef0b71014d..45d4b8182b07 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4343,8 +4343,6 @@ retry: goto retry; } - /* nothing to do, just clear the request */ - kvm_clear_request(KVM_REQ_UNHALT, vcpu); /* we left the vsie handler, nothing to do, just clear the request */ kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index f7f5adea8940..be14c58cb989 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -13,13 +13,10 @@ void __delay(unsigned long loops) { - /* - * To end the bloody studid and useless discussion about the - * BogoMips number I took the liberty to define the __delay - * function in a way that that resulting BogoMips number will - * yield the megahertz number of the cpu. The important function - * is udelay and that is done using the tod clock. -- martin. - */ + /* + * Loop 'loops' times. Callers must not assume a specific + * amount of time passes before this function returns. + */ asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1)); } EXPORT_SYMBOL(__delay); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index d7b3b193d108..58033dfcb6d4 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -81,8 +81,9 @@ unsigned long _copy_from_user_key(void *to, const void __user *from, might_fault(); if (!should_fail_usercopy()) { - instrument_copy_from_user(to, from, n); + instrument_copy_from_user_before(to, from, n); res = raw_copy_from_user_key(to, from, n, key); + instrument_copy_from_user_after(to, from, n, res); } if (unlikely(res)) memset(to + (n - res), 0, res); diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 9f9af5298dd6..9953819d7959 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -8,8 +8,10 @@ #include <linux/kasan.h> #include <asm/ptdump.h> #include <asm/kasan.h> +#include <asm/abs_lowcore.h> #include <asm/nospec-branch.h> #include <asm/sections.h> +#include <asm/maccess.h> static unsigned long max_addr; @@ -21,6 +23,8 @@ struct addr_marker { enum address_markers_idx { IDENTITY_BEFORE_NR = 0, IDENTITY_BEFORE_END_NR, + AMODE31_START_NR, + AMODE31_END_NR, KERNEL_START_NR, KERNEL_END_NR, #ifdef CONFIG_KFENCE @@ -39,11 +43,17 @@ enum address_markers_idx { VMALLOC_END_NR, MODULES_NR, MODULES_END_NR, + ABS_LOWCORE_NR, + ABS_LOWCORE_END_NR, + MEMCPY_REAL_NR, + MEMCPY_REAL_END_NR, }; static struct addr_marker address_markers[] = { [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"}, [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"}, + [AMODE31_START_NR] = {0, "Amode31 Area Start"}, + [AMODE31_END_NR] = {0, "Amode31 Area End"}, [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"}, [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"}, #ifdef CONFIG_KFENCE @@ -62,6 +72,10 @@ static struct addr_marker address_markers[] = { [VMALLOC_END_NR] = {0, "vmalloc Area End"}, [MODULES_NR] = {0, "Modules Area Start"}, [MODULES_END_NR] = {0, "Modules Area End"}, + [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"}, + [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"}, + [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"}, + [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"}, { -1, NULL } }; @@ -276,8 +290,14 @@ static int pt_dump_init(void) max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; + address_markers[AMODE31_START_NR].start_address = __samode31; + address_markers[AMODE31_END_NR].start_address = __eamode31; address_markers[MODULES_NR].start_address = MODULES_VADDR; address_markers[MODULES_END_NR].start_address = MODULES_END; + address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore; + address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE; + address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area; + address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + PAGE_SIZE; address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size; address_markers[VMALLOC_NR].start_address = VMALLOC_START; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 9ab6ca6f7f59..9649d9382e0a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -268,8 +268,7 @@ static noinline void do_sigbus(struct pt_regs *regs) (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); } -static noinline void do_fault_error(struct pt_regs *regs, int access, - vm_fault_t fault) +static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault) { int si_code; @@ -516,7 +515,7 @@ void do_protection_exception(struct pt_regs *regs) fault = do_exception(regs, access); } if (unlikely(fault)) - do_fault_error(regs, access, fault); + do_fault_error(regs, fault); } NOKPROBE_SYMBOL(do_protection_exception); @@ -528,7 +527,7 @@ void do_dat_exception(struct pt_regs *regs) access = VM_ACCESS_FLAGS; fault = do_exception(regs, access); if (unlikely(fault)) - do_fault_error(regs, access, fault); + do_fault_error(regs, fault); } NOKPROBE_SYMBOL(do_dat_exception); @@ -803,7 +802,7 @@ void do_secure_storage_access(struct pt_regs *regs) addr = __gmap_translate(gmap, addr); mmap_read_unlock(mm); if (IS_ERR_VALUE(addr)) { - do_fault_error(regs, VM_ACCESS_FLAGS, VM_FAULT_BADMAP); + do_fault_error(regs, VM_FAULT_BADMAP); break; } fallthrough; @@ -813,7 +812,7 @@ void do_secure_storage_access(struct pt_regs *regs) vma = find_vma(mm, addr); if (!vma) { mmap_read_unlock(mm); - do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); + do_fault_error(regs, VM_FAULT_BADMAP); break; } page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET); @@ -836,7 +835,7 @@ void do_secure_storage_access(struct pt_regs *regs) BUG(); break; default: - do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); + do_fault_error(regs, VM_FAULT_BADMAP); WARN_ON_ONCE(1); } } @@ -848,7 +847,7 @@ void do_non_secure_storage_access(struct pt_regs *regs) struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; if (get_fault_type(regs) != GMAP_FAULT) { - do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); + do_fault_error(regs, VM_FAULT_BADMAP); WARN_ON_ONCE(1); return; } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 62758cb5872f..02d15c8dc92e 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2515,8 +2515,9 @@ static const struct mm_walk_ops thp_split_walk_ops = { static inline void thp_split_mm(struct mm_struct *mm) { struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); - for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + for_each_vma(vmi, vma) { vma->vm_flags &= ~VM_HUGEPAGE; vma->vm_flags |= VM_NOHUGEPAGE; walk_page_vma(vma, &thp_split_walk_ops, NULL); @@ -2584,8 +2585,9 @@ int gmap_mark_unmergeable(void) struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int ret; + VMA_ITERATOR(vmi, mm, 0); - for (vma = mm->mmap; vma; vma = vma->vm_next) { + for_each_vma(vmi, vma) { ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, MADV_UNMERGEABLE, &vma->vm_flags); if (ret) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 10e51ef9c79a..c299a18273ff 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -237,16 +237,6 @@ int pud_huge(pud_t pud) return pud_large(pud); } -struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int flags) -{ - if (flags & FOLL_GET) - return NULL; - - return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); -} - bool __init arch_hugetlb_valid_size(unsigned long size) { if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 4a154a084966..97d66a3e60fb 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -37,7 +37,7 @@ #include <asm/kfence.h> #include <asm/ptdump.h> #include <asm/dma.h> -#include <asm/lowcore.h> +#include <asm/abs_lowcore.h> #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/sections.h> diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index d6d84e02f35a..1571cdcb0c50 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -12,10 +12,17 @@ #include <linux/errno.h> #include <linux/gfp.h> #include <linux/cpu.h> +#include <linux/uio.h> #include <asm/asm-extable.h> #include <asm/ctl_reg.h> #include <asm/io.h> +#include <asm/abs_lowcore.h> #include <asm/stacktrace.h> +#include <asm/maccess.h> + +unsigned long __bootdata_preserved(__memcpy_real_area); +static __ro_after_init pte_t *memcpy_real_ptep; +static DEFINE_MUTEX(memcpy_real_mutex); static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size) { @@ -76,118 +83,72 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size) return dst; } -static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count) +void __init memcpy_real_init(void) { - union register_pair _dst, _src; - int rc = -EFAULT; - - _dst.even = (unsigned long) dest; - _dst.odd = (unsigned long) count; - _src.even = (unsigned long) src; - _src.odd = (unsigned long) count; - asm volatile ( - "0: mvcle %[dst],%[src],0\n" - "1: jo 0b\n" - " lhi %[rc],0\n" - "2:\n" - EX_TABLE(1b,2b) - : [rc] "+&d" (rc), [dst] "+&d" (_dst.pair), [src] "+&d" (_src.pair) - : : "cc", "memory"); - return rc; -} - -static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest, - unsigned long src, - unsigned long count) -{ - int irqs_disabled, rc; - unsigned long flags; - - if (!count) - return 0; - flags = arch_local_irq_save(); - irqs_disabled = arch_irqs_disabled_flags(flags); - if (!irqs_disabled) - trace_hardirqs_off(); - __arch_local_irq_stnsm(0xf8); // disable DAT - rc = __memcpy_real((void *) dest, (void *) src, (size_t) count); - if (flags & PSW_MASK_DAT) - __arch_local_irq_stosm(0x04); // enable DAT - if (!irqs_disabled) - trace_hardirqs_on(); - __arch_local_irq_ssm(flags); - return rc; + memcpy_real_ptep = vmem_get_alloc_pte(__memcpy_real_area, true); + if (!memcpy_real_ptep) + panic("Couldn't setup memcpy real area"); } -/* - * Copy memory in real mode (kernel to kernel) - */ -int memcpy_real(void *dest, unsigned long src, size_t count) +size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count) { - unsigned long _dest = (unsigned long)dest; - unsigned long _src = (unsigned long)src; - unsigned long _count = (unsigned long)count; - int rc; - - if (S390_lowcore.nodat_stack != 0) { - preempt_disable(); - rc = call_on_stack(3, S390_lowcore.nodat_stack, - unsigned long, _memcpy_real, - unsigned long, _dest, - unsigned long, _src, - unsigned long, _count); - preempt_enable(); - return rc; + size_t len, copied, res = 0; + unsigned long phys, offset; + void *chunk; + pte_t pte; + + while (count) { + phys = src & PAGE_MASK; + offset = src & ~PAGE_MASK; + chunk = (void *)(__memcpy_real_area + offset); + len = min(count, PAGE_SIZE - offset); + pte = mk_pte_phys(phys, PAGE_KERNEL_RO); + + mutex_lock(&memcpy_real_mutex); + if (pte_val(pte) != pte_val(*memcpy_real_ptep)) { + __ptep_ipte(__memcpy_real_area, memcpy_real_ptep, 0, 0, IPTE_GLOBAL); + set_pte(memcpy_real_ptep, pte); + } + copied = copy_to_iter(chunk, len, iter); + mutex_unlock(&memcpy_real_mutex); + + count -= copied; + src += copied; + res += copied; + if (copied < len) + break; } - /* - * This is a really early memcpy_real call, the stacks are - * not set up yet. Just call _memcpy_real on the early boot - * stack - */ - return _memcpy_real(_dest, _src, _count); + return res; } -/* - * Copy memory in absolute mode (kernel to kernel) - */ -void memcpy_absolute(void *dest, void *src, size_t count) +int memcpy_real(void *dest, unsigned long src, size_t count) { - unsigned long cr0, flags, prefix; - - flags = arch_local_irq_save(); - __ctl_store(cr0, 0, 0); - __ctl_clear_bit(0, 28); /* disable lowcore protection */ - prefix = store_prefix(); - if (prefix) { - local_mcck_disable(); - set_prefix(0); - memcpy(dest, src, count); - set_prefix(prefix); - local_mcck_enable(); - } else { - memcpy(dest, src, count); - } - __ctl_load(cr0, 0, 0); - arch_local_irq_restore(flags); + struct iov_iter iter; + struct kvec kvec; + + kvec.iov_base = dest; + kvec.iov_len = count; + iov_iter_kvec(&iter, WRITE, &kvec, 1, count); + if (memcpy_real_iter(&iter, src, count) < count) + return -EFAULT; + return 0; } /* - * Check if physical address is within prefix or zero page + * Find CPU that owns swapped prefix page */ -static int is_swapped(phys_addr_t addr) +static int get_swapped_owner(phys_addr_t addr) { phys_addr_t lc; int cpu; - if (addr < sizeof(struct lowcore)) - return 1; for_each_online_cpu(cpu) { lc = virt_to_phys(lowcore_ptr[cpu]); if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc) continue; - return 1; + return cpu; } - return 0; + return -1; } /* @@ -200,17 +161,35 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) { void *ptr = phys_to_virt(addr); void *bounce = ptr; + struct lowcore *abs_lc; + unsigned long flags; unsigned long size; + int this_cpu, cpu; cpus_read_lock(); - preempt_disable(); - if (is_swapped(addr)) { - size = PAGE_SIZE - (addr & ~PAGE_MASK); - bounce = (void *) __get_free_page(GFP_ATOMIC); - if (bounce) - memcpy_absolute(bounce, ptr, size); + this_cpu = get_cpu(); + if (addr >= sizeof(struct lowcore)) { + cpu = get_swapped_owner(addr); + if (cpu < 0) + goto out; + } + bounce = (void *)__get_free_page(GFP_ATOMIC); + if (!bounce) + goto out; + size = PAGE_SIZE - (addr & ~PAGE_MASK); + if (addr < sizeof(struct lowcore)) { + abs_lc = get_abs_lowcore(&flags); + ptr = (void *)abs_lc + addr; + memcpy(bounce, ptr, size); + put_abs_lowcore(abs_lc, flags); + } else if (cpu == this_cpu) { + ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu])); + memcpy(bounce, ptr, size); + } else { + memcpy(bounce, ptr, size); } - preempt_enable(); +out: + put_cpu(); cpus_read_unlock(); return bounce; } diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 5980ce348832..3327c47bc181 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -37,7 +37,7 @@ static inline int mmap_is_legacy(struct rlimit *rlim_stack) unsigned long arch_mmap_rnd(void) { - return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT; + return (get_random_u32() & MMAP_RND_MASK) << PAGE_SHIFT; } static unsigned long mmap_base_legacy(unsigned long rnd) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index c2583f921ca8..ee1a97078527 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -240,7 +240,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, } else if (pmd_none(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE) && - MACHINE_HAS_EDAT1 && addr && direct && + MACHINE_HAS_EDAT1 && direct && !debug_pagealloc_enabled()) { set_pmd(pmd, __pmd(__pa(addr) | prot)); pages++; @@ -336,7 +336,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, } else if (pud_none(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE) && - MACHINE_HAS_EDAT2 && addr && direct && + MACHINE_HAS_EDAT2 && direct && !debug_pagealloc_enabled()) { set_pud(pud, __pud(__pa(addr) | prot)); pages++; @@ -561,6 +561,103 @@ int vmem_add_mapping(unsigned long start, unsigned long size) } /* + * Allocate new or return existing page-table entry, but do not map it + * to any physical address. If missing, allocate segment- and region- + * table entries along. Meeting a large segment- or region-table entry + * while traversing is an error, since the function is expected to be + * called against virtual regions reserverd for 4KB mappings only. + */ +pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc) +{ + pte_t *ptep = NULL; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) { + if (!alloc) + goto out; + p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); + if (!p4d) + goto out; + pgd_populate(&init_mm, pgd, p4d); + } + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + if (!alloc) + goto out; + pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); + if (!pud) + goto out; + p4d_populate(&init_mm, p4d, pud); + } + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + if (!alloc) + goto out; + pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); + if (!pmd) + goto out; + pud_populate(&init_mm, pud, pmd); + } else if (WARN_ON_ONCE(pud_large(*pud))) { + goto out; + } + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + if (!alloc) + goto out; + pte = vmem_pte_alloc(); + if (!pte) + goto out; + pmd_populate(&init_mm, pmd, pte); + } else if (WARN_ON_ONCE(pmd_large(*pmd))) { + goto out; + } + ptep = pte_offset_kernel(pmd, addr); +out: + return ptep; +} + +int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc) +{ + pte_t *ptep, pte; + + if (!IS_ALIGNED(addr, PAGE_SIZE)) + return -EINVAL; + ptep = vmem_get_alloc_pte(addr, alloc); + if (!ptep) + return -ENOMEM; + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); + pte = mk_pte_phys(phys, prot); + set_pte(ptep, pte); + return 0; +} + +int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot) +{ + int rc; + + mutex_lock(&vmem_mutex); + rc = __vmem_map_4k_page(addr, phys, prot, true); + mutex_unlock(&vmem_mutex); + return rc; +} + +void vmem_unmap_4k_page(unsigned long addr) +{ + pte_t *ptep; + + mutex_lock(&vmem_mutex); + ptep = virt_to_kpte(addr); + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); + pte_clear(&init_mm, addr, ptep); + mutex_unlock(&vmem_mutex); +} + +/* * map whole physical memory to virtual memory (identity mapping) * we reserve enough space in the vmalloc area for vmemmap to hotplug * additional memory segments. @@ -584,6 +681,9 @@ void __init vmem_map_init(void) __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); + /* lowcore requires 4k mapping for real addresses / prefixing */ + set_memory_4k(0, LC_PAGES); + /* lowcore must be executable for LPSWE */ if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index f46833a25526..227cf0a62800 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -666,7 +666,7 @@ static int __init dma_alloc_cpu_table_caches(void) int __init zpci_dma_init(void) { - s390_iommu_aperture = (u64)high_memory; + s390_iommu_aperture = (u64)virt_to_phys(high_memory); if (!s390_iommu_aperture_factor) s390_iommu_aperture = ULONG_MAX; else diff --git a/arch/sh/Makefile b/arch/sh/Makefile index b39412bf91fb..5c8776482530 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -114,8 +114,6 @@ endif export ld-bfd -head-y := arch/sh/kernel/head_32.o - # Mach groups machdir-$(CONFIG_SOLUTION_ENGINE) += mach-se machdir-$(CONFIG_SH_HP6XX) += mach-hp6xx diff --git a/arch/sh/configs/apsh4a3a_defconfig b/arch/sh/configs/apsh4a3a_defconfig index 530498f18990..99931a13a74d 100644 --- a/arch/sh/configs/apsh4a3a_defconfig +++ b/arch/sh/configs/apsh4a3a_defconfig @@ -85,7 +85,7 @@ CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_PREEMPT is not set # CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y # CONFIG_FTRACE is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index 6abd9bd70106..d9fb124bf015 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -116,7 +116,7 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_VM=y CONFIG_DWARF_UNWINDER=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig index e699e2e04128..b52e14ccb450 100644 --- a/arch/sh/configs/ecovec24_defconfig +++ b/arch/sh/configs/ecovec24_defconfig @@ -8,7 +8,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_CPU_SUBTYPE_SH7724=y -CONFIG_FORCE_MAX_ZONEORDER=12 +CONFIG_ARCH_FORCE_MAX_ORDER=12 CONFIG_MEMORY_SIZE=0x10000000 CONFIG_FLATMEM_MANUAL=y CONFIG_SH_ECOVEC=y diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig index d77f54e906fd..f427a95bcd21 100644 --- a/arch/sh/configs/edosk7760_defconfig +++ b/arch/sh/configs/edosk7760_defconfig @@ -107,7 +107,7 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set CONFIG_TIMER_STATS=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_CRYPTO=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_DES=y diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig index 0989ed929540..ef1d98e35c91 100644 --- a/arch/sh/configs/magicpanelr2_defconfig +++ b/arch/sh/configs/magicpanelr2_defconfig @@ -84,7 +84,7 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y # CONFIG_SCHED_DEBUG is not set CONFIG_DEBUG_KOBJECT=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_FRAME_POINTER=y CONFIG_CRC_CCITT=m CONFIG_CRC16=m diff --git a/arch/sh/configs/polaris_defconfig b/arch/sh/configs/polaris_defconfig index 246408ec7462..f42e4867ddc1 100644 --- a/arch/sh/configs/polaris_defconfig +++ b/arch/sh/configs/polaris_defconfig @@ -79,5 +79,5 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_DEBUG_SPINLOCK_SLEEP=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_SG=y diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index f823cc6b18f9..e527cd60a191 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -101,7 +101,7 @@ CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_PREEMPT is not set -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index f96bc20d4b1a..a3f952a83d97 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -96,7 +96,7 @@ CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_PREEMPT is not set CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_SH_STANDARD_BIOS=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_4KSTACKS=y diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig index 5a54e2b883f0..d00fafc021e1 100644 --- a/arch/sh/configs/rsk7203_defconfig +++ b/arch/sh/configs/rsk7203_defconfig @@ -112,7 +112,7 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_SPINLOCK_SLEEP=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index 7d6d32359848..41cb588ca99c 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -131,7 +131,7 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set CONFIG_TIMER_STATS=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_SH_STANDARD_BIOS=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_DES=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index a8662b6927ec..97b7356639ed 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -16,7 +16,6 @@ CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_MEMCG=y -CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index ee6d28ae08de..36356223d51c 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -93,7 +93,7 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_FRAME_POINTER=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index bad921bc10f8..46c5a263a239 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -121,7 +121,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_1=y CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_FRAME_POINTER=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_CCITT=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index 79f02f1c0dc8..259c69e3fa22 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -159,7 +159,7 @@ CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y # CONFIG_DETECT_SOFTLOCKUP is not set # CONFIG_SCHED_DEBUG is not set -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_FRAME_POINTER=y CONFIG_SH_STANDARD_BIOS=y CONFIG_CRYPTO_NULL=y diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig index a2700ab165af..2579dc4bc0c8 100644 --- a/arch/sh/configs/sh7757lcr_defconfig +++ b/arch/sh/configs/sh7757lcr_defconfig @@ -80,6 +80,6 @@ CONFIG_NLS_ISO8859_1=y CONFIG_DEBUG_KERNEL=y # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y # CONFIG_FTRACE is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig index 7eb3c10f28ad..781ff13227fc 100644 --- a/arch/sh/configs/sh7785lcr_32bit_defconfig +++ b/arch/sh/configs/sh7785lcr_32bit_defconfig @@ -141,7 +141,7 @@ CONFIG_DEBUG_KMEMLEAK=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_SPINLOCK_SLEEP=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_LATENCYTOP=y # CONFIG_FTRACE is not set CONFIG_CRYPTO_HMAC=y diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index cb2f56468fe0..8fc687c98fd1 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -14,7 +14,6 @@ CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_MEMCG=y -CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_DEV_INITRD=y @@ -139,7 +138,7 @@ CONFIG_PRINTK_TIME=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_FRAME_POINTER=y # CONFIG_FTRACE is not set # CONFIG_DUMP_CODE is not set diff --git a/arch/sh/include/asm/hw_breakpoint.h b/arch/sh/include/asm/hw_breakpoint.h index 199d17b765f2..361a0f57bdeb 100644 --- a/arch/sh/include/asm/hw_breakpoint.h +++ b/arch/sh/include/asm/hw_breakpoint.h @@ -48,10 +48,7 @@ struct pmu; /* Maximum number of UBC channels */ #define HBP_NUM 2 -static inline int hw_breakpoint_slots(int type) -{ - return HBP_NUM; -} +#define hw_breakpoint_slots(type) (HBP_NUM) /* arch/sh/kernel/hw_breakpoint.c */ extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h index 45240ec6b85a..27aebf1e75a2 100644 --- a/arch/sh/include/asm/processor_32.h +++ b/arch/sh/include/asm/processor_32.h @@ -127,9 +127,6 @@ struct task_struct; extern void start_thread(struct pt_regs *regs, unsigned long new_pc, unsigned long new_sp); -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - /* * FPU lazy state save handling. */ diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index aa0fbc9202b1..69cd9ac4b2ab 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -3,7 +3,7 @@ # Makefile for the Linux/SuperH kernel. # -extra-y := head_32.o vmlinux.lds +extra-y := vmlinux.lds ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities @@ -12,7 +12,7 @@ endif CFLAGS_REMOVE_return_address.o = -pg -obj-y := debugtraps.o dumpstack.o \ +obj-y := head_32.o debugtraps.o dumpstack.o \ idle.o io.o irq.o irq_32.o kdebugfs.o \ machvec.o nmi_debug.o process.o \ process_32.o ptrace.o ptrace_32.o \ diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index a808843375e7..92b6649d4929 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -84,11 +84,6 @@ void flush_thread(void) #endif } -void release_thread(struct task_struct *dead_task) -{ - /* do nothing */ -} - asmlinkage void ret_from_fork(void); asmlinkage void ret_from_kernel_thread(void); diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig index ba569cfb4368..411fdc0901f7 100644 --- a/arch/sh/mm/Kconfig +++ b/arch/sh/mm/Kconfig @@ -18,7 +18,7 @@ config PAGE_OFFSET default "0x80000000" if MMU default "0x00000000" -config FORCE_MAX_ZONEORDER +config ARCH_FORCE_MAX_ORDER int "Maximum zone order" range 9 64 if PAGE_SIZE_16KB default "9" if PAGE_SIZE_16KB diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 1c852bb530ec..4d3d1af90d52 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -269,7 +269,7 @@ config ARCH_SPARSEMEM_ENABLE config ARCH_SPARSEMEM_DEFAULT def_bool y if SPARC64 -config FORCE_MAX_ZONEORDER +config ARCH_FORCE_MAX_ORDER int "Maximum zone order" default "13" help diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index fe58a410b4ce..a4ea5b05f288 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -56,8 +56,6 @@ endif endif -head-y := arch/sparc/kernel/head_$(BITS).o - libs-y += arch/sparc/prom/ libs-y += arch/sparc/lib/ diff --git a/arch/sparc/crypto/Kconfig b/arch/sparc/crypto/Kconfig new file mode 100644 index 000000000000..cfe5102b1c68 --- /dev/null +++ b/arch/sparc/crypto/Kconfig @@ -0,0 +1,90 @@ +# SPDX-License-Identifier: GPL-2.0 + +menu "Accelerated Cryptographic Algorithms for CPU (sparc64)" + +config CRYPTO_DES_SPARC64 + tristate "Ciphers: DES and Triple DES EDE, modes: ECB/CBC" + depends on SPARC64 + select CRYPTO_ALGAPI + select CRYPTO_LIB_DES + select CRYPTO_SKCIPHER + help + Block cipher: DES (FIPS 46-2) cipher algorithm + Block cipher: Triple DES EDE (FIPS 46-3) cipher algorithm + Length-preserving ciphers: DES with ECB and CBC modes + Length-preserving ciphers: Tripe DES EDE with ECB and CBC modes + + Architecture: sparc64 + +config CRYPTO_CRC32C_SPARC64 + tristate "CRC32c" + depends on SPARC64 + select CRYPTO_HASH + select CRC32 + help + CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) + + Architecture: sparc64 + +config CRYPTO_MD5_SPARC64 + tristate "Digests: MD5" + depends on SPARC64 + select CRYPTO_MD5 + select CRYPTO_HASH + help + MD5 message digest algorithm (RFC1321) + + Architecture: sparc64 using crypto instructions, when available + +config CRYPTO_SHA1_SPARC64 + tristate "Hash functions: SHA-1" + depends on SPARC64 + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash algorithm (FIPS 180) + + Architecture: sparc64 + +config CRYPTO_SHA256_SPARC64 + tristate "Hash functions: SHA-224 and SHA-256" + depends on SPARC64 + select CRYPTO_SHA256 + select CRYPTO_HASH + help + SHA-224 and SHA-256 secure hash algorithms (FIPS 180) + + Architecture: sparc64 using crypto instructions, when available + +config CRYPTO_SHA512_SPARC64 + tristate "Hash functions: SHA-384 and SHA-512" + depends on SPARC64 + select CRYPTO_SHA512 + select CRYPTO_HASH + help + SHA-384 and SHA-512 secure hash algorithms (FIPS 180) + + Architecture: sparc64 using crypto instructions, when available + +config CRYPTO_AES_SPARC64 + tristate "Ciphers: AES, modes: ECB, CBC, CTR" + depends on SPARC64 + select CRYPTO_SKCIPHER + help + Block ciphers: AES cipher algorithms (FIPS-197) + Length-preseving ciphers: AES with ECB, CBC, and CTR modes + + Architecture: sparc64 using crypto instructions + +config CRYPTO_CAMELLIA_SPARC64 + tristate "Ciphers: Camellia, modes: ECB, CBC" + depends on SPARC64 + select CRYPTO_ALGAPI + select CRYPTO_SKCIPHER + help + Block ciphers: Camellia cipher algorithms + Length-preserving ciphers: Camellia with ECB and CBC modes + + Architecture: sparc64 + +endmenu diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index b26c35336b51..ba8b70ffec08 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -80,9 +80,6 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc, : "memory"); } -/* Free all resources held by a thread. */ -#define release_thread(tsk) do { } while(0) - unsigned long __get_wchan(struct task_struct *); #define task_pt_regs(tsk) ((tsk)->thread.kregs) diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index 89850dff6b03..2667f35d5ea5 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -176,9 +176,6 @@ do { \ regs->tstate &= ~TSTATE_PEF; \ } while (0) -/* Free all resources held by a thread. */ -#define release_thread(tsk) do { } while (0) - unsigned long __get_wchan(struct task_struct *task); #define task_pt_regs(tsk) (task_thread_info(tsk)->kregs) diff --git a/arch/sparc/include/asm/termios.h b/arch/sparc/include/asm/termios.h deleted file mode 100644 index 4a558efdfa93..000000000000 --- a/arch/sparc/include/asm/termios.h +++ /dev/null @@ -1,147 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SPARC_TERMIOS_H -#define _SPARC_TERMIOS_H - -#include <uapi/asm/termios.h> - - -/* - * c_cc characters in the termio structure. Oh, how I love being - * backwardly compatible. Notice that character 4 and 5 are - * interpreted differently depending on whether ICANON is set in - * c_lflag. If it's set, they are used as _VEOF and _VEOL, otherwise - * as _VMIN and V_TIME. This is for compatibility with OSF/1 (which - * is compatible with sysV)... - */ -#define _VMIN 4 -#define _VTIME 5 - -/* intr=^C quit=^\ erase=del kill=^U - eof=^D eol=\0 eol2=\0 sxtc=\0 - start=^Q stop=^S susp=^Z dsusp=^Y - reprint=^R discard=^U werase=^W lnext=^V - vmin=\1 vtime=\0 -*/ -#define INIT_C_CC "\003\034\177\025\004\000\000\000\021\023\032\031\022\025\027\026\001" - -/* - * Translate a "termio" structure into a "termios". Ugh. - */ -#define user_termio_to_kernel_termios(termios, termio) \ -({ \ - unsigned short tmp; \ - int err; \ - err = get_user(tmp, &(termio)->c_iflag); \ - (termios)->c_iflag = (0xffff0000 & ((termios)->c_iflag)) | tmp; \ - err |= get_user(tmp, &(termio)->c_oflag); \ - (termios)->c_oflag = (0xffff0000 & ((termios)->c_oflag)) | tmp; \ - err |= get_user(tmp, &(termio)->c_cflag); \ - (termios)->c_cflag = (0xffff0000 & ((termios)->c_cflag)) | tmp; \ - err |= get_user(tmp, &(termio)->c_lflag); \ - (termios)->c_lflag = (0xffff0000 & ((termios)->c_lflag)) | tmp; \ - err |= copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \ - err; \ -}) - -/* - * Translate a "termios" structure into a "termio". Ugh. - * - * Note the "fun" _VMIN overloading. - */ -#define kernel_termios_to_user_termio(termio, termios) \ -({ \ - int err; \ - err = put_user((termios)->c_iflag, &(termio)->c_iflag); \ - err |= put_user((termios)->c_oflag, &(termio)->c_oflag); \ - err |= put_user((termios)->c_cflag, &(termio)->c_cflag); \ - err |= put_user((termios)->c_lflag, &(termio)->c_lflag); \ - err |= put_user((termios)->c_line, &(termio)->c_line); \ - err |= copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \ - if (!((termios)->c_lflag & ICANON)) { \ - err |= put_user((termios)->c_cc[VMIN], &(termio)->c_cc[_VMIN]); \ - err |= put_user((termios)->c_cc[VTIME], &(termio)->c_cc[_VTIME]); \ - } \ - err; \ -}) - -#define user_termios_to_kernel_termios(k, u) \ -({ \ - int err; \ - err = get_user((k)->c_iflag, &(u)->c_iflag); \ - err |= get_user((k)->c_oflag, &(u)->c_oflag); \ - err |= get_user((k)->c_cflag, &(u)->c_cflag); \ - err |= get_user((k)->c_lflag, &(u)->c_lflag); \ - err |= get_user((k)->c_line, &(u)->c_line); \ - err |= copy_from_user((k)->c_cc, (u)->c_cc, NCCS); \ - if ((k)->c_lflag & ICANON) { \ - err |= get_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \ - err |= get_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \ - } else { \ - err |= get_user((k)->c_cc[VMIN], &(u)->c_cc[_VMIN]); \ - err |= get_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \ - } \ - err |= get_user((k)->c_ispeed, &(u)->c_ispeed); \ - err |= get_user((k)->c_ospeed, &(u)->c_ospeed); \ - err; \ -}) - -#define kernel_termios_to_user_termios(u, k) \ -({ \ - int err; \ - err = put_user((k)->c_iflag, &(u)->c_iflag); \ - err |= put_user((k)->c_oflag, &(u)->c_oflag); \ - err |= put_user((k)->c_cflag, &(u)->c_cflag); \ - err |= put_user((k)->c_lflag, &(u)->c_lflag); \ - err |= put_user((k)->c_line, &(u)->c_line); \ - err |= copy_to_user((u)->c_cc, (k)->c_cc, NCCS); \ - if (!((k)->c_lflag & ICANON)) { \ - err |= put_user((k)->c_cc[VMIN], &(u)->c_cc[_VMIN]); \ - err |= put_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \ - } else { \ - err |= put_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \ - err |= put_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \ - } \ - err |= put_user((k)->c_ispeed, &(u)->c_ispeed); \ - err |= put_user((k)->c_ospeed, &(u)->c_ospeed); \ - err; \ -}) - -#define user_termios_to_kernel_termios_1(k, u) \ -({ \ - int err; \ - err = get_user((k)->c_iflag, &(u)->c_iflag); \ - err |= get_user((k)->c_oflag, &(u)->c_oflag); \ - err |= get_user((k)->c_cflag, &(u)->c_cflag); \ - err |= get_user((k)->c_lflag, &(u)->c_lflag); \ - err |= get_user((k)->c_line, &(u)->c_line); \ - err |= copy_from_user((k)->c_cc, (u)->c_cc, NCCS); \ - if ((k)->c_lflag & ICANON) { \ - err |= get_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \ - err |= get_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \ - } else { \ - err |= get_user((k)->c_cc[VMIN], &(u)->c_cc[_VMIN]); \ - err |= get_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \ - } \ - err; \ -}) - -#define kernel_termios_to_user_termios_1(u, k) \ -({ \ - int err; \ - err = put_user((k)->c_iflag, &(u)->c_iflag); \ - err |= put_user((k)->c_oflag, &(u)->c_oflag); \ - err |= put_user((k)->c_cflag, &(u)->c_cflag); \ - err |= put_user((k)->c_lflag, &(u)->c_lflag); \ - err |= put_user((k)->c_line, &(u)->c_line); \ - err |= copy_to_user((u)->c_cc, (k)->c_cc, NCCS); \ - if (!((k)->c_lflag & ICANON)) { \ - err |= put_user((k)->c_cc[VMIN], &(u)->c_cc[_VMIN]); \ - err |= put_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \ - } else { \ - err |= put_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \ - err |= put_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \ - } \ - err; \ -}) - -#endif /* _SPARC_TERMIOS_H */ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index d3a0e072ebe8..0984bb6f0f17 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -7,8 +7,6 @@ asflags-y := -ansi ccflags-y := -Werror -extra-y := head_$(BITS).o - # Undefine sparc when processing vmlinux.lds - it is used # And teach CPP we are doing $(BITS) builds (for this case) CPPFLAGS_vmlinux.lds := -Usparc -m$(BITS) @@ -22,6 +20,7 @@ CFLAGS_REMOVE_perf_event.o := -pg CFLAGS_REMOVE_pcr.o := -pg endif +obj-y := head_$(BITS).o obj-$(CONFIG_SPARC64) += urtt_fill.o obj-$(CONFIG_SPARC32) += entry.o wof.o wuf.o obj-$(CONFIG_SPARC32) += etrap_32.o @@ -87,12 +86,13 @@ obj-$(CONFIG_SPARC64_SMP) += hvtramp.o obj-y += auxio_$(BITS).o obj-$(CONFIG_SUN_PM) += apc.o pmc.o +obj-y += termios.o + obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULES) += sparc_ksyms.o obj-$(CONFIG_SPARC_LED) += led.o obj-$(CONFIG_KGDB) += kgdb_$(BITS).o - obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o @@ -104,6 +104,7 @@ obj-$(CONFIG_SPARC64_PCI) += pci_psycho.o pci_sabre.o pci_schizo.o obj-$(CONFIG_SPARC64_PCI) += pci_sun4v.o pci_sun4v_asm.o pci_fire.o obj-$(CONFIG_SPARC64_PCI_MSI) += pci_msi.o + obj-$(CONFIG_COMPAT) += sys32.o sys_sparc32.o signal32.o obj-$(CONFIG_US3_MC) += chmc.o diff --git a/arch/sparc/kernel/termios.c b/arch/sparc/kernel/termios.c new file mode 100644 index 000000000000..ee64965c27cd --- /dev/null +++ b/arch/sparc/kernel/termios.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/termios_internal.h> + +/* + * c_cc characters in the termio structure. Oh, how I love being + * backwardly compatible. Notice that character 4 and 5 are + * interpreted differently depending on whether ICANON is set in + * c_lflag. If it's set, they are used as _VEOF and _VEOL, otherwise + * as _VMIN and V_TIME. This is for compatibility with OSF/1 (which + * is compatible with sysV)... + */ +#define _VMIN 4 +#define _VTIME 5 + +int kernel_termios_to_user_termio(struct termio __user *termio, + struct ktermios *termios) +{ + struct termio v; + memset(&v, 0, sizeof(struct termio)); + v.c_iflag = termios->c_iflag; + v.c_oflag = termios->c_oflag; + v.c_cflag = termios->c_cflag; + v.c_lflag = termios->c_lflag; + v.c_line = termios->c_line; + memcpy(v.c_cc, termios->c_cc, NCC); + if (!(v.c_lflag & ICANON)) { + v.c_cc[_VMIN] = termios->c_cc[VMIN]; + v.c_cc[_VTIME] = termios->c_cc[VTIME]; + } + return copy_to_user(termio, &v, sizeof(struct termio)); +} + +int user_termios_to_kernel_termios(struct ktermios *k, + struct termios2 __user *u) +{ + int err; + err = get_user(k->c_iflag, &u->c_iflag); + err |= get_user(k->c_oflag, &u->c_oflag); + err |= get_user(k->c_cflag, &u->c_cflag); + err |= get_user(k->c_lflag, &u->c_lflag); + err |= get_user(k->c_line, &u->c_line); + err |= copy_from_user(k->c_cc, u->c_cc, NCCS); + if (k->c_lflag & ICANON) { + err |= get_user(k->c_cc[VEOF], &u->c_cc[VEOF]); + err |= get_user(k->c_cc[VEOL], &u->c_cc[VEOL]); + } else { + err |= get_user(k->c_cc[VMIN], &u->c_cc[_VMIN]); + err |= get_user(k->c_cc[VTIME], &u->c_cc[_VTIME]); + } + err |= get_user(k->c_ispeed, &u->c_ispeed); + err |= get_user(k->c_ospeed, &u->c_ospeed); + return err; +} + +int kernel_termios_to_user_termios(struct termios2 __user *u, + struct ktermios *k) +{ + int err; + err = put_user(k->c_iflag, &u->c_iflag); + err |= put_user(k->c_oflag, &u->c_oflag); + err |= put_user(k->c_cflag, &u->c_cflag); + err |= put_user(k->c_lflag, &u->c_lflag); + err |= put_user(k->c_line, &u->c_line); + err |= copy_to_user(u->c_cc, k->c_cc, NCCS); + if (!(k->c_lflag & ICANON)) { + err |= put_user(k->c_cc[VMIN], &u->c_cc[_VMIN]); + err |= put_user(k->c_cc[VTIME], &u->c_cc[_VTIME]); + } else { + err |= put_user(k->c_cc[VEOF], &u->c_cc[VEOF]); + err |= put_user(k->c_cc[VEOL], &u->c_cc[VEOL]); + } + err |= put_user(k->c_ispeed, &u->c_ispeed); + err |= put_user(k->c_ospeed, &u->c_ospeed); + return err; +} + +int user_termios_to_kernel_termios_1(struct ktermios *k, + struct termios __user *u) +{ + int err; + err = get_user(k->c_iflag, &u->c_iflag); + err |= get_user(k->c_oflag, &u->c_oflag); + err |= get_user(k->c_cflag, &u->c_cflag); + err |= get_user(k->c_lflag, &u->c_lflag); + err |= get_user(k->c_line, &u->c_line); + err |= copy_from_user(k->c_cc, u->c_cc, NCCS); + if (k->c_lflag & ICANON) { + err |= get_user(k->c_cc[VEOF], &u->c_cc[VEOF]); + err |= get_user(k->c_cc[VEOL], &u->c_cc[VEOL]); + } else { + err |= get_user(k->c_cc[VMIN], &u->c_cc[_VMIN]); + err |= get_user(k->c_cc[VTIME], &u->c_cc[_VTIME]); + } + return err; +} + +int kernel_termios_to_user_termios_1(struct termios __user *u, + struct ktermios *k) +{ + int err; + err = put_user(k->c_iflag, &u->c_iflag); + err |= put_user(k->c_oflag, &u->c_oflag); + err |= put_user(k->c_cflag, &u->c_cflag); + err |= put_user(k->c_lflag, &u->c_lflag); + err |= put_user(k->c_line, &u->c_line); + err |= copy_to_user(u->c_cc, k->c_cc, NCCS); + if (!(k->c_lflag & ICANON)) { + err |= put_user(k->c_cc[VMIN], &u->c_cc[_VMIN]); + err |= put_user(k->c_cc[VTIME], &u->c_cc[_VTIME]); + } else { + err |= put_user(k->c_cc[VEOF], &u->c_cc[VEOF]); + err |= put_user(k->c_cc[VEOL], &u->c_cc[VEOL]); + } + return err; +} diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c index cc19e09b0fa1..ae9a86cb6f3d 100644 --- a/arch/sparc/vdso/vma.c +++ b/arch/sparc/vdso/vma.c @@ -354,7 +354,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned int len) unsigned int offset; /* This loses some more bits than a modulo, but is cheaper */ - offset = get_random_int() & (PTRS_PER_PTE - 1); + offset = prandom_u32_max(PTRS_PER_PTE); return start + (offset << PAGE_SHIFT); } diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig index fb51bd206dbe..c0162286d68b 100644 --- a/arch/um/configs/i386_defconfig +++ b/arch/um/configs/i386_defconfig @@ -69,5 +69,5 @@ CONFIG_JOLIET=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NLS=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig index 477b87317424..bec6e5d95687 100644 --- a/arch/um/configs/x86_64_defconfig +++ b/arch/um/configs/x86_64_defconfig @@ -67,6 +67,6 @@ CONFIG_JOLIET=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NLS=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_FRAME_WARN=1024 CONFIG_DEBUG_KERNEL=y diff --git a/arch/um/drivers/chan.h b/arch/um/drivers/chan.h index c37cc4f26f91..3fec3b8406e9 100644 --- a/arch/um/drivers/chan.h +++ b/arch/um/drivers/chan.h @@ -36,7 +36,6 @@ extern int console_write_chan(struct chan *chan, const char *buf, int len); extern int console_open_chan(struct line *line, struct console *co); extern void deactivate_chan(struct chan *chan, int irq); -extern void reactivate_chan(struct chan *chan, int irq); extern void chan_enable_winch(struct chan *chan, struct tty_port *port); extern int enable_chan(struct line *line); extern void close_chan(struct line *line); diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 8ca67a692683..5026e7b9adfe 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -283,7 +283,7 @@ struct unplugged_pages { }; static DEFINE_MUTEX(plug_mem_mutex); -static unsigned long long unplugged_pages_count = 0; +static unsigned long long unplugged_pages_count; static LIST_HEAD(unplugged_pages); static int unplug_index = UNPLUGGED_PER_PAGE; @@ -846,13 +846,12 @@ static int notify_panic(struct notifier_block *self, unsigned long unused1, mconsole_notify(notify_socket, MCONSOLE_PANIC, message, strlen(message) + 1); - return 0; + return NOTIFY_DONE; } static struct notifier_block panic_exit_notifier = { - .notifier_call = notify_panic, - .next = NULL, - .priority = 1 + .notifier_call = notify_panic, + .priority = INT_MAX, /* run as soon as possible */ }; static int add_notifier(void) diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c index 0bf78ff89011..807cd3358740 100644 --- a/arch/um/drivers/mmapper_kern.c +++ b/arch/um/drivers/mmapper_kern.c @@ -122,7 +122,7 @@ static int __init mmapper_init(void) return 0; } -static void mmapper_exit(void) +static void __exit mmapper_exit(void) { misc_deregister(&mmapper_dev); } diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 59331384c2d3..3d7836c46507 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -265,7 +265,7 @@ static void uml_net_poll_controller(struct net_device *dev) static void uml_net_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver)); + strscpy(info->driver, DRIVER_NAME, sizeof(info->driver)); } static const struct ethtool_ops uml_net_ethtool_ops = { diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c index 8514966778d5..277cea3d30eb 100644 --- a/arch/um/drivers/ssl.c +++ b/arch/um/drivers/ssl.c @@ -106,7 +106,7 @@ static const struct tty_operations ssl_ops = { /* Changed by ssl_init and referenced by ssl_exit, which are both serialized * by being an initcall and exitcall, respectively. */ -static int ssl_init_done = 0; +static int ssl_init_done; static void ssl_console_write(struct console *c, const char *string, unsigned len) diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c index 489d5a746ed3..1c239737d88e 100644 --- a/arch/um/drivers/stdio_console.c +++ b/arch/um/drivers/stdio_console.c @@ -88,7 +88,7 @@ static int con_remove(int n, char **error_out) } /* Set in an initcall, checked in an exitcall */ -static int con_init_done = 0; +static int con_init_done; static int con_install(struct tty_driver *driver, struct tty_struct *tty) { diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index eb2d2f0f0bcc..f4c1e6e97ad5 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1555,7 +1555,7 @@ static void do_io(struct io_thread_req *req, struct io_desc *desc) int kernel_fd = -1; /* Only changed by the io thread. XXX: currently unused. */ -static int io_count = 0; +static int io_count; int io_thread(void *arg) { diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 548265312743..ded7c47d2fbe 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1372,7 +1372,7 @@ static void vector_net_poll_controller(struct net_device *dev) static void vector_net_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver)); + strscpy(info->driver, DRIVER_NAME, sizeof(info->driver)); } static int vector_net_load_bpf_flash(struct net_device *dev, diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index 027847023184..acb55b302b14 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -857,7 +857,7 @@ void *pci_root_bus_fwnode(struct pci_bus *bus) return um_pci_fwnode; } -static int um_pci_init(void) +static int __init um_pci_init(void) { int err, i; @@ -940,7 +940,7 @@ free: } module_init(um_pci_init); -static void um_pci_exit(void) +static void __exit um_pci_exit(void) { unregister_virtio_driver(&um_pci_virtio_driver); irq_domain_remove(um_pci_msi_domain); diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index e719af8bdf56..588930a0ced1 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -374,45 +374,48 @@ static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev, u8 extra_payload[512]; } msg; int rc; + irqreturn_t irq_rc = IRQ_NONE; - rc = vhost_user_recv_req(vu_dev, &msg.msg, - sizeof(msg.msg.payload) + - sizeof(msg.extra_payload)); - - vu_dev->recv_rc = rc; - if (rc) - return IRQ_NONE; - - switch (msg.msg.header.request) { - case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG: - vu_dev->config_changed_irq = true; - response = 0; - break; - case VHOST_USER_SLAVE_VRING_CALL: - virtio_device_for_each_vq((&vu_dev->vdev), vq) { - if (vq->index == msg.msg.payload.vring_state.index) { - response = 0; - vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index); - break; + while (1) { + rc = vhost_user_recv_req(vu_dev, &msg.msg, + sizeof(msg.msg.payload) + + sizeof(msg.extra_payload)); + if (rc) + break; + + switch (msg.msg.header.request) { + case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG: + vu_dev->config_changed_irq = true; + response = 0; + break; + case VHOST_USER_SLAVE_VRING_CALL: + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + if (vq->index == msg.msg.payload.vring_state.index) { + response = 0; + vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index); + break; + } } + break; + case VHOST_USER_SLAVE_IOTLB_MSG: + /* not supported - VIRTIO_F_ACCESS_PLATFORM */ + case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: + /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */ + default: + vu_err(vu_dev, "unexpected slave request %d\n", + msg.msg.header.request); } - break; - case VHOST_USER_SLAVE_IOTLB_MSG: - /* not supported - VIRTIO_F_ACCESS_PLATFORM */ - case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: - /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */ - default: - vu_err(vu_dev, "unexpected slave request %d\n", - msg.msg.header.request); - } - - if (ev && !vu_dev->suspended) - time_travel_add_irq_event(ev); - if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY) - vhost_user_reply(vu_dev, &msg.msg, response); + if (ev && !vu_dev->suspended) + time_travel_add_irq_event(ev); - return IRQ_HANDLED; + if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY) + vhost_user_reply(vu_dev, &msg.msg, response); + irq_rc = IRQ_HANDLED; + }; + /* mask EAGAIN as we try non-blocking read until socket is empty */ + vu_dev->recv_rc = (rc == -EAGAIN) ? 0 : rc; + return irq_rc; } static irqreturn_t vu_req_interrupt(int irq, void *data) diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index d0fc1862da95..bb5f06480da9 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -55,10 +55,6 @@ struct thread_struct { .request = { 0 } \ } -static inline void release_thread(struct task_struct *task) -{ -} - /* * User space process size: 3GB (default). */ diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index e7c7b53a1435..91485119ae67 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -169,7 +169,7 @@ __uml_setup("iomem=", parse_iomem, ); /* - * This list is constructed in parse_iomem and addresses filled in in + * This list is constructed in parse_iomem and addresses filled in * setup_iomem, both of which run during early boot. Afterwards, it's * unchanged. */ diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 80b90b1276a1..010bc422a09d 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -356,7 +356,7 @@ int singlestepping(void * t) unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; + sp -= prandom_u32_max(8192); return sp & ~0xf; } #endif diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index bc38f79ca3a3..ad449173a1a1 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -584,21 +584,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, void flush_tlb_mm(struct mm_struct *mm) { - struct vm_area_struct *vma = mm->mmap; + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); - while (vma != NULL) { + for_each_vma(vmi, vma) fix_range(mm, vma->vm_start, vma->vm_end, 0); - vma = vma->vm_next; - } } void force_flush_all(void) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma = mm->mmap; + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); - while (vma != NULL) { + for_each_vma(vmi, vma) fix_range(mm, vma->vm_start, vma->vm_end, 1); - vma = vma->vm_next; - } } diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index d9e023c78f56..8adf8e89b255 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -96,7 +96,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + return *pos < nr_cpu_ids ? cpu_data + *pos : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) @@ -132,7 +132,7 @@ static int have_root __initdata; static int have_console __initdata; /* Set in uml_mem_setup and modified in linux_main */ -long long physmem_size = 32 * 1024 * 1024; +long long physmem_size = 64 * 1024 * 1024; EXPORT_SYMBOL(physmem_size); static const char *usage_string = @@ -247,13 +247,13 @@ static int panic_exit(struct notifier_block *self, unsigned long unused1, bust_spinlocks(0); uml_exitcode = 1; os_dump_core(); - return 0; + + return NOTIFY_DONE; } static struct notifier_block panic_exit_notifier = { - .notifier_call = panic_exit, - .next = NULL, - .priority = 0 + .notifier_call = panic_exit, + .priority = INT_MAX - 1, /* run as 2nd notifier, won't return */ }; void uml_finishsetup(void) @@ -416,7 +416,7 @@ void __init setup_arch(char **cmdline_p) read_initrd(); paging_init(); - strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); + strscpy(boot_command_line, command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; setup_hostinfo(host_info, sizeof host_info); diff --git a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c index 8031a038eb58..72bc60ade347 100644 --- a/arch/um/kernel/umid.c +++ b/arch/um/kernel/umid.c @@ -9,7 +9,7 @@ #include <os.h> /* Changed by set_umid_arg */ -static int umid_inited = 0; +static int umid_inited; static int __init set_umid_arg(char *name, int *add) { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 088af7c84e5d..6d1879ef933a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -85,6 +85,7 @@ config X86 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_NONLEAF_PMD_YOUNG if PGTABLE_LEVELS > 2 select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_COPY_MC if X86_64 select ARCH_HAS_SET_MEMORY @@ -130,7 +131,9 @@ config X86 select CLKEVT_I8253 select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CLOCKSOURCE_WATCHDOG - select DCACHE_WORD_ACCESS + # Word-size accesses may read uninitialized data past the trailing \0 + # in strings and cause false KMSAN reports. + select DCACHE_WORD_ACCESS if !KMSAN select DYNAMIC_SIGFRAME select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT @@ -168,6 +171,7 @@ config X86 select HAVE_ARCH_KASAN if X86_64 select HAVE_ARCH_KASAN_VMALLOC if X86_64 select HAVE_ARCH_KFENCE + select HAVE_ARCH_KMSAN if X86_64 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT @@ -328,6 +332,10 @@ config GENERIC_ISA_DMA def_bool y depends on ISA_DMA_API +config GENERIC_CSUM + bool + default y if KMSAN || KASAN + config GENERIC_BUG def_bool y depends on BUG diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2d7e640674c6..415a5d138de4 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -244,11 +244,6 @@ archheaders: ### # Kernel objects -head-y := arch/x86/kernel/head_$(BITS).o -head-y += arch/x86/kernel/head$(BITS).o -head-y += arch/x86/kernel/ebda.o -head-y += arch/x86/kernel/platform-quirks.o - libs-y += arch/x86/lib/ # drivers-y are linked after core-y diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index ffec8bb01ba8..9860ca5979f8 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -12,6 +12,7 @@ # Sanitizer runtimes are unavailable and cannot be linked for early boot code. KASAN_SANITIZE := n KCSAN_SANITIZE := n +KMSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y # Kernel does not boot with kcov instrumentation here. diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 35ce1a64068b..3a261abb6d15 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -20,6 +20,7 @@ # Sanitizer runtimes are unavailable and cannot be linked for early boot code. KASAN_SANITIZE := n KCSAN_SANITIZE := n +KMSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 4a3f223973f4..e476bcbd9b42 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -29,6 +29,7 @@ #include <linux/uts.h> #include <linux/utsname.h> #include <linux/ctype.h> +#include <generated/utsversion.h> #include <generated/utsrelease.h> #define _SETUP diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c index a1aaaf6c06a6..945383f0f606 100644 --- a/arch/x86/boot/version.c +++ b/arch/x86/boot/version.c @@ -11,6 +11,7 @@ */ #include "boot.h" +#include <generated/utsversion.h> #include <generated/utsrelease.h> #include <generated/compile.h> diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 7207219509f6..3cf34912abfe 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -27,7 +27,6 @@ CONFIG_CGROUP_MISC=y CONFIG_CGROUP_DEBUG=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y -# CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_SMP=y CONFIG_HYPERVISOR_GUEST=y @@ -44,6 +43,7 @@ CONFIG_EFI_STUB=y CONFIG_HZ_1000=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y +# CONFIG_RETHUNK is not set CONFIG_HIBERNATION=y CONFIG_PM_DEBUG=y CONFIG_PM_TRACE_RTC=y @@ -62,6 +62,7 @@ CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_CGROUP_IOPRIO=y CONFIG_BINFMT_MISC=y +# CONFIG_COMPAT_BRK is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -269,9 +270,10 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_KERNEL=y CONFIG_FRAME_WARN=1024 CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_WX=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 5ce67b73e218..27759236fd60 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -26,7 +26,6 @@ CONFIG_CGROUP_MISC=y CONFIG_CGROUP_DEBUG=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y -# CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_SMP=y CONFIG_HYPERVISOR_GUEST=y @@ -62,6 +61,7 @@ CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_CGROUP_IOPRIO=y CONFIG_BINFMT_MISC=y +# CONFIG_COMPAT_BRK is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -267,8 +267,9 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_PRINTK_TIME=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_WX=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig new file mode 100644 index 000000000000..71c4c473d34b --- /dev/null +++ b/arch/x86/crypto/Kconfig @@ -0,0 +1,484 @@ +# SPDX-License-Identifier: GPL-2.0 + +menu "Accelerated Cryptographic Algorithms for CPU (x86)" + +config CRYPTO_CURVE25519_X86 + tristate "Public key crypto: Curve25519 (ADX)" + depends on X86 && 64BIT + select CRYPTO_LIB_CURVE25519_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CURVE25519 + help + Curve25519 algorithm + + Architecture: x86_64 using: + - ADX (large integer arithmetic) + +config CRYPTO_AES_NI_INTEL + tristate "Ciphers: AES, modes: ECB, CBC, CTS, CTR, XTR, XTS, GCM (AES-NI)" + depends on X86 + select CRYPTO_AEAD + select CRYPTO_LIB_AES + select CRYPTO_ALGAPI + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + help + Block cipher: AES cipher algorithms + AEAD cipher: AES with GCM + Length-preserving ciphers: AES with ECB, CBC, CTS, CTR, XTR, XTS + + Architecture: x86 (32-bit and 64-bit) using: + - AES-NI (AES new instructions) + +config CRYPTO_BLOWFISH_X86_64 + tristate "Ciphers: Blowfish, modes: ECB, CBC" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_BLOWFISH_COMMON + imply CRYPTO_CTR + help + Block cipher: Blowfish cipher algorithm + Length-preserving ciphers: Blowfish with ECB and CBC modes + + Architecture: x86_64 + +config CRYPTO_CAMELLIA_X86_64 + tristate "Ciphers: Camellia with modes: ECB, CBC" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + imply CRYPTO_CTR + help + Block cipher: Camellia cipher algorithms + Length-preserving ciphers: Camellia with ECB and CBC modes + + Architecture: x86_64 + +config CRYPTO_CAMELLIA_AESNI_AVX_X86_64 + tristate "Ciphers: Camellia with modes: ECB, CBC (AES-NI/AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_CAMELLIA_X86_64 + select CRYPTO_SIMD + imply CRYPTO_XTS + help + Length-preserving ciphers: Camellia with ECB and CBC modes + + Architecture: x86_64 using: + - AES-NI (AES New Instructions) + - AVX (Advanced Vector Extensions) + +config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 + tristate "Ciphers: Camellia with modes: ECB, CBC (AES-NI/AVX2)" + depends on X86 && 64BIT + select CRYPTO_CAMELLIA_AESNI_AVX_X86_64 + help + Length-preserving ciphers: Camellia with ECB and CBC modes + + Architecture: x86_64 using: + - AES-NI (AES New Instructions) + - AVX2 (Advanced Vector Extensions 2) + +config CRYPTO_CAST5_AVX_X86_64 + tristate "Ciphers: CAST5 with modes: ECB, CBC (AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_CAST5 + select CRYPTO_CAST_COMMON + select CRYPTO_SIMD + imply CRYPTO_CTR + help + Length-preserving ciphers: CAST5 (CAST-128) cipher algorithm + (RFC2144) with ECB and CBC modes + + Architecture: x86_64 using: + - AVX (Advanced Vector Extensions) + + Processes 16 blocks in parallel. + +config CRYPTO_CAST6_AVX_X86_64 + tristate "Ciphers: CAST6 with modes: ECB, CBC (AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_CAST6 + select CRYPTO_CAST_COMMON + select CRYPTO_SIMD + imply CRYPTO_XTS + imply CRYPTO_CTR + help + Length-preserving ciphers: CAST6 (CAST-256) cipher algorithm + (RFC2612) with ECB and CBC modes + + Architecture: x86_64 using: + - AVX (Advanced Vector Extensions) + + Processes eight blocks in parallel. + +config CRYPTO_DES3_EDE_X86_64 + tristate "Ciphers: Triple DES EDE with modes: ECB, CBC" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_LIB_DES + imply CRYPTO_CTR + help + Block cipher: Triple DES EDE (FIPS 46-3) cipher algorithm + Length-preserving ciphers: Triple DES EDE with ECB and CBC modes + + Architecture: x86_64 + + Processes one or three blocks in parallel. + +config CRYPTO_SERPENT_SSE2_X86_64 + tristate "Ciphers: Serpent with modes: ECB, CBC (SSE2)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SERPENT + select CRYPTO_SIMD + imply CRYPTO_CTR + help + Length-preserving ciphers: Serpent cipher algorithm + with ECB and CBC modes + + Architecture: x86_64 using: + - SSE2 (Streaming SIMD Extensions 2) + + Processes eight blocks in parallel. + +config CRYPTO_SERPENT_SSE2_586 + tristate "Ciphers: Serpent with modes: ECB, CBC (32-bit with SSE2)" + depends on X86 && !64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SERPENT + select CRYPTO_SIMD + imply CRYPTO_CTR + help + Length-preserving ciphers: Serpent cipher algorithm + with ECB and CBC modes + + Architecture: x86 (32-bit) using: + - SSE2 (Streaming SIMD Extensions 2) + + Processes four blocks in parallel. + +config CRYPTO_SERPENT_AVX_X86_64 + tristate "Ciphers: Serpent with modes: ECB, CBC (AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SERPENT + select CRYPTO_SIMD + imply CRYPTO_XTS + imply CRYPTO_CTR + help + Length-preserving ciphers: Serpent cipher algorithm + with ECB and CBC modes + + Architecture: x86_64 using: + - AVX (Advanced Vector Extensions) + + Processes eight blocks in parallel. + +config CRYPTO_SERPENT_AVX2_X86_64 + tristate "Ciphers: Serpent with modes: ECB, CBC (AVX2)" + depends on X86 && 64BIT + select CRYPTO_SERPENT_AVX_X86_64 + help + Length-preserving ciphers: Serpent cipher algorithm + with ECB and CBC modes + + Architecture: x86_64 using: + - AVX2 (Advanced Vector Extensions 2) + + Processes 16 blocks in parallel. + +config CRYPTO_SM4_AESNI_AVX_X86_64 + tristate "Ciphers: SM4 with modes: ECB, CBC, CFB, CTR (AES-NI/AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_SM4 + help + Length-preserving ciphers: SM4 cipher algorithms + (OSCCA GB/T 32907-2016) with ECB, CBC, CFB, and CTR modes + + Architecture: x86_64 using: + - AES-NI (AES New Instructions) + - AVX (Advanced Vector Extensions) + + Through two affine transforms, + we can use the AES S-Box to simulate the SM4 S-Box to achieve the + effect of instruction acceleration. + + If unsure, say N. + +config CRYPTO_SM4_AESNI_AVX2_X86_64 + tristate "Ciphers: SM4 with modes: ECB, CBC, CFB, CTR (AES-NI/AVX2)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_SM4 + select CRYPTO_SM4_AESNI_AVX_X86_64 + help + Length-preserving ciphers: SM4 cipher algorithms + (OSCCA GB/T 32907-2016) with ECB, CBC, CFB, and CTR modes + + Architecture: x86_64 using: + - AES-NI (AES New Instructions) + - AVX2 (Advanced Vector Extensions 2) + + Through two affine transforms, + we can use the AES S-Box to simulate the SM4 S-Box to achieve the + effect of instruction acceleration. + + If unsure, say N. + +config CRYPTO_TWOFISH_586 + tristate "Ciphers: Twofish (32-bit)" + depends on (X86 || UML_X86) && !64BIT + select CRYPTO_ALGAPI + select CRYPTO_TWOFISH_COMMON + imply CRYPTO_CTR + help + Block cipher: Twofish cipher algorithm + + Architecture: x86 (32-bit) + +config CRYPTO_TWOFISH_X86_64 + tristate "Ciphers: Twofish" + depends on (X86 || UML_X86) && 64BIT + select CRYPTO_ALGAPI + select CRYPTO_TWOFISH_COMMON + imply CRYPTO_CTR + help + Block cipher: Twofish cipher algorithm + + Architecture: x86_64 + +config CRYPTO_TWOFISH_X86_64_3WAY + tristate "Ciphers: Twofish with modes: ECB, CBC (3-way parallel)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_TWOFISH_COMMON + select CRYPTO_TWOFISH_X86_64 + help + Length-preserving cipher: Twofish cipher algorithm + with ECB and CBC modes + + Architecture: x86_64 + + Processes three blocks in parallel, better utilizing resources of + out-of-order CPUs. + +config CRYPTO_TWOFISH_AVX_X86_64 + tristate "Ciphers: Twofish with modes: ECB, CBC (AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_TWOFISH_COMMON + select CRYPTO_TWOFISH_X86_64 + select CRYPTO_TWOFISH_X86_64_3WAY + imply CRYPTO_XTS + help + Length-preserving cipher: Twofish cipher algorithm + with ECB and CBC modes + + Architecture: x86_64 using: + - AVX (Advanced Vector Extensions) + + Processes eight blocks in parallel. + +config CRYPTO_ARIA_AESNI_AVX_X86_64 + tristate "Ciphers: ARIA with modes: ECB, CTR (AES-NI/AVX/GFNI)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_ARIA + help + Length-preserving cipher: ARIA cipher algorithms + (RFC 5794) with ECB and CTR modes + + Architecture: x86_64 using: + - AES-NI (AES New Instructions) + - AVX (Advanced Vector Extensions) + - GFNI (Galois Field New Instructions) + + Processes 16 blocks in parallel. + +config CRYPTO_CHACHA20_X86_64 + tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (SSSE3/AVX2/AVX-512VL)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_LIB_CHACHA_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CHACHA + help + Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12 + stream cipher algorithms + + Architecture: x86_64 using: + - SSSE3 (Supplemental SSE3) + - AVX2 (Advanced Vector Extensions 2) + - AVX-512VL (Advanced Vector Extensions-512VL) + +config CRYPTO_AEGIS128_AESNI_SSE2 + tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE2)" + depends on X86 && 64BIT + select CRYPTO_AEAD + select CRYPTO_SIMD + help + AEGIS-128 AEAD algorithm + + Architecture: x86_64 using: + - AES-NI (AES New Instructions) + - SSE2 (Streaming SIMD Extensions 2) + +config CRYPTO_NHPOLY1305_SSE2 + tristate "Hash functions: NHPoly1305 (SSE2)" + depends on X86 && 64BIT + select CRYPTO_NHPOLY1305 + help + NHPoly1305 hash function for Adiantum + + Architecture: x86_64 using: + - SSE2 (Streaming SIMD Extensions 2) + +config CRYPTO_NHPOLY1305_AVX2 + tristate "Hash functions: NHPoly1305 (AVX2)" + depends on X86 && 64BIT + select CRYPTO_NHPOLY1305 + help + NHPoly1305 hash function for Adiantum + + Architecture: x86_64 using: + - AVX2 (Advanced Vector Extensions 2) + +config CRYPTO_BLAKE2S_X86 + bool "Hash functions: BLAKE2s (SSSE3/AVX-512)" + depends on X86 && 64BIT + select CRYPTO_LIB_BLAKE2S_GENERIC + select CRYPTO_ARCH_HAVE_LIB_BLAKE2S + help + BLAKE2s cryptographic hash function (RFC 7693) + + Architecture: x86_64 using: + - SSSE3 (Supplemental SSE3) + - AVX-512 (Advanced Vector Extensions-512) + +config CRYPTO_POLYVAL_CLMUL_NI + tristate "Hash functions: POLYVAL (CLMUL-NI)" + depends on X86 && 64BIT + select CRYPTO_POLYVAL + help + POLYVAL hash function for HCTR2 + + Architecture: x86_64 using: + - CLMUL-NI (carry-less multiplication new instructions) + +config CRYPTO_POLY1305_X86_64 + tristate "Hash functions: Poly1305 (SSE2/AVX2)" + depends on X86 && 64BIT + select CRYPTO_LIB_POLY1305_GENERIC + select CRYPTO_ARCH_HAVE_LIB_POLY1305 + help + Poly1305 authenticator algorithm (RFC7539) + + Architecture: x86_64 using: + - SSE2 (Streaming SIMD Extensions 2) + - AVX2 (Advanced Vector Extensions 2) + +config CRYPTO_SHA1_SSSE3 + tristate "Hash functions: SHA-1 (SSSE3/AVX/AVX2/SHA-NI)" + depends on X86 && 64BIT + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash algorithm (FIPS 180) + + Architecture: x86_64 using: + - SSSE3 (Supplemental SSE3) + - AVX (Advanced Vector Extensions) + - AVX2 (Advanced Vector Extensions 2) + - SHA-NI (SHA Extensions New Instructions) + +config CRYPTO_SHA256_SSSE3 + tristate "Hash functions: SHA-224 and SHA-256 (SSSE3/AVX/AVX2/SHA-NI)" + depends on X86 && 64BIT + select CRYPTO_SHA256 + select CRYPTO_HASH + help + SHA-224 and SHA-256 secure hash algorithms (FIPS 180) + + Architecture: x86_64 using: + - SSSE3 (Supplemental SSE3) + - AVX (Advanced Vector Extensions) + - AVX2 (Advanced Vector Extensions 2) + - SHA-NI (SHA Extensions New Instructions) + +config CRYPTO_SHA512_SSSE3 + tristate "Hash functions: SHA-384 and SHA-512 (SSSE3/AVX/AVX2)" + depends on X86 && 64BIT + select CRYPTO_SHA512 + select CRYPTO_HASH + help + SHA-384 and SHA-512 secure hash algorithms (FIPS 180) + + Architecture: x86_64 using: + - SSSE3 (Supplemental SSE3) + - AVX (Advanced Vector Extensions) + - AVX2 (Advanced Vector Extensions 2) + +config CRYPTO_SM3_AVX_X86_64 + tristate "Hash functions: SM3 (AVX)" + depends on X86 && 64BIT + select CRYPTO_HASH + select CRYPTO_SM3 + help + SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3 + + Architecture: x86_64 using: + - AVX (Advanced Vector Extensions) + + If unsure, say N. + +config CRYPTO_GHASH_CLMUL_NI_INTEL + tristate "Hash functions: GHASH (CLMUL-NI)" + depends on X86 && 64BIT + select CRYPTO_CRYPTD + help + GCM GHASH hash function (NIST SP800-38D) + + Architecture: x86_64 using: + - CLMUL-NI (carry-less multiplication new instructions) + +config CRYPTO_CRC32C_INTEL + tristate "CRC32c (SSE4.2/PCLMULQDQ)" + depends on X86 + select CRYPTO_HASH + help + CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) + + Architecture: x86 (32-bit and 64-bit) using: + - SSE4.2 (Streaming SIMD Extensions 4.2) CRC32 instruction + - PCLMULQDQ (carry-less multiplication) + +config CRYPTO_CRC32_PCLMUL + tristate "CRC32 (PCLMULQDQ)" + depends on X86 + select CRYPTO_HASH + select CRC32 + help + CRC32 CRC algorithm (IEEE 802.3) + + Architecture: x86 (32-bit and 64-bit) using: + - PCLMULQDQ (carry-less multiplication) + +config CRYPTO_CRCT10DIF_PCLMUL + tristate "CRCT10DIF (PCLMULQDQ)" + depends on X86 && 64BIT && CRC_T10DIF + select CRYPTO_HASH + help + CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF) + + Architecture: x86_64 using: + - PCLMULQDQ (carry-less multiplication) + +endmenu diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 04d07ab744b2..3b1d701a4f6c 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -100,6 +100,9 @@ sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o +obj-$(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) += aria-aesni-avx-x86_64.o +aria-aesni-avx-x86_64-y := aria-aesni-avx-asm_64.o aria_aesni_avx_glue.o + quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $< > $@ $(obj)/%.S: $(src)/%.pl FORCE diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S new file mode 100644 index 000000000000..c75fd7d015ed --- /dev/null +++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S @@ -0,0 +1,1303 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * ARIA Cipher 16-way parallel algorithm (AVX) + * + * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com> + * + */ + +#include <linux/linkage.h> +#include <asm/frame.h> + +/* struct aria_ctx: */ +#define enc_key 0 +#define dec_key 272 +#define rounds 544 + +/* register macros */ +#define CTX %rdi + + +#define BV8(a0, a1, a2, a3, a4, a5, a6, a7) \ + ( (((a0) & 1) << 0) | \ + (((a1) & 1) << 1) | \ + (((a2) & 1) << 2) | \ + (((a3) & 1) << 3) | \ + (((a4) & 1) << 4) | \ + (((a5) & 1) << 5) | \ + (((a6) & 1) << 6) | \ + (((a7) & 1) << 7) ) + +#define BM8X8(l0, l1, l2, l3, l4, l5, l6, l7) \ + ( ((l7) << (0 * 8)) | \ + ((l6) << (1 * 8)) | \ + ((l5) << (2 * 8)) | \ + ((l4) << (3 * 8)) | \ + ((l3) << (4 * 8)) | \ + ((l2) << (5 * 8)) | \ + ((l1) << (6 * 8)) | \ + ((l0) << (7 * 8)) ) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +#define byteslice_16x16b(a0, b0, c0, d0, \ + a1, b1, c1, d1, \ + a2, b2, c2, d2, \ + a3, b3, c3, d3, \ + st0, st1) \ + vmovdqu d2, st0; \ + vmovdqu d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); \ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu a0, st0; \ + vmovdqu a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vmovdqu .Lshufb_16x16b, a0; \ + vmovdqu st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu d3, st1; \ + vmovdqu st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu d2, st0; \ + \ + transpose_4x4(a0, b0, c0, d0, d2, d3); \ + transpose_4x4(a1, b1, c1, d1, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu b0, st0; \ + vmovdqu b1, st1; \ + transpose_4x4(a2, b2, c2, d2, b0, b1); \ + transpose_4x4(a3, b3, c3, d3, b0, b1); \ + vmovdqu st0, b0; \ + vmovdqu st1, b1; \ + /* does not adjust output bytes inside vectors */ + +#define debyteslice_16x16b(a0, b0, c0, d0, \ + a1, b1, c1, d1, \ + a2, b2, c2, d2, \ + a3, b3, c3, d3, \ + st0, st1) \ + vmovdqu d2, st0; \ + vmovdqu d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); \ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu a0, st0; \ + vmovdqu a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vmovdqu .Lshufb_16x16b, a0; \ + vmovdqu st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu d3, st1; \ + vmovdqu st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu d2, st0; \ + \ + transpose_4x4(c0, d0, a0, b0, d2, d3); \ + transpose_4x4(c1, d1, a1, b1, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu b0, st0; \ + vmovdqu b1, st1; \ + transpose_4x4(c2, d2, a2, b2, b0, b1); \ + transpose_4x4(c3, d3, a3, b3, b0, b1); \ + vmovdqu st0, b0; \ + vmovdqu st1, b1; \ + /* does not adjust output bytes inside vectors */ + +/* load blocks to registers and apply pre-whitening */ +#define inpack16_pre(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + rio) \ + vmovdqu (0 * 16)(rio), x0; \ + vmovdqu (1 * 16)(rio), x1; \ + vmovdqu (2 * 16)(rio), x2; \ + vmovdqu (3 * 16)(rio), x3; \ + vmovdqu (4 * 16)(rio), x4; \ + vmovdqu (5 * 16)(rio), x5; \ + vmovdqu (6 * 16)(rio), x6; \ + vmovdqu (7 * 16)(rio), x7; \ + vmovdqu (8 * 16)(rio), y0; \ + vmovdqu (9 * 16)(rio), y1; \ + vmovdqu (10 * 16)(rio), y2; \ + vmovdqu (11 * 16)(rio), y3; \ + vmovdqu (12 * 16)(rio), y4; \ + vmovdqu (13 * 16)(rio), y5; \ + vmovdqu (14 * 16)(rio), y6; \ + vmovdqu (15 * 16)(rio), y7; + +/* byteslice pre-whitened blocks and store to temporary memory */ +#define inpack16_post(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_ab, mem_cd) \ + byteslice_16x16b(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + (mem_ab), (mem_cd)); \ + \ + vmovdqu x0, 0 * 16(mem_ab); \ + vmovdqu x1, 1 * 16(mem_ab); \ + vmovdqu x2, 2 * 16(mem_ab); \ + vmovdqu x3, 3 * 16(mem_ab); \ + vmovdqu x4, 4 * 16(mem_ab); \ + vmovdqu x5, 5 * 16(mem_ab); \ + vmovdqu x6, 6 * 16(mem_ab); \ + vmovdqu x7, 7 * 16(mem_ab); \ + vmovdqu y0, 0 * 16(mem_cd); \ + vmovdqu y1, 1 * 16(mem_cd); \ + vmovdqu y2, 2 * 16(mem_cd); \ + vmovdqu y3, 3 * 16(mem_cd); \ + vmovdqu y4, 4 * 16(mem_cd); \ + vmovdqu y5, 5 * 16(mem_cd); \ + vmovdqu y6, 6 * 16(mem_cd); \ + vmovdqu y7, 7 * 16(mem_cd); + +#define write_output(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem) \ + vmovdqu x0, 0 * 16(mem); \ + vmovdqu x1, 1 * 16(mem); \ + vmovdqu x2, 2 * 16(mem); \ + vmovdqu x3, 3 * 16(mem); \ + vmovdqu x4, 4 * 16(mem); \ + vmovdqu x5, 5 * 16(mem); \ + vmovdqu x6, 6 * 16(mem); \ + vmovdqu x7, 7 * 16(mem); \ + vmovdqu y0, 8 * 16(mem); \ + vmovdqu y1, 9 * 16(mem); \ + vmovdqu y2, 10 * 16(mem); \ + vmovdqu y3, 11 * 16(mem); \ + vmovdqu y4, 12 * 16(mem); \ + vmovdqu y5, 13 * 16(mem); \ + vmovdqu y6, 14 * 16(mem); \ + vmovdqu y7, 15 * 16(mem); \ + +#define aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, idx) \ + vmovdqu x0, ((idx + 0) * 16)(mem_tmp); \ + vmovdqu x1, ((idx + 1) * 16)(mem_tmp); \ + vmovdqu x2, ((idx + 2) * 16)(mem_tmp); \ + vmovdqu x3, ((idx + 3) * 16)(mem_tmp); \ + vmovdqu x4, ((idx + 4) * 16)(mem_tmp); \ + vmovdqu x5, ((idx + 5) * 16)(mem_tmp); \ + vmovdqu x6, ((idx + 6) * 16)(mem_tmp); \ + vmovdqu x7, ((idx + 7) * 16)(mem_tmp); + +#define aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, idx) \ + vmovdqu ((idx + 0) * 16)(mem_tmp), x0; \ + vmovdqu ((idx + 1) * 16)(mem_tmp), x1; \ + vmovdqu ((idx + 2) * 16)(mem_tmp), x2; \ + vmovdqu ((idx + 3) * 16)(mem_tmp), x3; \ + vmovdqu ((idx + 4) * 16)(mem_tmp), x4; \ + vmovdqu ((idx + 5) * 16)(mem_tmp), x5; \ + vmovdqu ((idx + 6) * 16)(mem_tmp), x6; \ + vmovdqu ((idx + 7) * 16)(mem_tmp), x7; + +#define aria_ark_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + t0, rk, idx, round) \ + /* AddRoundKey */ \ + vpbroadcastb ((round * 16) + idx + 3)(rk), t0; \ + vpxor t0, x0, x0; \ + vpbroadcastb ((round * 16) + idx + 2)(rk), t0; \ + vpxor t0, x1, x1; \ + vpbroadcastb ((round * 16) + idx + 1)(rk), t0; \ + vpxor t0, x2, x2; \ + vpbroadcastb ((round * 16) + idx + 0)(rk), t0; \ + vpxor t0, x3, x3; \ + vpbroadcastb ((round * 16) + idx + 7)(rk), t0; \ + vpxor t0, x4, x4; \ + vpbroadcastb ((round * 16) + idx + 6)(rk), t0; \ + vpxor t0, x5, x5; \ + vpbroadcastb ((round * 16) + idx + 5)(rk), t0; \ + vpxor t0, x6, x6; \ + vpbroadcastb ((round * 16) + idx + 4)(rk), t0; \ + vpxor t0, x7, x7; + +#define aria_sbox_8way_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + t0, t1, t2, t3, \ + t4, t5, t6, t7) \ + vpbroadcastq .Ltf_s2_bitmatrix, t0; \ + vpbroadcastq .Ltf_inv_bitmatrix, t1; \ + vpbroadcastq .Ltf_id_bitmatrix, t2; \ + vpbroadcastq .Ltf_aff_bitmatrix, t3; \ + vpbroadcastq .Ltf_x2_bitmatrix, t4; \ + vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \ + vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ + vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ + vgf2p8affineqb $(tf_inv_const), t1, x6, x6; \ + vgf2p8affineinvqb $0, t2, x2, x2; \ + vgf2p8affineinvqb $0, t2, x6, x6; \ + vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0; \ + vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4; \ + vgf2p8affineqb $(tf_x2_const), t4, x3, x3; \ + vgf2p8affineqb $(tf_x2_const), t4, x7, x7; \ + vgf2p8affineinvqb $0, t2, x3, x3; \ + vgf2p8affineinvqb $0, t2, x7, x7 + +#define aria_sbox_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + t0, t1, t2, t3, \ + t4, t5, t6, t7) \ + vpxor t7, t7, t7; \ + vmovdqa .Linv_shift_row, t0; \ + vmovdqa .Lshift_row, t1; \ + vpbroadcastd .L0f0f0f0f, t6; \ + vmovdqa .Ltf_lo__inv_aff__and__s2, t2; \ + vmovdqa .Ltf_hi__inv_aff__and__s2, t3; \ + vmovdqa .Ltf_lo__x2__and__fwd_aff, t4; \ + vmovdqa .Ltf_hi__x2__and__fwd_aff, t5; \ + \ + vaesenclast t7, x0, x0; \ + vaesenclast t7, x4, x4; \ + vaesenclast t7, x1, x1; \ + vaesenclast t7, x5, x5; \ + vaesdeclast t7, x2, x2; \ + vaesdeclast t7, x6, x6; \ + \ + /* AES inverse shift rows */ \ + vpshufb t0, x0, x0; \ + vpshufb t0, x4, x4; \ + vpshufb t0, x1, x1; \ + vpshufb t0, x5, x5; \ + vpshufb t1, x3, x3; \ + vpshufb t1, x7, x7; \ + vpshufb t1, x2, x2; \ + vpshufb t1, x6, x6; \ + \ + /* affine transformation for S2 */ \ + filter_8bit(x1, t2, t3, t6, t0); \ + /* affine transformation for S2 */ \ + filter_8bit(x5, t2, t3, t6, t0); \ + \ + /* affine transformation for X2 */ \ + filter_8bit(x3, t4, t5, t6, t0); \ + /* affine transformation for X2 */ \ + filter_8bit(x7, t4, t5, t6, t0); \ + vaesdeclast t7, x3, x3; \ + vaesdeclast t7, x7, x7; + +#define aria_diff_m(x0, x1, x2, x3, \ + t0, t1, t2, t3) \ + /* T = rotr32(X, 8); */ \ + /* X ^= T */ \ + vpxor x0, x3, t0; \ + vpxor x1, x0, t1; \ + vpxor x2, x1, t2; \ + vpxor x3, x2, t3; \ + /* X = T ^ rotr(X, 16); */ \ + vpxor t2, x0, x0; \ + vpxor x1, t3, t3; \ + vpxor t0, x2, x2; \ + vpxor t1, x3, x1; \ + vmovdqu t3, x3; + +#define aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7) \ + /* t1 ^= t2; */ \ + vpxor y0, x4, x4; \ + vpxor y1, x5, x5; \ + vpxor y2, x6, x6; \ + vpxor y3, x7, x7; \ + \ + /* t2 ^= t3; */ \ + vpxor y4, y0, y0; \ + vpxor y5, y1, y1; \ + vpxor y6, y2, y2; \ + vpxor y7, y3, y3; \ + \ + /* t0 ^= t1; */ \ + vpxor x4, x0, x0; \ + vpxor x5, x1, x1; \ + vpxor x6, x2, x2; \ + vpxor x7, x3, x3; \ + \ + /* t3 ^= t1; */ \ + vpxor x4, y4, y4; \ + vpxor x5, y5, y5; \ + vpxor x6, y6, y6; \ + vpxor x7, y7, y7; \ + \ + /* t2 ^= t0; */ \ + vpxor x0, y0, y0; \ + vpxor x1, y1, y1; \ + vpxor x2, y2, y2; \ + vpxor x3, y3, y3; \ + \ + /* t1 ^= t2; */ \ + vpxor y0, x4, x4; \ + vpxor y1, x5, x5; \ + vpxor y2, x6, x6; \ + vpxor y3, x7, x7; + +#define aria_fe(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T3 = ABCD -> BADC \ + * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \ + * T0 = ABCD -> CDAB \ + * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \ + * T1 = ABCD -> DCBA \ + * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \ + */ \ + aria_diff_word(x2, x3, x0, x1, \ + x7, x6, x5, x4, \ + y0, y1, y2, y3, \ + y5, y4, y7, y6); \ + aria_store_state_8way(x3, x2, x1, x0, \ + x6, x7, x4, x5, \ + mem_tmp, 0); + +#define aria_fo(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T1 = ABCD -> BADC \ + * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \ + * T2 = ABCD -> CDAB \ + * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \ + * T3 = ABCD -> DCBA \ + * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \ + */ \ + aria_diff_word(x0, x1, x2, x3, \ + x5, x4, x7, x6, \ + y2, y3, y0, y1, \ + y7, y6, y5, y4); \ + aria_store_state_8way(x3, x2, x1, x0, \ + x6, x7, x4, x5, \ + mem_tmp, 0); + +#define aria_ff(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round, last_round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, last_round); \ + \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, last_round); \ + \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); + +#define aria_fe_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T3 = ABCD -> BADC \ + * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \ + * T0 = ABCD -> CDAB \ + * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \ + * T1 = ABCD -> DCBA \ + * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \ + */ \ + aria_diff_word(x2, x3, x0, x1, \ + x7, x6, x5, x4, \ + y0, y1, y2, y3, \ + y5, y4, y7, y6); \ + aria_store_state_8way(x3, x2, x1, x0, \ + x6, x7, x4, x5, \ + mem_tmp, 0); + +#define aria_fo_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T1 = ABCD -> BADC \ + * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \ + * T2 = ABCD -> CDAB \ + * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \ + * T3 = ABCD -> DCBA \ + * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \ + */ \ + aria_diff_word(x0, x1, x2, x3, \ + x5, x4, x7, x6, \ + y2, y3, y0, y1, \ + y7, y6, y5, y4); \ + aria_store_state_8way(x3, x2, x1, x0, \ + x6, x7, x4, x5, \ + mem_tmp, 0); + +#define aria_ff_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round, last_round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, last_round); \ + \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, last_round); \ + \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); + +/* NB: section is mergeable, all elements must be aligned 16-byte blocks */ +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + +#define SHUFB_BYTES(idx) \ + 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) + +.Lshufb_16x16b: + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3); +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 +.Lshift_row: + .byte 0x00, 0x05, 0x0a, 0x0f, 0x04, 0x09, 0x0e, 0x03 + .byte 0x08, 0x0d, 0x02, 0x07, 0x0c, 0x01, 0x06, 0x0b +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 + .byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 + +/* AES inverse affine and S2 combined: + * 1 1 0 0 0 0 0 1 x0 0 + * 0 1 0 0 1 0 0 0 x1 0 + * 1 1 0 0 1 1 1 1 x2 0 + * 0 1 1 0 1 0 0 1 x3 1 + * 0 1 0 0 1 1 0 0 * x4 + 0 + * 0 1 0 1 1 0 0 0 x5 0 + * 0 0 0 0 0 1 0 1 x6 0 + * 1 1 1 0 0 1 1 1 x7 1 + */ +.Ltf_lo__inv_aff__and__s2: + .octa 0x92172DA81A9FA520B2370D883ABF8500 +.Ltf_hi__inv_aff__and__s2: + .octa 0x2B15FFC1AF917B45E6D8320C625CB688 + +/* X2 and AES forward affine combined: + * 1 0 1 1 0 0 0 1 x0 0 + * 0 1 1 1 1 0 1 1 x1 0 + * 0 0 0 1 1 0 1 0 x2 1 + * 0 1 0 0 0 1 0 0 x3 0 + * 0 0 1 1 1 0 1 1 * x4 + 0 + * 0 1 0 0 1 0 0 0 x5 0 + * 1 1 0 1 0 0 1 1 x6 0 + * 0 1 0 0 1 0 1 0 x7 0 + */ +.Ltf_lo__x2__and__fwd_aff: + .octa 0xEFAE0544FCBD1657B8F95213ABEA4100 +.Ltf_hi__x2__and__fwd_aff: + .octa 0x3F893781E95FE1576CDA64D2BA0CB204 + +.section .rodata.cst8, "aM", @progbits, 8 +.align 8 +/* AES affine: */ +#define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0) +.Ltf_aff_bitmatrix: + .quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1), + BV8(1, 1, 0, 0, 0, 1, 1, 1), + BV8(1, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 1, 0, 0, 0, 1), + BV8(1, 1, 1, 1, 1, 0, 0, 0), + BV8(0, 1, 1, 1, 1, 1, 0, 0), + BV8(0, 0, 1, 1, 1, 1, 1, 0), + BV8(0, 0, 0, 1, 1, 1, 1, 1)) + +/* AES inverse affine: */ +#define tf_inv_const BV8(1, 0, 1, 0, 0, 0, 0, 0) +.Ltf_inv_bitmatrix: + .quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1), + BV8(1, 0, 0, 1, 0, 0, 1, 0), + BV8(0, 1, 0, 0, 1, 0, 0, 1), + BV8(1, 0, 1, 0, 0, 1, 0, 0), + BV8(0, 1, 0, 1, 0, 0, 1, 0), + BV8(0, 0, 1, 0, 1, 0, 0, 1), + BV8(1, 0, 0, 1, 0, 1, 0, 0), + BV8(0, 1, 0, 0, 1, 0, 1, 0)) + +/* S2: */ +#define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1) +.Ltf_s2_bitmatrix: + .quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1), + BV8(0, 0, 1, 1, 1, 1, 1, 1), + BV8(1, 1, 1, 0, 1, 1, 0, 1), + BV8(1, 1, 0, 0, 0, 0, 1, 1), + BV8(0, 1, 0, 0, 0, 0, 1, 1), + BV8(1, 1, 0, 0, 1, 1, 1, 0), + BV8(0, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 1, 0, 1, 1, 0)) + +/* X2: */ +#define tf_x2_const BV8(0, 0, 1, 1, 0, 1, 0, 0) +.Ltf_x2_bitmatrix: + .quad BM8X8(BV8(0, 0, 0, 1, 1, 0, 0, 0), + BV8(0, 0, 1, 0, 0, 1, 1, 0), + BV8(0, 0, 0, 0, 1, 0, 1, 0), + BV8(1, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 0, 1, 1, 0, 0), + BV8(0, 1, 1, 0, 1, 0, 1, 1), + BV8(1, 0, 1, 1, 1, 1, 0, 1), + BV8(1, 0, 0, 1, 0, 0, 1, 1)) + +/* Identity matrix: */ +.Ltf_id_bitmatrix: + .quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0), + BV8(0, 1, 0, 0, 0, 0, 0, 0), + BV8(0, 0, 1, 0, 0, 0, 0, 0), + BV8(0, 0, 0, 1, 0, 0, 0, 0), + BV8(0, 0, 0, 0, 1, 0, 0, 0), + BV8(0, 0, 0, 0, 0, 1, 0, 0), + BV8(0, 0, 0, 0, 0, 0, 1, 0), + BV8(0, 0, 0, 0, 0, 0, 0, 1)) + +/* 4-bit mask */ +.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 +.align 4 +.L0f0f0f0f: + .long 0x0f0f0f0f + +.text + +SYM_FUNC_START_LOCAL(__aria_aesni_avx_crypt_16way) + /* input: + * %r9: rk + * %rsi: dst + * %rdx: src + * %xmm0..%xmm15: 16 byte-sliced blocks + */ + + FRAME_BEGIN + + movq %rsi, %rax; + leaq 8 * 16(%rax), %r8; + + inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r8); + aria_fo(%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 0); + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 1); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 2); + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 3); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 4); + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 5); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 6); + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 7); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 8); + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 9); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 10); + cmpl $12, rounds(CTX); + jne .Laria_192; + aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 11, 12); + jmp .Laria_end; +.Laria_192: + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 11); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 12); + cmpl $14, rounds(CTX); + jne .Laria_256; + aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 13, 14); + jmp .Laria_end; +.Laria_256: + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 13); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 14); + aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 15, 16); +.Laria_end: + debyteslice_16x16b(%xmm8, %xmm12, %xmm1, %xmm4, + %xmm9, %xmm13, %xmm0, %xmm5, + %xmm10, %xmm14, %xmm3, %xmm6, + %xmm11, %xmm15, %xmm2, %xmm7, + (%rax), (%r8)); + + FRAME_END + RET; +SYM_FUNC_END(__aria_aesni_avx_crypt_16way) + +SYM_FUNC_START(aria_aesni_avx_encrypt_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq enc_key(CTX), %r9; + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx); + + call __aria_aesni_avx_crypt_16way; + + write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx_encrypt_16way) + +SYM_FUNC_START(aria_aesni_avx_decrypt_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq dec_key(CTX), %r9; + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx); + + call __aria_aesni_avx_crypt_16way; + + write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx_decrypt_16way) + +SYM_FUNC_START_LOCAL(__aria_aesni_avx_ctr_gen_keystream_16way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: keystream + * %r8: iv (big endian, 128bit) + */ + + FRAME_BEGIN + /* load IV and byteswap */ + vmovdqu (%r8), %xmm8; + + vmovdqa .Lbswap128_mask (%rip), %xmm1; + vpshufb %xmm1, %xmm8, %xmm3; /* be => le */ + + vpcmpeqd %xmm0, %xmm0, %xmm0; + vpsrldq $8, %xmm0, %xmm0; /* low: -1, high: 0 */ + + /* construct IVs */ + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm9; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm10; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm11; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm12; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm13; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm14; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm15; + vmovdqu %xmm8, (0 * 16)(%rcx); + vmovdqu %xmm9, (1 * 16)(%rcx); + vmovdqu %xmm10, (2 * 16)(%rcx); + vmovdqu %xmm11, (3 * 16)(%rcx); + vmovdqu %xmm12, (4 * 16)(%rcx); + vmovdqu %xmm13, (5 * 16)(%rcx); + vmovdqu %xmm14, (6 * 16)(%rcx); + vmovdqu %xmm15, (7 * 16)(%rcx); + + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm8; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm9; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm10; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm11; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm12; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm13; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm14; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm15; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm4; + vmovdqu %xmm4, (%r8); + + vmovdqu (0 * 16)(%rcx), %xmm0; + vmovdqu (1 * 16)(%rcx), %xmm1; + vmovdqu (2 * 16)(%rcx), %xmm2; + vmovdqu (3 * 16)(%rcx), %xmm3; + vmovdqu (4 * 16)(%rcx), %xmm4; + vmovdqu (5 * 16)(%rcx), %xmm5; + vmovdqu (6 * 16)(%rcx), %xmm6; + vmovdqu (7 * 16)(%rcx), %xmm7; + + FRAME_END + RET; +SYM_FUNC_END(__aria_aesni_avx_ctr_gen_keystream_16way) + +SYM_FUNC_START(aria_aesni_avx_ctr_crypt_16way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: keystream + * %r8: iv (big endian, 128bit) + */ + FRAME_BEGIN + + call __aria_aesni_avx_ctr_gen_keystream_16way; + + leaq (%rsi), %r10; + leaq (%rdx), %r11; + leaq (%rcx), %rsi; + leaq (%rcx), %rdx; + leaq enc_key(CTX), %r9; + + call __aria_aesni_avx_crypt_16way; + + vpxor (0 * 16)(%r11), %xmm1, %xmm1; + vpxor (1 * 16)(%r11), %xmm0, %xmm0; + vpxor (2 * 16)(%r11), %xmm3, %xmm3; + vpxor (3 * 16)(%r11), %xmm2, %xmm2; + vpxor (4 * 16)(%r11), %xmm4, %xmm4; + vpxor (5 * 16)(%r11), %xmm5, %xmm5; + vpxor (6 * 16)(%r11), %xmm6, %xmm6; + vpxor (7 * 16)(%r11), %xmm7, %xmm7; + vpxor (8 * 16)(%r11), %xmm8, %xmm8; + vpxor (9 * 16)(%r11), %xmm9, %xmm9; + vpxor (10 * 16)(%r11), %xmm10, %xmm10; + vpxor (11 * 16)(%r11), %xmm11, %xmm11; + vpxor (12 * 16)(%r11), %xmm12, %xmm12; + vpxor (13 * 16)(%r11), %xmm13, %xmm13; + vpxor (14 * 16)(%r11), %xmm14, %xmm14; + vpxor (15 * 16)(%r11), %xmm15, %xmm15; + write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %r10); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx_ctr_crypt_16way) + +SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way) + /* input: + * %r9: rk + * %rsi: dst + * %rdx: src + * %xmm0..%xmm15: 16 byte-sliced blocks + */ + + FRAME_BEGIN + + movq %rsi, %rax; + leaq 8 * 16(%rax), %r8; + + inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r8); + aria_fo_gfni(%xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 0); + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 1); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 2); + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 3); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 4); + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 5); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 6); + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 7); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 8); + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 9); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 10); + cmpl $12, rounds(CTX); + jne .Laria_gfni_192; + aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 11, 12); + jmp .Laria_gfni_end; +.Laria_gfni_192: + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 11); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 12); + cmpl $14, rounds(CTX); + jne .Laria_gfni_256; + aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 13, 14); + jmp .Laria_gfni_end; +.Laria_gfni_256: + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 13); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 14); + aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 15, 16); +.Laria_gfni_end: + debyteslice_16x16b(%xmm8, %xmm12, %xmm1, %xmm4, + %xmm9, %xmm13, %xmm0, %xmm5, + %xmm10, %xmm14, %xmm3, %xmm6, + %xmm11, %xmm15, %xmm2, %xmm7, + (%rax), (%r8)); + + FRAME_END + RET; +SYM_FUNC_END(__aria_aesni_avx_gfni_crypt_16way) + +SYM_FUNC_START(aria_aesni_avx_gfni_encrypt_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq enc_key(CTX), %r9; + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx); + + call __aria_aesni_avx_gfni_crypt_16way; + + write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx_gfni_encrypt_16way) + +SYM_FUNC_START(aria_aesni_avx_gfni_decrypt_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq dec_key(CTX), %r9; + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx); + + call __aria_aesni_avx_gfni_crypt_16way; + + write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx_gfni_decrypt_16way) + +SYM_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: keystream + * %r8: iv (big endian, 128bit) + */ + FRAME_BEGIN + + call __aria_aesni_avx_ctr_gen_keystream_16way + + leaq (%rsi), %r10; + leaq (%rdx), %r11; + leaq (%rcx), %rsi; + leaq (%rcx), %rdx; + leaq enc_key(CTX), %r9; + + call __aria_aesni_avx_gfni_crypt_16way; + + vpxor (0 * 16)(%r11), %xmm1, %xmm1; + vpxor (1 * 16)(%r11), %xmm0, %xmm0; + vpxor (2 * 16)(%r11), %xmm3, %xmm3; + vpxor (3 * 16)(%r11), %xmm2, %xmm2; + vpxor (4 * 16)(%r11), %xmm4, %xmm4; + vpxor (5 * 16)(%r11), %xmm5, %xmm5; + vpxor (6 * 16)(%r11), %xmm6, %xmm6; + vpxor (7 * 16)(%r11), %xmm7, %xmm7; + vpxor (8 * 16)(%r11), %xmm8, %xmm8; + vpxor (9 * 16)(%r11), %xmm9, %xmm9; + vpxor (10 * 16)(%r11), %xmm10, %xmm10; + vpxor (11 * 16)(%r11), %xmm11, %xmm11; + vpxor (12 * 16)(%r11), %xmm12, %xmm12; + vpxor (13 * 16)(%r11), %xmm13, %xmm13; + vpxor (14 * 16)(%r11), %xmm14, %xmm14; + vpxor (15 * 16)(%r11), %xmm15, %xmm15; + write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %r10); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx_gfni_ctr_crypt_16way) diff --git a/arch/x86/crypto/aria-avx.h b/arch/x86/crypto/aria-avx.h new file mode 100644 index 000000000000..01e9a01dc157 --- /dev/null +++ b/arch/x86/crypto/aria-avx.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef ASM_X86_ARIA_AVX_H +#define ASM_X86_ARIA_AVX_H + +#include <linux/types.h> + +#define ARIA_AESNI_PARALLEL_BLOCKS 16 +#define ARIA_AESNI_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * 16) + +struct aria_avx_ops { + void (*aria_encrypt_16way)(const void *ctx, u8 *dst, const u8 *src); + void (*aria_decrypt_16way)(const void *ctx, u8 *dst, const u8 *src); + void (*aria_ctr_crypt_16way)(const void *ctx, u8 *dst, const u8 *src, + u8 *keystream, u8 *iv); +}; +#endif diff --git a/arch/x86/crypto/aria_aesni_avx_glue.c b/arch/x86/crypto/aria_aesni_avx_glue.c new file mode 100644 index 000000000000..c561ea4fefa5 --- /dev/null +++ b/arch/x86/crypto/aria_aesni_avx_glue.c @@ -0,0 +1,213 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Glue Code for the AVX/AES-NI/GFNI assembler implementation of the ARIA Cipher + * + * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com> + */ + +#include <crypto/algapi.h> +#include <crypto/internal/simd.h> +#include <crypto/aria.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/types.h> + +#include "ecb_cbc_helpers.h" +#include "aria-avx.h" + +asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst, + const u8 *src, + u8 *keystream, u8 *iv); +asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx_gfni_decrypt_16way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst, + const u8 *src, + u8 *keystream, u8 *iv); + +static struct aria_avx_ops aria_ops; + +static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey) +{ + ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_encrypt_16way); + ECB_BLOCK(1, aria_encrypt); + ECB_WALK_END(); +} + +static int ecb_do_decrypt(struct skcipher_request *req, const u32 *rkey) +{ + ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_decrypt_16way); + ECB_BLOCK(1, aria_decrypt); + ECB_WALK_END(); +} + +static int aria_avx_ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_encrypt(req, ctx->enc_key[0]); +} + +static int aria_avx_ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_decrypt(req, ctx->dec_key[0]); +} + +static int aria_avx_set_key(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + return aria_set_key(&tfm->base, key, keylen); +} + +static int aria_avx_ctr_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) { + u8 keystream[ARIA_AESNI_PARALLEL_BLOCK_SIZE]; + + kernel_fpu_begin(); + aria_ops.aria_ctr_crypt_16way(ctx, dst, src, keystream, + walk.iv); + kernel_fpu_end(); + dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE; + src += ARIA_AESNI_PARALLEL_BLOCK_SIZE; + nbytes -= ARIA_AESNI_PARALLEL_BLOCK_SIZE; + } + + while (nbytes >= ARIA_BLOCK_SIZE) { + u8 keystream[ARIA_BLOCK_SIZE]; + + memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE); + crypto_inc(walk.iv, ARIA_BLOCK_SIZE); + + aria_encrypt(ctx, keystream, keystream); + + crypto_xor_cpy(dst, src, keystream, ARIA_BLOCK_SIZE); + dst += ARIA_BLOCK_SIZE; + src += ARIA_BLOCK_SIZE; + nbytes -= ARIA_BLOCK_SIZE; + } + + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[ARIA_BLOCK_SIZE]; + + memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE); + crypto_inc(walk.iv, ARIA_BLOCK_SIZE); + + aria_encrypt(ctx, keystream, keystream); + + crypto_xor_cpy(dst, src, keystream, nbytes); + dst += nbytes; + src += nbytes; + nbytes = 0; + } + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static struct skcipher_alg aria_algs[] = { + { + .base.cra_name = "__ecb(aria)", + .base.cra_driver_name = "__ecb-aria-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = ARIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aria_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = ARIA_MIN_KEY_SIZE, + .max_keysize = ARIA_MAX_KEY_SIZE, + .setkey = aria_avx_set_key, + .encrypt = aria_avx_ecb_encrypt, + .decrypt = aria_avx_ecb_decrypt, + }, { + .base.cra_name = "__ctr(aria)", + .base.cra_driver_name = "__ctr-aria-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aria_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = ARIA_MIN_KEY_SIZE, + .max_keysize = ARIA_MAX_KEY_SIZE, + .ivsize = ARIA_BLOCK_SIZE, + .chunksize = ARIA_BLOCK_SIZE, + .walksize = 16 * ARIA_BLOCK_SIZE, + .setkey = aria_avx_set_key, + .encrypt = aria_avx_ctr_encrypt, + .decrypt = aria_avx_ctr_encrypt, + } +}; + +static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)]; + +static int __init aria_avx_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + if (boot_cpu_has(X86_FEATURE_GFNI)) { + aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way; + aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way; + aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way; + } else { + aria_ops.aria_encrypt_16way = aria_aesni_avx_encrypt_16way; + aria_ops.aria_decrypt_16way = aria_aesni_avx_decrypt_16way; + aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_ctr_crypt_16way; + } + + return simd_register_skciphers_compat(aria_algs, + ARRAY_SIZE(aria_algs), + aria_simd_algs); +} + +static void __exit aria_avx_exit(void) +{ + simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs), + aria_simd_algs); +} + +module_init(aria_avx_init); +module_exit(aria_avx_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Taehee Yoo <ap420073@gmail.com>"); +MODULE_DESCRIPTION("ARIA Cipher Algorithm, AVX/AES-NI/GFNI optimized"); +MODULE_ALIAS_CRYPTO("aria"); +MODULE_ALIAS_CRYPTO("aria-aesni-avx"); diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 30e70f4fe2f7..6d3b85e53d0e 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -36,6 +36,7 @@ #include <linux/types.h> #include <crypto/sha2.h> #include <crypto/sha512_base.h> +#include <asm/cpu_device_id.h> #include <asm/simd.h> asmlinkage void sha512_transform_ssse3(struct sha512_state *state, @@ -284,6 +285,13 @@ static int register_sha512_avx2(void) ARRAY_SIZE(sha512_avx2_algs)); return 0; } +static const struct x86_cpu_id module_cpu_ids[] = { + X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), + X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), + X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids); static void unregister_sha512_avx2(void) { @@ -294,6 +302,8 @@ static void unregister_sha512_avx2(void) static int __init sha512_ssse3_mod_init(void) { + if (!x86_match_cpu(module_cpu_ids)) + return -ENODEV; if (register_sha512_ssse3()) goto fail; diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 381d3333b996..3e88b9df8c8f 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -11,6 +11,9 @@ include $(srctree)/lib/vdso/Makefile # Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n +KMSAN_SANITIZE_vclock_gettime.o := n +KMSAN_SANITIZE_vgetcpu.o := n + UBSAN_SANITIZE := n KCSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 1000d457c332..311eae30e089 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -127,17 +127,17 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) { struct mm_struct *mm = task->mm; struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); mmap_read_lock(mm); - - for (vma = mm->mmap; vma; vma = vma->vm_next) { + for_each_vma(vmi, vma) { unsigned long size = vma->vm_end - vma->vm_start; if (vma_is_special_mapping(vma, &vvar_mapping)) zap_page_range(vma, vma->vm_start, size); } - mmap_read_unlock(mm); + return 0; } #else @@ -327,7 +327,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) end -= len; if (end > start) { - offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); + offset = prandom_u32_max(((end - start) >> PAGE_SHIFT) + 1); addr = start + (offset << PAGE_SHIFT); } else { addr = start; @@ -354,6 +354,7 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); mmap_write_lock(mm); /* @@ -363,7 +364,7 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr) * We could search vma near context.vdso, but it's a slowpath, * so let's explicitly check all VMAs to be completely sure. */ - for (vma = mm->mmap; vma; vma = vma->vm_next) { + for_each_vma(vmi, vma) { if (vma_is_special_mapping(vma, &vdso_mapping) || vma_is_special_mapping(vma, &vvar_mapping)) { mmap_write_unlock(mm); diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index 9933c0e8e97a..86a76efa8bb6 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += core.o probe.o +obj-y += core.o probe.o utils.o obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += rapl.o obj-y += amd/ obj-$(CONFIG_X86_LOCAL_APIC) += msr.o diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile index b9f5d4610256..527d947eb76b 100644 --- a/arch/x86/events/amd/Makefile +++ b/arch/x86/events/amd/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CPU_SUP_AMD) += core.o +obj-$(CONFIG_CPU_SUP_AMD) += core.o lbr.o obj-$(CONFIG_PERF_EVENTS_AMD_BRS) += brs.o obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c index bee8765a1e9b..f1bff153d945 100644 --- a/arch/x86/events/amd/brs.c +++ b/arch/x86/events/amd/brs.c @@ -81,7 +81,7 @@ static bool __init amd_brs_detect(void) * a br_sel_map. Software filtering is not supported because it would not correlate well * with a sampling period. */ -int amd_brs_setup_filter(struct perf_event *event) +static int amd_brs_setup_filter(struct perf_event *event) { u64 type = event->attr.branch_sample_type; @@ -96,6 +96,73 @@ int amd_brs_setup_filter(struct perf_event *event) return 0; } +static inline int amd_is_brs_event(struct perf_event *e) +{ + return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT; +} + +int amd_brs_hw_config(struct perf_event *event) +{ + int ret = 0; + + /* + * Due to interrupt holding, BRS is not recommended in + * counting mode. + */ + if (!is_sampling_event(event)) + return -EINVAL; + + /* + * Due to the way BRS operates by holding the interrupt until + * lbr_nr entries have been captured, it does not make sense + * to allow sampling on BRS with an event that does not match + * what BRS is capturing, i.e., retired taken branches. + * Otherwise the correlation with the event's period is even + * more loose: + * + * With retired taken branch: + * Effective P = P + 16 + X + * With any other event: + * Effective P = P + Y + X + * + * Where X is the number of taken branches due to interrupt + * skid. Skid is large. + * + * Where Y is the occurences of the event while BRS is + * capturing the lbr_nr entries. + * + * By using retired taken branches, we limit the impact on the + * Y variable. We know it cannot be more than the depth of + * BRS. + */ + if (!amd_is_brs_event(event)) + return -EINVAL; + + /* + * BRS implementation does not work with frequency mode + * reprogramming of the period. + */ + if (event->attr.freq) + return -EINVAL; + /* + * The kernel subtracts BRS depth from period, so it must + * be big enough. + */ + if (event->attr.sample_period <= x86_pmu.lbr_nr) + return -EINVAL; + + /* + * Check if we can allow PERF_SAMPLE_BRANCH_STACK + */ + ret = amd_brs_setup_filter(event); + + /* only set in case of success */ + if (!ret) + event->hw.flags |= PERF_X86_EVENT_AMD_BRS; + + return ret; +} + /* tos = top of stack, i.e., last valid entry written */ static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg) { diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 9ac3718410ce..8b70237c33f7 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -330,16 +330,10 @@ static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc) } } -#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */ -static inline int amd_is_brs_event(struct perf_event *e) -{ - return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT; -} +DEFINE_STATIC_CALL_RET0(amd_pmu_branch_hw_config, *x86_pmu.hw_config); static int amd_core_hw_config(struct perf_event *event) { - int ret = 0; - if (event->attr.exclude_host && event->attr.exclude_guest) /* * When HO == GO == 1 the hardware treats that as GO == HO == 0 @@ -356,66 +350,10 @@ static int amd_core_hw_config(struct perf_event *event) if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw)) event->hw.flags |= PERF_X86_EVENT_PAIR; - /* - * if branch stack is requested - */ - if (has_branch_stack(event)) { - /* - * Due to interrupt holding, BRS is not recommended in - * counting mode. - */ - if (!is_sampling_event(event)) - return -EINVAL; + if (has_branch_stack(event)) + return static_call(amd_pmu_branch_hw_config)(event); - /* - * Due to the way BRS operates by holding the interrupt until - * lbr_nr entries have been captured, it does not make sense - * to allow sampling on BRS with an event that does not match - * what BRS is capturing, i.e., retired taken branches. - * Otherwise the correlation with the event's period is even - * more loose: - * - * With retired taken branch: - * Effective P = P + 16 + X - * With any other event: - * Effective P = P + Y + X - * - * Where X is the number of taken branches due to interrupt - * skid. Skid is large. - * - * Where Y is the occurences of the event while BRS is - * capturing the lbr_nr entries. - * - * By using retired taken branches, we limit the impact on the - * Y variable. We know it cannot be more than the depth of - * BRS. - */ - if (!amd_is_brs_event(event)) - return -EINVAL; - - /* - * BRS implementation does not work with frequency mode - * reprogramming of the period. - */ - if (event->attr.freq) - return -EINVAL; - /* - * The kernel subtracts BRS depth from period, so it must - * be big enough. - */ - if (event->attr.sample_period <= x86_pmu.lbr_nr) - return -EINVAL; - - /* - * Check if we can allow PERF_SAMPLE_BRANCH_STACK - */ - ret = amd_brs_setup_filter(event); - - /* only set in case of success */ - if (!ret) - event->hw.flags |= PERF_X86_EVENT_AMD_BRS; - } - return ret; + return 0; } static inline int amd_is_nb_event(struct hw_perf_event *hwc) @@ -582,8 +520,14 @@ static struct amd_nb *amd_alloc_nb(int cpu) return nb; } +typedef void (amd_pmu_branch_reset_t)(void); +DEFINE_STATIC_CALL_NULL(amd_pmu_branch_reset, amd_pmu_branch_reset_t); + static void amd_pmu_cpu_reset(int cpu) { + if (x86_pmu.lbr_nr) + static_call(amd_pmu_branch_reset)(); + if (x86_pmu.version < 2) return; @@ -598,16 +542,24 @@ static int amd_pmu_cpu_prepare(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + cpuc->lbr_sel = kzalloc_node(sizeof(struct er_account), GFP_KERNEL, + cpu_to_node(cpu)); + if (!cpuc->lbr_sel) + return -ENOMEM; + WARN_ON_ONCE(cpuc->amd_nb); if (!x86_pmu.amd_nb_constraints) return 0; cpuc->amd_nb = amd_alloc_nb(cpu); - if (!cpuc->amd_nb) - return -ENOMEM; + if (cpuc->amd_nb) + return 0; - return 0; + kfree(cpuc->lbr_sel); + cpuc->lbr_sel = NULL; + + return -ENOMEM; } static void amd_pmu_cpu_starting(int cpu) @@ -640,19 +592,19 @@ static void amd_pmu_cpu_starting(int cpu) cpuc->amd_nb->nb_id = nb_id; cpuc->amd_nb->refcnt++; - amd_brs_reset(); amd_pmu_cpu_reset(cpu); } static void amd_pmu_cpu_dead(int cpu) { - struct cpu_hw_events *cpuhw; + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + + kfree(cpuhw->lbr_sel); + cpuhw->lbr_sel = NULL; if (!x86_pmu.amd_nb_constraints) return; - cpuhw = &per_cpu(cpu_hw_events, cpu); - if (cpuhw->amd_nb) { struct amd_nb *nb = cpuhw->amd_nb; @@ -677,7 +629,7 @@ static inline u64 amd_pmu_get_global_status(void) /* PerfCntrGlobalStatus is read-only */ rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status); - return status & amd_pmu_global_cntr_mask; + return status; } static inline void amd_pmu_ack_global_status(u64 status) @@ -688,8 +640,6 @@ static inline void amd_pmu_ack_global_status(u64 status) * clears the same bit in PerfCntrGlobalStatus */ - /* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */ - status &= amd_pmu_global_cntr_mask; wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status); } @@ -799,11 +749,17 @@ static void amd_pmu_v2_enable_event(struct perf_event *event) __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } -static void amd_pmu_v2_enable_all(int added) +static __always_inline void amd_pmu_core_enable_all(void) { amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask); } +static void amd_pmu_v2_enable_all(int added) +{ + amd_pmu_lbr_enable_all(); + amd_pmu_core_enable_all(); +} + static void amd_pmu_disable_event(struct perf_event *event) { x86_pmu_disable_event(event); @@ -828,23 +784,32 @@ static void amd_pmu_disable_all(void) amd_pmu_check_overflow(); } -static void amd_pmu_v2_disable_all(void) +static __always_inline void amd_pmu_core_disable_all(void) { - /* Disable all PMCs */ amd_pmu_set_global_ctl(0); +} + +static void amd_pmu_v2_disable_all(void) +{ + amd_pmu_core_disable_all(); + amd_pmu_lbr_disable_all(); amd_pmu_check_overflow(); } +DEFINE_STATIC_CALL_NULL(amd_pmu_branch_add, *x86_pmu.add); + static void amd_pmu_add_event(struct perf_event *event) { if (needs_branch_stack(event)) - amd_pmu_brs_add(event); + static_call(amd_pmu_branch_add)(event); } +DEFINE_STATIC_CALL_NULL(amd_pmu_branch_del, *x86_pmu.del); + static void amd_pmu_del_event(struct perf_event *event) { if (needs_branch_stack(event)) - amd_pmu_brs_del(event); + static_call(amd_pmu_branch_del)(event); } /* @@ -930,8 +895,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) pmu_enabled = cpuc->enabled; cpuc->enabled = 0; - /* Stop counting */ - amd_pmu_v2_disable_all(); + /* Stop counting but do not disable LBR */ + amd_pmu_core_disable_all(); status = amd_pmu_get_global_status(); @@ -939,6 +904,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!status) goto done; + /* Read branch records before unfreezing */ + if (status & GLOBAL_STATUS_LBRS_FROZEN) { + amd_pmu_lbr_read(); + status &= ~GLOBAL_STATUS_LBRS_FROZEN; + } + for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) continue; @@ -958,6 +929,11 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!x86_perf_event_set_period(event)) continue; + if (has_branch_stack(event)) { + data.br_stack = &cpuc->lbr_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } + if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); @@ -971,7 +947,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) */ WARN_ON(status > 0); - /* Clear overflow bits */ + /* Clear overflow and freeze bits */ amd_pmu_ack_global_status(~status); /* @@ -985,7 +961,7 @@ done: /* Resume counting only if PMU is active */ if (pmu_enabled) - amd_pmu_v2_enable_all(0); + amd_pmu_core_enable_all(); return amd_pmu_adjust_nmi_window(handled); } @@ -1248,23 +1224,14 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config) return x86_event_sysfs_show(page, config, event); } -static void amd_pmu_sched_task(struct perf_event_context *ctx, - bool sched_in) -{ - if (sched_in && x86_pmu.lbr_nr) - amd_pmu_brs_sched_task(ctx, sched_in); -} - -static u64 amd_pmu_limit_period(struct perf_event *event, u64 left) +static void amd_pmu_limit_period(struct perf_event *event, s64 *left) { /* * Decrease period by the depth of the BRS feature to get the last N * taken branches and approximate the desired period */ - if (has_branch_stack(event) && left > x86_pmu.lbr_nr) - left -= x86_pmu.lbr_nr; - - return left; + if (has_branch_stack(event) && *left > x86_pmu.lbr_nr) + *left -= x86_pmu.lbr_nr; } static __initconst const struct x86_pmu amd_pmu = { @@ -1311,23 +1278,25 @@ static ssize_t branches_show(struct device *cdev, static DEVICE_ATTR_RO(branches); -static struct attribute *amd_pmu_brs_attrs[] = { +static struct attribute *amd_pmu_branches_attrs[] = { &dev_attr_branches.attr, NULL, }; static umode_t -amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i) +amd_branches_is_visible(struct kobject *kobj, struct attribute *attr, int i) { return x86_pmu.lbr_nr ? attr->mode : 0; } -static struct attribute_group group_caps_amd_brs = { +static struct attribute_group group_caps_amd_branches = { .name = "caps", - .attrs = amd_pmu_brs_attrs, - .is_visible = amd_brs_is_visible, + .attrs = amd_pmu_branches_attrs, + .is_visible = amd_branches_is_visible, }; +#ifdef CONFIG_PERF_EVENTS_AMD_BRS + EVENT_ATTR_STR(branch-brs, amd_branch_brs, "event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n"); @@ -1336,15 +1305,26 @@ static struct attribute *amd_brs_events_attrs[] = { NULL, }; +static umode_t +amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return static_cpu_has(X86_FEATURE_BRS) && x86_pmu.lbr_nr ? + attr->mode : 0; +} + static struct attribute_group group_events_amd_brs = { .name = "events", .attrs = amd_brs_events_attrs, .is_visible = amd_brs_is_visible, }; +#endif /* CONFIG_PERF_EVENTS_AMD_BRS */ + static const struct attribute_group *amd_attr_update[] = { - &group_caps_amd_brs, + &group_caps_amd_branches, +#ifdef CONFIG_PERF_EVENTS_AMD_BRS &group_events_amd_brs, +#endif NULL, }; @@ -1421,13 +1401,27 @@ static int __init amd_core_pmu_init(void) x86_pmu.flags |= PMU_FL_PAIR; } - /* - * BRS requires special event constraints and flushing on ctxsw. - */ - if (boot_cpu_data.x86 >= 0x19 && !amd_brs_init()) { + /* LBR and BRS are mutually exclusive features */ + if (!amd_pmu_lbr_init()) { + /* LBR requires flushing on context switch */ + x86_pmu.sched_task = amd_pmu_lbr_sched_task; + static_call_update(amd_pmu_branch_hw_config, amd_pmu_lbr_hw_config); + static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset); + static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add); + static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del); + } else if (!amd_brs_init()) { + /* + * BRS requires special event constraints and flushing on ctxsw. + */ x86_pmu.get_event_constraints = amd_get_event_constraints_f19h; - x86_pmu.sched_task = amd_pmu_sched_task; + x86_pmu.sched_task = amd_pmu_brs_sched_task; x86_pmu.limit_period = amd_pmu_limit_period; + + static_call_update(amd_pmu_branch_hw_config, amd_brs_hw_config); + static_call_update(amd_pmu_branch_reset, amd_brs_reset); + static_call_update(amd_pmu_branch_add, amd_pmu_brs_add); + static_call_update(amd_pmu_branch_del, amd_pmu_brs_del); + /* * put_event_constraints callback same as Fam17h, set above */ diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index c251bc44c088..3271735f0070 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -300,16 +300,6 @@ static int perf_ibs_init(struct perf_event *event) hwc->config_base = perf_ibs->msr; hwc->config = config; - /* - * rip recorded by IbsOpRip will not be consistent with rsp and rbp - * recorded as part of interrupt regs. Thus we need to use rip from - * interrupt regs while unwinding call stack. Setting _EARLY flag - * makes sure we unwind call-stack before perf sample rip is set to - * IbsOpRip. - */ - if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) - event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY; - return 0; } @@ -688,6 +678,339 @@ static struct perf_ibs perf_ibs_op = { .get_count = get_ibs_op_count, }; +static void perf_ibs_get_mem_op(union ibs_op_data3 *op_data3, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + + data_src->mem_op = PERF_MEM_OP_NA; + + if (op_data3->ld_op) + data_src->mem_op = PERF_MEM_OP_LOAD; + else if (op_data3->st_op) + data_src->mem_op = PERF_MEM_OP_STORE; +} + +/* + * Processors having CPUID_Fn8000001B_EAX[11] aka IBS_CAPS_ZEN4 has + * more fine granular DataSrc encodings. Others have coarse. + */ +static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2) +{ + if (ibs_caps & IBS_CAPS_ZEN4) + return (op_data2->data_src_hi << 3) | op_data2->data_src_lo; + + return op_data2->data_src_lo; +} + +static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, + union ibs_op_data3 *op_data3, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + u8 ibs_data_src = perf_ibs_data_src(op_data2); + + data_src->mem_lvl = 0; + + /* + * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached + * memory accesses. So, check DcUcMemAcc bit early. + */ + if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) { + data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT; + return; + } + + /* L1 Hit */ + if (op_data3->dc_miss == 0) { + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + return; + } + + /* L2 Hit */ + if (op_data3->l2_miss == 0) { + /* Erratum #1293 */ + if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF || + !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) { + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + return; + } + } + + /* + * OP_DATA2 is valid only for load ops. Skip all checks which + * uses OP_DATA2[DataSrc]. + */ + if (data_src->mem_op != PERF_MEM_OP_LOAD) + goto check_mab; + + /* L3 Hit */ + if (ibs_caps & IBS_CAPS_ZEN4) { + if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + return; + } + } else { + if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 | + PERF_MEM_LVL_HIT; + return; + } + } + + /* A peer cache in a near CCX */ + if (ibs_caps & IBS_CAPS_ZEN4 && + ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; + return; + } + + /* A peer cache in a far CCX */ + if (ibs_caps & IBS_CAPS_ZEN4) { + if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT; + return; + } + } else { + if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT; + return; + } + } + + /* DRAM */ + if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) { + if (op_data2->rmt_node == 0) + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; + else + data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT; + return; + } + + /* PMEM */ + if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) { + data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM; + if (op_data2->rmt_node) { + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + /* IBS doesn't provide Remote socket detail */ + data_src->mem_hops = PERF_MEM_HOPS_1; + } + return; + } + + /* Extension Memory */ + if (ibs_caps & IBS_CAPS_ZEN4 && + ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) { + data_src->mem_lvl_num = PERF_MEM_LVLNUM_EXTN_MEM; + if (op_data2->rmt_node) { + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + /* IBS doesn't provide Remote socket detail */ + data_src->mem_hops = PERF_MEM_HOPS_1; + } + return; + } + + /* IO */ + if (ibs_data_src == IBS_DATA_SRC_EXT_IO) { + data_src->mem_lvl = PERF_MEM_LVL_IO; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO; + if (op_data2->rmt_node) { + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + /* IBS doesn't provide Remote socket detail */ + data_src->mem_hops = PERF_MEM_HOPS_1; + } + return; + } + +check_mab: + /* + * MAB (Miss Address Buffer) Hit. MAB keeps track of outstanding + * DC misses. However, such data may come from any level in mem + * hierarchy. IBS provides detail about both MAB as well as actual + * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set + * MAB only when IBS fails to provide DataSrc. + */ + if (op_data3->dc_miss_no_mab_alloc) { + data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT; + return; + } + + data_src->mem_lvl = PERF_MEM_LVL_NA; +} + +static bool perf_ibs_cache_hit_st_valid(void) +{ + /* 0: Uninitialized, 1: Valid, -1: Invalid */ + static int cache_hit_st_valid; + + if (unlikely(!cache_hit_st_valid)) { + if (boot_cpu_data.x86 == 0x19 && + (boot_cpu_data.x86_model <= 0xF || + (boot_cpu_data.x86_model >= 0x20 && + boot_cpu_data.x86_model <= 0x5F))) { + cache_hit_st_valid = -1; + } else { + cache_hit_st_valid = 1; + } + } + + return cache_hit_st_valid == 1; +} + +static void perf_ibs_get_mem_snoop(union ibs_op_data2 *op_data2, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + u8 ibs_data_src; + + data_src->mem_snoop = PERF_MEM_SNOOP_NA; + + if (!perf_ibs_cache_hit_st_valid() || + data_src->mem_op != PERF_MEM_OP_LOAD || + data_src->mem_lvl & PERF_MEM_LVL_L1 || + data_src->mem_lvl & PERF_MEM_LVL_L2 || + op_data2->cache_hit_st) + return; + + ibs_data_src = perf_ibs_data_src(op_data2); + + if (ibs_caps & IBS_CAPS_ZEN4) { + if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE || + ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE || + ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + } else if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) { + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + } +} + +static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + + data_src->mem_dtlb = PERF_MEM_TLB_NA; + + if (!op_data3->dc_lin_addr_valid) + return; + + if (!op_data3->dc_l1tlb_miss) { + data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT; + return; + } + + if (!op_data3->dc_l2tlb_miss) { + data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT; + return; + } + + data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS; +} + +static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + + data_src->mem_lock = PERF_MEM_LOCK_NA; + + if (op_data3->dc_locked_op) + data_src->mem_lock = PERF_MEM_LOCK_LOCKED; +} + +#define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL) + +static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data, + struct perf_sample_data *data, + union ibs_op_data2 *op_data2, + union ibs_op_data3 *op_data3) +{ + perf_ibs_get_mem_lvl(op_data2, op_data3, data); + perf_ibs_get_mem_snoop(op_data2, data); + perf_ibs_get_tlb_lvl(op_data3, data); + perf_ibs_get_mem_lock(op_data3, data); +} + +static __u64 perf_ibs_get_op_data2(struct perf_ibs_data *ibs_data, + union ibs_op_data3 *op_data3) +{ + __u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)]; + + /* Erratum #1293 */ + if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF && + (op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) { + /* + * OP_DATA2 has only two fields on Zen3: DataSrc and RmtNode. + * DataSrc=0 is 'No valid status' and RmtNode is invalid when + * DataSrc=0. + */ + val = 0; + } + return val; +} + +static void perf_ibs_parse_ld_st_data(__u64 sample_type, + struct perf_ibs_data *ibs_data, + struct perf_sample_data *data) +{ + union ibs_op_data3 op_data3; + union ibs_op_data2 op_data2; + union ibs_op_data op_data; + + data->data_src.val = PERF_MEM_NA; + op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; + + perf_ibs_get_mem_op(&op_data3, data); + if (data->data_src.mem_op != PERF_MEM_OP_LOAD && + data->data_src.mem_op != PERF_MEM_OP_STORE) + return; + + op_data2.val = perf_ibs_get_op_data2(ibs_data, &op_data3); + + if (sample_type & PERF_SAMPLE_DATA_SRC) { + perf_ibs_get_data_src(ibs_data, data, &op_data2, &op_data3); + data->sample_flags |= PERF_SAMPLE_DATA_SRC; + } + + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE && op_data3.dc_miss && + data->data_src.mem_op == PERF_MEM_OP_LOAD) { + op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)]; + + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { + data->weight.var1_dw = op_data3.dc_miss_lat; + data->weight.var2_w = op_data.tag_to_ret_ctr; + } else if (sample_type & PERF_SAMPLE_WEIGHT) { + data->weight.full = op_data3.dc_miss_lat; + } + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } + + if (sample_type & PERF_SAMPLE_ADDR && op_data3.dc_lin_addr_valid) { + data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)]; + data->sample_flags |= PERF_SAMPLE_ADDR; + } + + if (sample_type & PERF_SAMPLE_PHYS_ADDR && op_data3.dc_phy_addr_valid) { + data->phys_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)]; + data->sample_flags |= PERF_SAMPLE_PHYS_ADDR; + } +} + +static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, + int check_rip) +{ + if (sample_type & PERF_SAMPLE_RAW || + (perf_ibs == &perf_ibs_op && + (sample_type & PERF_SAMPLE_DATA_SRC || + sample_type & PERF_SAMPLE_WEIGHT_TYPE || + sample_type & PERF_SAMPLE_ADDR || + sample_type & PERF_SAMPLE_PHYS_ADDR))) + return perf_ibs->offset_max; + else if (check_rip) + return 3; + return 1; +} + static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) { struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); @@ -735,12 +1058,9 @@ fail: size = 1; offset = 1; check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK)); - if (event->attr.sample_type & PERF_SAMPLE_RAW) - offset_max = perf_ibs->offset_max; - else if (check_rip) - offset_max = 3; - else - offset_max = 1; + + offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip); + do { rdmsrl(msr + offset, *buf++); size++; @@ -791,15 +1111,21 @@ fail: }, }; data.raw = &raw; + data.sample_flags |= PERF_SAMPLE_RAW; } + if (perf_ibs == &perf_ibs_op) + perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data); + /* * rip recorded by IbsOpRip will not be consistent with rsp and rbp * recorded as part of interrupt regs. Thus we need to use rip from * interrupt regs while unwinding call stack. */ - if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { data.callchain = perf_callchain(event, iregs); + data.sample_flags |= PERF_SAMPLE_CALLCHAIN; + } throttle = perf_event_overflow(event, &data, ®s); out: diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c new file mode 100644 index 000000000000..38a75216c12c --- /dev/null +++ b/arch/x86/events/amd/lbr.c @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/perf_event.h> +#include <asm/perf_event.h> + +#include "../perf_event.h" + +/* LBR Branch Select valid bits */ +#define LBR_SELECT_MASK 0x1ff + +/* + * LBR Branch Select filter bits which when set, ensures that the + * corresponding type of branches are not recorded + */ +#define LBR_SELECT_KERNEL 0 /* Branches ending in CPL = 0 */ +#define LBR_SELECT_USER 1 /* Branches ending in CPL > 0 */ +#define LBR_SELECT_JCC 2 /* Conditional branches */ +#define LBR_SELECT_CALL_NEAR_REL 3 /* Near relative calls */ +#define LBR_SELECT_CALL_NEAR_IND 4 /* Indirect relative calls */ +#define LBR_SELECT_RET_NEAR 5 /* Near returns */ +#define LBR_SELECT_JMP_NEAR_IND 6 /* Near indirect jumps (excl. calls and returns) */ +#define LBR_SELECT_JMP_NEAR_REL 7 /* Near relative jumps (excl. calls) */ +#define LBR_SELECT_FAR_BRANCH 8 /* Far branches */ + +#define LBR_KERNEL BIT(LBR_SELECT_KERNEL) +#define LBR_USER BIT(LBR_SELECT_USER) +#define LBR_JCC BIT(LBR_SELECT_JCC) +#define LBR_REL_CALL BIT(LBR_SELECT_CALL_NEAR_REL) +#define LBR_IND_CALL BIT(LBR_SELECT_CALL_NEAR_IND) +#define LBR_RETURN BIT(LBR_SELECT_RET_NEAR) +#define LBR_REL_JMP BIT(LBR_SELECT_JMP_NEAR_REL) +#define LBR_IND_JMP BIT(LBR_SELECT_JMP_NEAR_IND) +#define LBR_FAR BIT(LBR_SELECT_FAR_BRANCH) +#define LBR_NOT_SUPP -1 /* unsupported filter */ +#define LBR_IGNORE 0 + +#define LBR_ANY \ + (LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN | \ + LBR_REL_JMP | LBR_IND_JMP | LBR_FAR) + +struct branch_entry { + union { + struct { + u64 ip:58; + u64 ip_sign_ext:5; + u64 mispredict:1; + } split; + u64 full; + } from; + + union { + struct { + u64 ip:58; + u64 ip_sign_ext:3; + u64 reserved:1; + u64 spec:1; + u64 valid:1; + } split; + u64 full; + } to; +}; + +static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val) +{ + wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val); +} + +static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val) +{ + wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); +} + +static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx) +{ + u64 val; + + rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val); + + return val; +} + +static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx) +{ + u64 val; + + rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); + + return val; +} + +static __always_inline u64 sign_ext_branch_ip(u64 ip) +{ + u32 shift = 64 - boot_cpu_data.x86_virt_bits; + + return (u64)(((s64)ip << shift) >> shift); +} + +static void amd_pmu_lbr_filter(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int br_sel = cpuc->br_sel, offset, type, i, j; + bool compress = false; + bool fused_only = false; + u64 from, to; + + /* If sampling all branches, there is nothing to filter */ + if (((br_sel & X86_BR_ALL) == X86_BR_ALL) && + ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE)) + fused_only = true; + + for (i = 0; i < cpuc->lbr_stack.nr; i++) { + from = cpuc->lbr_entries[i].from; + to = cpuc->lbr_entries[i].to; + type = branch_type_fused(from, to, 0, &offset); + + /* + * Adjust the branch from address in case of instruction + * fusion where it points to an instruction preceding the + * actual branch + */ + if (offset) { + cpuc->lbr_entries[i].from += offset; + if (fused_only) + continue; + } + + /* If type does not correspond, then discard */ + if (type == X86_BR_NONE || (br_sel & type) != type) { + cpuc->lbr_entries[i].from = 0; /* mark invalid */ + compress = true; + } + + if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE) + cpuc->lbr_entries[i].type = common_branch_type(type); + } + + if (!compress) + return; + + /* Remove all invalid entries */ + for (i = 0; i < cpuc->lbr_stack.nr; ) { + if (!cpuc->lbr_entries[i].from) { + j = i; + while (++j < cpuc->lbr_stack.nr) + cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j]; + cpuc->lbr_stack.nr--; + if (!cpuc->lbr_entries[i].from) + continue; + } + i++; + } +} + +static const int lbr_spec_map[PERF_BR_SPEC_MAX] = { + PERF_BR_SPEC_NA, + PERF_BR_SPEC_WRONG_PATH, + PERF_BR_NON_SPEC_CORRECT_PATH, + PERF_BR_SPEC_CORRECT_PATH, +}; + +void amd_pmu_lbr_read(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_branch_entry *br = cpuc->lbr_entries; + struct branch_entry entry; + int out = 0, idx, i; + + if (!cpuc->lbr_users) + return; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + entry.from.full = amd_pmu_lbr_get_from(i); + entry.to.full = amd_pmu_lbr_get_to(i); + + /* + * Check if a branch has been logged; if valid = 0, spec = 0 + * then no branch was recorded + */ + if (!entry.to.split.valid && !entry.to.split.spec) + continue; + + perf_clear_branch_entry_bitfields(br + out); + + br[out].from = sign_ext_branch_ip(entry.from.split.ip); + br[out].to = sign_ext_branch_ip(entry.to.split.ip); + br[out].mispred = entry.from.split.mispredict; + br[out].predicted = !br[out].mispred; + + /* + * Set branch speculation information using the status of + * the valid and spec bits. + * + * When valid = 0, spec = 0, no branch was recorded and the + * entry is discarded as seen above. + * + * When valid = 0, spec = 1, the recorded branch was + * speculative but took the wrong path. + * + * When valid = 1, spec = 0, the recorded branch was + * non-speculative but took the correct path. + * + * When valid = 1, spec = 1, the recorded branch was + * speculative and took the correct path + */ + idx = (entry.to.split.valid << 1) | entry.to.split.spec; + br[out].spec = lbr_spec_map[idx]; + out++; + } + + cpuc->lbr_stack.nr = out; + + /* + * Internal register renaming always ensures that LBR From[0] and + * LBR To[0] always represent the TOS + */ + cpuc->lbr_stack.hw_idx = 0; + + /* Perform further software filtering */ + amd_pmu_lbr_filter(); +} + +static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { + [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGNORE, + + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT] = LBR_NOT_SUPP, + [PERF_SAMPLE_BRANCH_IN_TX_SHIFT] = LBR_NOT_SUPP, + [PERF_SAMPLE_BRANCH_NO_TX_SHIFT] = LBR_NOT_SUPP, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, + + [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP, + [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL, + + [PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT] = LBR_NOT_SUPP, + [PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT] = LBR_NOT_SUPP, +}; + +static int amd_pmu_lbr_setup_filter(struct perf_event *event) +{ + struct hw_perf_event_extra *reg = &event->hw.branch_reg; + u64 br_type = event->attr.branch_sample_type; + u64 mask = 0, v; + int i; + + /* No LBR support */ + if (!x86_pmu.lbr_nr) + return -EOPNOTSUPP; + + if (br_type & PERF_SAMPLE_BRANCH_USER) + mask |= X86_BR_USER; + + if (br_type & PERF_SAMPLE_BRANCH_KERNEL) + mask |= X86_BR_KERNEL; + + /* Ignore BRANCH_HV here */ + + if (br_type & PERF_SAMPLE_BRANCH_ANY) + mask |= X86_BR_ANY; + + if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) + mask |= X86_BR_ANY_CALL; + + if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; + + if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) + mask |= X86_BR_IND_CALL; + + if (br_type & PERF_SAMPLE_BRANCH_COND) + mask |= X86_BR_JCC; + + if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP) + mask |= X86_BR_IND_JMP; + + if (br_type & PERF_SAMPLE_BRANCH_CALL) + mask |= X86_BR_CALL | X86_BR_ZERO_CALL; + + if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE) + mask |= X86_BR_TYPE_SAVE; + + reg->reg = mask; + mask = 0; + + for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) { + if (!(br_type & BIT_ULL(i))) + continue; + + v = lbr_select_map[i]; + if (v == LBR_NOT_SUPP) + return -EOPNOTSUPP; + + if (v != LBR_IGNORE) + mask |= v; + } + + /* Filter bits operate in suppress mode */ + reg->config = mask ^ LBR_SELECT_MASK; + + return 0; +} + +int amd_pmu_lbr_hw_config(struct perf_event *event) +{ + int ret = 0; + + /* LBR is not recommended in counting mode */ + if (!is_sampling_event(event)) + return -EINVAL; + + ret = amd_pmu_lbr_setup_filter(event); + if (!ret) + event->attach_state |= PERF_ATTACH_SCHED_CB; + + return ret; +} + +void amd_pmu_lbr_reset(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int i; + + if (!x86_pmu.lbr_nr) + return; + + /* Reset all branch records individually */ + for (i = 0; i < x86_pmu.lbr_nr; i++) { + amd_pmu_lbr_set_from(i, 0); + amd_pmu_lbr_set_to(i, 0); + } + + cpuc->last_task_ctx = NULL; + cpuc->last_log_id = 0; + wrmsrl(MSR_AMD64_LBR_SELECT, 0); +} + +void amd_pmu_lbr_add(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event_extra *reg = &event->hw.branch_reg; + + if (!x86_pmu.lbr_nr) + return; + + if (has_branch_stack(event)) { + cpuc->lbr_select = 1; + cpuc->lbr_sel->config = reg->config; + cpuc->br_sel = reg->reg; + } + + perf_sched_cb_inc(event->ctx->pmu); + + if (!cpuc->lbr_users++ && !event->total_time_running) + amd_pmu_lbr_reset(); +} + +void amd_pmu_lbr_del(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + if (has_branch_stack(event)) + cpuc->lbr_select = 0; + + cpuc->lbr_users--; + WARN_ON_ONCE(cpuc->lbr_users < 0); + perf_sched_cb_dec(event->ctx->pmu); +} + +void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * A context switch can flip the address space and LBR entries are + * not tagged with an identifier. Hence, branches cannot be resolved + * from the old address space and the LBR records should be wiped. + */ + if (cpuc->lbr_users && sched_in) + amd_pmu_lbr_reset(); +} + +void amd_pmu_lbr_enable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 lbr_select, dbg_ctl, dbg_extn_cfg; + + if (!cpuc->lbr_users || !x86_pmu.lbr_nr) + return; + + /* Set hardware branch filter */ + if (cpuc->lbr_select) { + lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK; + wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); + } + + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); +} + +void amd_pmu_lbr_disable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 dbg_ctl, dbg_extn_cfg; + + if (!cpuc->lbr_users || !x86_pmu.lbr_nr) + return; + + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + + wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); +} + +__init int amd_pmu_lbr_init(void) +{ + union cpuid_0x80000022_ebx ebx; + + if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2)) + return -EOPNOTSUPP; + + /* Set number of entries */ + ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); + x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz; + + pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); + + return 0; +} diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index f969410d0c90..b30b8bbcd1e2 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -72,6 +72,10 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add); DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del); DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read); +DEFINE_STATIC_CALL_NULL(x86_pmu_set_period, *x86_pmu.set_period); +DEFINE_STATIC_CALL_NULL(x86_pmu_update, *x86_pmu.update); +DEFINE_STATIC_CALL_NULL(x86_pmu_limit_period, *x86_pmu.limit_period); + DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events, *x86_pmu.schedule_events); DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints); DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints); @@ -116,9 +120,6 @@ u64 x86_perf_event_update(struct perf_event *event) if (unlikely(!hwc->event_base)) return 0; - if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event) - return x86_pmu.update_topdown_event(event); - /* * Careful: an NMI might modify the previous event value. * @@ -621,8 +622,9 @@ int x86_pmu_hw_config(struct perf_event *event) event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; if (event->attr.sample_period && x86_pmu.limit_period) { - if (x86_pmu.limit_period(event, event->attr.sample_period) > - event->attr.sample_period) + s64 left = event->attr.sample_period; + x86_pmu.limit_period(event, &left); + if (left > event->attr.sample_period) return -EINVAL; } @@ -1354,7 +1356,7 @@ static void x86_pmu_enable(struct pmu *pmu) static_call(x86_pmu_enable_all)(added); } -static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); +DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); /* * Set the next IRQ period, based on the hwc->period_left value. @@ -1370,10 +1372,6 @@ int x86_perf_event_set_period(struct perf_event *event) if (unlikely(!hwc->event_base)) return 0; - if (unlikely(is_topdown_count(event)) && - x86_pmu.set_topdown_event_period) - return x86_pmu.set_topdown_event_period(event); - /* * If we are way outside a reasonable range then just skip forward: */ @@ -1399,10 +1397,9 @@ int x86_perf_event_set_period(struct perf_event *event) if (left > x86_pmu.max_period) left = x86_pmu.max_period; - if (x86_pmu.limit_period) - left = x86_pmu.limit_period(event, left); + static_call_cond(x86_pmu_limit_period)(event, &left); - per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; + this_cpu_write(pmc_prev_left[idx], left); /* * The hw event starts counting from this event offset, @@ -1419,16 +1416,6 @@ int x86_perf_event_set_period(struct perf_event *event) if (is_counter_pair(hwc)) wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff); - /* - * Due to erratum on certan cpu we need - * a second write to be sure the register - * is updated properly - */ - if (x86_pmu.perfctr_second_write) { - wrmsrl(hwc->event_base, - (u64)(-left) & x86_pmu.cntval_mask); - } - perf_event_update_userpage(event); return ret; @@ -1518,7 +1505,7 @@ static void x86_pmu_start(struct perf_event *event, int flags) if (flags & PERF_EF_RELOAD) { WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); - x86_perf_event_set_period(event); + static_call(x86_pmu_set_period)(event); } event->hw.state = 0; @@ -1610,7 +1597,7 @@ void x86_pmu_stop(struct perf_event *event, int flags) * Drain the remaining delta count out of a event * that we are disabling: */ - x86_perf_event_update(event); + static_call(x86_pmu_update)(event); hwc->state |= PERF_HES_UPTODATE; } } @@ -1700,7 +1687,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) event = cpuc->events[idx]; - val = x86_perf_event_update(event); + val = static_call(x86_pmu_update)(event); if (val & (1ULL << (x86_pmu.cntval_bits - 1))) continue; @@ -1709,13 +1696,15 @@ int x86_pmu_handle_irq(struct pt_regs *regs) */ handled++; - if (!x86_perf_event_set_period(event)) + if (!static_call(x86_pmu_set_period)(event)) continue; perf_sample_data_init(&data, 0, event->hw.last_period); - if (has_branch_stack(event)) + if (has_branch_stack(event)) { data.br_stack = &cpuc->lbr_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); @@ -2023,6 +2012,10 @@ static void x86_pmu_static_call_update(void) static_call_update(x86_pmu_del, x86_pmu.del); static_call_update(x86_pmu_read, x86_pmu.read); + static_call_update(x86_pmu_set_period, x86_pmu.set_period); + static_call_update(x86_pmu_update, x86_pmu.update); + static_call_update(x86_pmu_limit_period, x86_pmu.limit_period); + static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events); static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints); static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints); @@ -2042,7 +2035,7 @@ static void x86_pmu_static_call_update(void) static void _x86_pmu_read(struct perf_event *event) { - x86_perf_event_update(event); + static_call(x86_pmu_update)(event); } void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, @@ -2149,6 +2142,12 @@ static int __init init_hw_perf_events(void) if (!x86_pmu.guest_get_msrs) x86_pmu.guest_get_msrs = (void *)&__static_call_return0; + if (!x86_pmu.set_period) + x86_pmu.set_period = x86_perf_event_set_period; + + if (!x86_pmu.update) + x86_pmu.update = x86_perf_event_update; + x86_pmu_static_call_update(); /* @@ -2670,7 +2669,9 @@ static int x86_pmu_check_period(struct perf_event *event, u64 value) return -EINVAL; if (value && x86_pmu.limit_period) { - if (x86_pmu.limit_period(event, value) > value) + s64 left = value; + x86_pmu.limit_period(event, &left); + if (left > value) return -EINVAL; } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index c20d8cd47c48..a646a5f9a235 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2199,6 +2199,12 @@ static void __intel_pmu_enable_all(int added, bool pmi) u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); intel_pmu_lbr_enable_all(pmi); + + if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) { + wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val); + cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val; + } + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, intel_ctrl & ~cpuc->intel_ctrl_guest_mask); @@ -2311,7 +2317,7 @@ static void intel_pmu_nhm_workaround(void) for (i = 0; i < 4; i++) { event = cpuc->events[i]; if (event) - x86_perf_event_update(event); + static_call(x86_pmu_update)(event); } for (i = 0; i < 4; i++) { @@ -2326,7 +2332,7 @@ static void intel_pmu_nhm_workaround(void) event = cpuc->events[i]; if (event) { - x86_perf_event_set_period(event); + static_call(x86_pmu_set_period)(event); __x86_pmu_enable_event(&event->hw, ARCH_PERFMON_EVENTSEL_ENABLE); } else @@ -2416,9 +2422,10 @@ static inline void intel_clear_masks(struct perf_event *event, int idx) static void intel_pmu_disable_fixed(struct perf_event *event) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - u64 ctrl_val, mask; int idx = hwc->idx; + u64 mask; if (is_topdown_idx(idx)) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -2435,9 +2442,7 @@ static void intel_pmu_disable_fixed(struct perf_event *event) intel_clear_masks(event, idx); mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4); - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - wrmsrl(hwc->config_base, ctrl_val); + cpuc->fixed_ctrl_val &= ~mask; } static void intel_pmu_disable_event(struct perf_event *event) @@ -2530,6 +2535,8 @@ static int adl_set_topdown_event_period(struct perf_event *event) return icl_set_topdown_event_period(event); } +DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period); + static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx) { u32 val; @@ -2680,6 +2687,7 @@ static u64 adl_update_topdown_event(struct perf_event *event) return icl_update_topdown_event(event); } +DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update); static void intel_pmu_read_topdown_event(struct perf_event *event) { @@ -2691,7 +2699,7 @@ static void intel_pmu_read_topdown_event(struct perf_event *event) return; perf_pmu_disable(event->pmu); - x86_pmu.update_topdown_event(event); + static_call(intel_pmu_update_topdown_event)(event); perf_pmu_enable(event->pmu); } @@ -2699,7 +2707,7 @@ static void intel_pmu_read_event(struct perf_event *event) { if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) intel_pmu_auto_reload_read(event); - else if (is_topdown_count(event) && x86_pmu.update_topdown_event) + else if (is_topdown_count(event)) intel_pmu_read_topdown_event(event); else x86_perf_event_update(event); @@ -2707,8 +2715,9 @@ static void intel_pmu_read_event(struct perf_event *event) static void intel_pmu_enable_fixed(struct perf_event *event) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - u64 ctrl_val, mask, bits = 0; + u64 mask, bits = 0; int idx = hwc->idx; if (is_topdown_idx(idx)) { @@ -2752,10 +2761,8 @@ static void intel_pmu_enable_fixed(struct perf_event *event) mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4); } - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - ctrl_val |= bits; - wrmsrl(hwc->config_base, ctrl_val); + cpuc->fixed_ctrl_val &= ~mask; + cpuc->fixed_ctrl_val |= bits; } static void intel_pmu_enable_event(struct perf_event *event) @@ -2803,7 +2810,7 @@ static void intel_pmu_add_event(struct perf_event *event) */ int intel_pmu_save_and_restart(struct perf_event *event) { - x86_perf_event_update(event); + static_call(x86_pmu_update)(event); /* * For a checkpointed counter always reset back to 0. This * avoids a situation where the counter overflows, aborts the @@ -2815,9 +2822,25 @@ int intel_pmu_save_and_restart(struct perf_event *event) wrmsrl(event->hw.event_base, 0); local64_set(&event->hw.prev_count, 0); } + return static_call(x86_pmu_set_period)(event); +} + +static int intel_pmu_set_period(struct perf_event *event) +{ + if (unlikely(is_topdown_count(event))) + return static_call(intel_pmu_set_topdown_event_period)(event); + return x86_perf_event_set_period(event); } +static u64 intel_pmu_update(struct perf_event *event) +{ + if (unlikely(is_topdown_count(event))) + return static_call(intel_pmu_update_topdown_event)(event); + + return x86_perf_event_update(event); +} + static void intel_pmu_reset(void) { struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); @@ -2980,8 +3003,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) { handled++; - if (x86_pmu.update_topdown_event) - x86_pmu.update_topdown_event(NULL); + static_call(intel_pmu_update_topdown_event)(NULL); } /* @@ -3004,8 +3026,10 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) perf_sample_data_init(&data, 0, event->hw.last_period); - if (has_branch_stack(event)) + if (has_branch_stack(event)) { data.br_stack = &cpuc->lbr_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); @@ -3853,9 +3877,6 @@ static int intel_pmu_hw_config(struct perf_event *event) } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); - - if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) - event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY; } if (needs_branch_stack(event)) { @@ -4334,28 +4355,25 @@ static u8 adl_get_hybrid_cpu_type(void) * Therefore the effective (average) period matches the requested period, * despite coarser hardware granularity. */ -static u64 bdw_limit_period(struct perf_event *event, u64 left) +static void bdw_limit_period(struct perf_event *event, s64 *left) { if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xc0, .umask=0x01)) { - if (left < 128) - left = 128; - left &= ~0x3fULL; + if (*left < 128) + *left = 128; + *left &= ~0x3fULL; } - return left; } -static u64 nhm_limit_period(struct perf_event *event, u64 left) +static void nhm_limit_period(struct perf_event *event, s64 *left) { - return max(left, 32ULL); + *left = max(*left, 32LL); } -static u64 spr_limit_period(struct perf_event *event, u64 left) +static void spr_limit_period(struct perf_event *event, s64 *left) { if (event->attr.precise_ip == 3) - return max(left, 128ULL); - - return left; + *left = max(*left, 128LL); } PMU_FORMAT_ATTR(event, "config:0-7" ); @@ -4794,6 +4812,8 @@ static __initconst const struct x86_pmu intel_pmu = { .add = intel_pmu_add_event, .del = intel_pmu_del_event, .read = intel_pmu_read_event, + .set_period = intel_pmu_set_period, + .update = intel_pmu_update, .hw_config = intel_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, @@ -6312,8 +6332,10 @@ __init int intel_pmu_init(void) x86_pmu.lbr_pt_coexist = true; intel_pmu_pebs_data_source_skl(pmem); x86_pmu.num_topdown_events = 4; - x86_pmu.update_topdown_event = icl_update_topdown_event; - x86_pmu.set_topdown_event_period = icl_set_topdown_event_period; + static_call_update(intel_pmu_update_topdown_event, + &icl_update_topdown_event); + static_call_update(intel_pmu_set_topdown_event_period, + &icl_set_topdown_event_period); pr_cont("Icelake events, "); name = "icelake"; break; @@ -6348,8 +6370,10 @@ __init int intel_pmu_init(void) x86_pmu.lbr_pt_coexist = true; intel_pmu_pebs_data_source_skl(pmem); x86_pmu.num_topdown_events = 8; - x86_pmu.update_topdown_event = icl_update_topdown_event; - x86_pmu.set_topdown_event_period = icl_set_topdown_event_period; + static_call_update(intel_pmu_update_topdown_event, + &icl_update_topdown_event); + static_call_update(intel_pmu_set_topdown_event_period, + &icl_set_topdown_event_period); pr_cont("Sapphire Rapids events, "); name = "sapphire_rapids"; break; @@ -6358,6 +6382,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ALDERLAKE_L: case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: + case INTEL_FAM6_RAPTORLAKE_S: /* * Alder Lake has 2 types of CPU, core and atom. * @@ -6382,8 +6407,10 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_adl(); x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.num_topdown_events = 8; - x86_pmu.update_topdown_event = adl_update_topdown_event; - x86_pmu.set_topdown_event_period = adl_set_topdown_event_period; + static_call_update(intel_pmu_update_topdown_event, + &adl_update_topdown_event); + static_call_update(intel_pmu_set_topdown_event_period, + &adl_set_topdown_event_period); x86_pmu.filter_match = intel_pmu_filter_match; x86_pmu.get_event_constraints = adl_get_event_constraints; diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 8ec23f47fee9..a2834bc93149 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -685,6 +685,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index ac973c6f82ad..7839507b3844 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1540,14 +1540,18 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, /* * Use latency for weight (only avail with PEBS-LL) */ - if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) + if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) { data->weight.full = pebs->lat; + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } /* * data.data_src encodes the data source */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (sample_type & PERF_SAMPLE_DATA_SRC) { data->data_src.val = get_data_src(event, pebs->dse); + data->sample_flags |= PERF_SAMPLE_DATA_SRC; + } /* * We must however always use iregs for the unwinder to stay sane; the @@ -1555,8 +1559,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, * previous PMI context or an (I)RET happened between the record and * PMI. */ - if (sample_type & PERF_SAMPLE_CALLCHAIN) + if (sample_type & PERF_SAMPLE_CALLCHAIN) { data->callchain = perf_callchain(event, iregs); + data->sample_flags |= PERF_SAMPLE_CALLCHAIN; + } /* * We use the interrupt regs as a base because the PEBS record does not @@ -1628,17 +1634,22 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, if ((sample_type & PERF_SAMPLE_ADDR_TYPE) && - x86_pmu.intel_cap.pebs_format >= 1) + x86_pmu.intel_cap.pebs_format >= 1) { data->addr = pebs->dla; + data->sample_flags |= PERF_SAMPLE_ADDR; + } if (x86_pmu.intel_cap.pebs_format >= 2) { /* Only set the TSX weight when no memory weight. */ - if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) + if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) { data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning); - - if (sample_type & PERF_SAMPLE_TRANSACTION) + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } + if (sample_type & PERF_SAMPLE_TRANSACTION) { data->txn = intel_get_tsx_transaction(pebs->tsx_tuning, pebs->ax); + data->sample_flags |= PERF_SAMPLE_TRANSACTION; + } } /* @@ -1648,11 +1659,15 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, * We can only do this for the default trace clock. */ if (x86_pmu.intel_cap.pebs_format >= 3 && - event->attr.use_clockid == 0) + event->attr.use_clockid == 0) { data->time = native_sched_clock_from_tsc(pebs->tsc); + data->sample_flags |= PERF_SAMPLE_TIME; + } - if (has_branch_stack(event)) + if (has_branch_stack(event)) { data->br_stack = &cpuc->lbr_stack; + data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } } static void adaptive_pebs_save_regs(struct pt_regs *regs, @@ -1710,8 +1725,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, perf_sample_data_init(data, 0, event->hw.last_period); data->period = event->hw.last_period; - if (event->attr.use_clockid == 0) + if (event->attr.use_clockid == 0) { data->time = native_sched_clock_from_tsc(basic->tsc); + data->sample_flags |= PERF_SAMPLE_TIME; + } /* * We must however always use iregs for the unwinder to stay sane; the @@ -1719,8 +1736,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, * previous PMI context or an (I)RET happened between the record and * PMI. */ - if (sample_type & PERF_SAMPLE_CALLCHAIN) + if (sample_type & PERF_SAMPLE_CALLCHAIN) { data->callchain = perf_callchain(event, iregs); + data->sample_flags |= PERF_SAMPLE_CALLCHAIN; + } *regs = *iregs; /* The ip in basic is EventingIP */ @@ -1771,17 +1790,24 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?: intel_get_tsx_weight(meminfo->tsx_tuning); } + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; } - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (sample_type & PERF_SAMPLE_DATA_SRC) { data->data_src.val = get_data_src(event, meminfo->aux); + data->sample_flags |= PERF_SAMPLE_DATA_SRC; + } - if (sample_type & PERF_SAMPLE_ADDR_TYPE) + if (sample_type & PERF_SAMPLE_ADDR_TYPE) { data->addr = meminfo->address; + data->sample_flags |= PERF_SAMPLE_ADDR; + } - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (sample_type & PERF_SAMPLE_TRANSACTION) { data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning, gprs ? gprs->ax : 0); + data->sample_flags |= PERF_SAMPLE_TRANSACTION; + } } if (format_size & PEBS_DATACFG_XMMS) { @@ -1800,6 +1826,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, if (has_branch_stack(event)) { intel_pmu_store_pebs_lbrs(lbr); data->br_stack = &cpuc->lbr_stack; + data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; } } diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 47fca6a7a8bc..4fce1a4226e3 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -4,7 +4,6 @@ #include <asm/perf_event.h> #include <asm/msr.h> -#include <asm/insn.h> #include "../perf_event.h" @@ -66,65 +65,6 @@ #define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) /* - * x86control flow change classification - * x86control flow changes include branches, interrupts, traps, faults - */ -enum { - X86_BR_NONE = 0, /* unknown */ - - X86_BR_USER = 1 << 0, /* branch target is user */ - X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ - - X86_BR_CALL = 1 << 2, /* call */ - X86_BR_RET = 1 << 3, /* return */ - X86_BR_SYSCALL = 1 << 4, /* syscall */ - X86_BR_SYSRET = 1 << 5, /* syscall return */ - X86_BR_INT = 1 << 6, /* sw interrupt */ - X86_BR_IRET = 1 << 7, /* return from interrupt */ - X86_BR_JCC = 1 << 8, /* conditional */ - X86_BR_JMP = 1 << 9, /* jump */ - X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ - X86_BR_IND_CALL = 1 << 11,/* indirect calls */ - X86_BR_ABORT = 1 << 12,/* transaction abort */ - X86_BR_IN_TX = 1 << 13,/* in transaction */ - X86_BR_NO_TX = 1 << 14,/* not in transaction */ - X86_BR_ZERO_CALL = 1 << 15,/* zero length call */ - X86_BR_CALL_STACK = 1 << 16,/* call stack */ - X86_BR_IND_JMP = 1 << 17,/* indirect jump */ - - X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */ - -}; - -#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) -#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX) - -#define X86_BR_ANY \ - (X86_BR_CALL |\ - X86_BR_RET |\ - X86_BR_SYSCALL |\ - X86_BR_SYSRET |\ - X86_BR_INT |\ - X86_BR_IRET |\ - X86_BR_JCC |\ - X86_BR_JMP |\ - X86_BR_IRQ |\ - X86_BR_ABORT |\ - X86_BR_IND_CALL |\ - X86_BR_IND_JMP |\ - X86_BR_ZERO_CALL) - -#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) - -#define X86_BR_ANY_CALL \ - (X86_BR_CALL |\ - X86_BR_IND_CALL |\ - X86_BR_ZERO_CALL |\ - X86_BR_SYSCALL |\ - X86_BR_IRQ |\ - X86_BR_INT) - -/* * Intel LBR_CTL bits * * Hardware branch filter for Arch LBR @@ -1151,219 +1091,6 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) return ret; } -/* - * return the type of control flow change at address "from" - * instruction is not necessarily a branch (in case of interrupt). - * - * The branch type returned also includes the priv level of the - * target of the control flow change (X86_BR_USER, X86_BR_KERNEL). - * - * If a branch type is unknown OR the instruction cannot be - * decoded (e.g., text page not present), then X86_BR_NONE is - * returned. - */ -static int branch_type(unsigned long from, unsigned long to, int abort) -{ - struct insn insn; - void *addr; - int bytes_read, bytes_left; - int ret = X86_BR_NONE; - int ext, to_plm, from_plm; - u8 buf[MAX_INSN_SIZE]; - int is64 = 0; - - to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; - from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER; - - /* - * maybe zero if lbr did not fill up after a reset by the time - * we get a PMU interrupt - */ - if (from == 0 || to == 0) - return X86_BR_NONE; - - if (abort) - return X86_BR_ABORT | to_plm; - - if (from_plm == X86_BR_USER) { - /* - * can happen if measuring at the user level only - * and we interrupt in a kernel thread, e.g., idle. - */ - if (!current->mm) - return X86_BR_NONE; - - /* may fail if text not present */ - bytes_left = copy_from_user_nmi(buf, (void __user *)from, - MAX_INSN_SIZE); - bytes_read = MAX_INSN_SIZE - bytes_left; - if (!bytes_read) - return X86_BR_NONE; - - addr = buf; - } else { - /* - * The LBR logs any address in the IP, even if the IP just - * faulted. This means userspace can control the from address. - * Ensure we don't blindly read any address by validating it is - * a known text address. - */ - if (kernel_text_address(from)) { - addr = (void *)from; - /* - * Assume we can get the maximum possible size - * when grabbing kernel data. This is not - * _strictly_ true since we could possibly be - * executing up next to a memory hole, but - * it is very unlikely to be a problem. - */ - bytes_read = MAX_INSN_SIZE; - } else { - return X86_BR_NONE; - } - } - - /* - * decoder needs to know the ABI especially - * on 64-bit systems running 32-bit apps - */ -#ifdef CONFIG_X86_64 - is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs()); -#endif - insn_init(&insn, addr, bytes_read, is64); - if (insn_get_opcode(&insn)) - return X86_BR_ABORT; - - switch (insn.opcode.bytes[0]) { - case 0xf: - switch (insn.opcode.bytes[1]) { - case 0x05: /* syscall */ - case 0x34: /* sysenter */ - ret = X86_BR_SYSCALL; - break; - case 0x07: /* sysret */ - case 0x35: /* sysexit */ - ret = X86_BR_SYSRET; - break; - case 0x80 ... 0x8f: /* conditional */ - ret = X86_BR_JCC; - break; - default: - ret = X86_BR_NONE; - } - break; - case 0x70 ... 0x7f: /* conditional */ - ret = X86_BR_JCC; - break; - case 0xc2: /* near ret */ - case 0xc3: /* near ret */ - case 0xca: /* far ret */ - case 0xcb: /* far ret */ - ret = X86_BR_RET; - break; - case 0xcf: /* iret */ - ret = X86_BR_IRET; - break; - case 0xcc ... 0xce: /* int */ - ret = X86_BR_INT; - break; - case 0xe8: /* call near rel */ - if (insn_get_immediate(&insn) || insn.immediate1.value == 0) { - /* zero length call */ - ret = X86_BR_ZERO_CALL; - break; - } - fallthrough; - case 0x9a: /* call far absolute */ - ret = X86_BR_CALL; - break; - case 0xe0 ... 0xe3: /* loop jmp */ - ret = X86_BR_JCC; - break; - case 0xe9 ... 0xeb: /* jmp */ - ret = X86_BR_JMP; - break; - case 0xff: /* call near absolute, call far absolute ind */ - if (insn_get_modrm(&insn)) - return X86_BR_ABORT; - - ext = (insn.modrm.bytes[0] >> 3) & 0x7; - switch (ext) { - case 2: /* near ind call */ - case 3: /* far ind call */ - ret = X86_BR_IND_CALL; - break; - case 4: - case 5: - ret = X86_BR_IND_JMP; - break; - } - break; - default: - ret = X86_BR_NONE; - } - /* - * interrupts, traps, faults (and thus ring transition) may - * occur on any instructions. Thus, to classify them correctly, - * we need to first look at the from and to priv levels. If they - * are different and to is in the kernel, then it indicates - * a ring transition. If the from instruction is not a ring - * transition instr (syscall, systenter, int), then it means - * it was a irq, trap or fault. - * - * we have no way of detecting kernel to kernel faults. - */ - if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL - && ret != X86_BR_SYSCALL && ret != X86_BR_INT) - ret = X86_BR_IRQ; - - /* - * branch priv level determined by target as - * is done by HW when LBR_SELECT is implemented - */ - if (ret != X86_BR_NONE) - ret |= to_plm; - - return ret; -} - -#define X86_BR_TYPE_MAP_MAX 16 - -static int branch_map[X86_BR_TYPE_MAP_MAX] = { - PERF_BR_CALL, /* X86_BR_CALL */ - PERF_BR_RET, /* X86_BR_RET */ - PERF_BR_SYSCALL, /* X86_BR_SYSCALL */ - PERF_BR_SYSRET, /* X86_BR_SYSRET */ - PERF_BR_UNKNOWN, /* X86_BR_INT */ - PERF_BR_ERET, /* X86_BR_IRET */ - PERF_BR_COND, /* X86_BR_JCC */ - PERF_BR_UNCOND, /* X86_BR_JMP */ - PERF_BR_IRQ, /* X86_BR_IRQ */ - PERF_BR_IND_CALL, /* X86_BR_IND_CALL */ - PERF_BR_UNKNOWN, /* X86_BR_ABORT */ - PERF_BR_UNKNOWN, /* X86_BR_IN_TX */ - PERF_BR_UNKNOWN, /* X86_BR_NO_TX */ - PERF_BR_CALL, /* X86_BR_ZERO_CALL */ - PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */ - PERF_BR_IND, /* X86_BR_IND_JMP */ -}; - -static int -common_branch_type(int type) -{ - int i; - - type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */ - - if (type) { - i = __ffs(type); - if (i < X86_BR_TYPE_MAP_MAX) - return branch_map[i]; - } - - return PERF_BR_UNKNOWN; -} - enum { ARCH_LBR_BR_TYPE_JCC = 0, ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1, diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index 7951a5dc73b6..03bbcc2fa2ff 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -1006,6 +1006,29 @@ static void p4_pmu_enable_all(int added) } } +static int p4_pmu_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + s64 left = this_cpu_read(pmc_prev_left[hwc->idx]); + int ret; + + ret = x86_perf_event_set_period(event); + + if (hwc->event_base) { + /* + * This handles erratum N15 in intel doc 249199-029, + * the counter may not be updated correctly on write + * so we need a second write operation to do the trick + * (the official workaround didn't work) + * + * the former idea is taken from OProfile code + */ + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); + } + + return ret; +} + static int p4_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; @@ -1044,7 +1067,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) /* event overflow for sure */ perf_sample_data_init(&data, 0, hwc->last_period); - if (!x86_perf_event_set_period(event)) + if (!static_call(x86_pmu_set_period)(event)) continue; @@ -1316,6 +1339,9 @@ static __initconst const struct x86_pmu p4_pmu = { .enable_all = p4_pmu_enable_all, .enable = p4_pmu_enable_event, .disable = p4_pmu_disable_event, + + .set_period = p4_pmu_set_period, + .eventsel = MSR_P4_BPU_CCCR0, .perfctr = MSR_P4_BPU_PERFCTR0, .event_map = p4_pmu_event_map, @@ -1334,15 +1360,6 @@ static __initconst const struct x86_pmu p4_pmu = { .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1, .hw_config = p4_hw_config, .schedule_events = p4_pmu_schedule_events, - /* - * This handles erratum N15 in intel doc 249199-029, - * the counter may not be updated correctly on write - * so we need a second write operation to do the trick - * (the official workaround didn't work) - * - * the former idea is taken from OProfile code - */ - .perfctr_second_write = 1, .format_attrs = intel_p4_formats_attr, }; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index db6c31bca809..6f1ccc57a692 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1831,6 +1831,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), {}, diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index ac542f98c070..ecced3a52668 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -106,6 +106,7 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_ALDERLAKE_N: case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: + case INTEL_FAM6_RAPTORLAKE_S: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 266143abcbd8..332d2e6d8ae4 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -64,27 +64,25 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode) return ((ecode & c->cmask) - c->code) <= (u64)c->size; } +#define PERF_ARCH(name, val) \ + PERF_X86_EVENT_##name = val, + /* * struct hw_perf_event.flags flags */ -#define PERF_X86_EVENT_PEBS_LDLAT 0x00001 /* ld+ldlat data address sampling */ -#define PERF_X86_EVENT_PEBS_ST 0x00002 /* st data address sampling */ -#define PERF_X86_EVENT_PEBS_ST_HSW 0x00004 /* haswell style datala, store */ -#define PERF_X86_EVENT_PEBS_LD_HSW 0x00008 /* haswell style datala, load */ -#define PERF_X86_EVENT_PEBS_NA_HSW 0x00010 /* haswell style datala, unknown */ -#define PERF_X86_EVENT_EXCL 0x00020 /* HT exclusivity on counter */ -#define PERF_X86_EVENT_DYNAMIC 0x00040 /* dynamic alloc'd constraint */ - -#define PERF_X86_EVENT_EXCL_ACCT 0x00100 /* accounted EXCL event */ -#define PERF_X86_EVENT_AUTO_RELOAD 0x00200 /* use PEBS auto-reload */ -#define PERF_X86_EVENT_LARGE_PEBS 0x00400 /* use large PEBS */ -#define PERF_X86_EVENT_PEBS_VIA_PT 0x00800 /* use PT buffer for PEBS */ -#define PERF_X86_EVENT_PAIR 0x01000 /* Large Increment per Cycle */ -#define PERF_X86_EVENT_LBR_SELECT 0x02000 /* Save/Restore MSR_LBR_SELECT */ -#define PERF_X86_EVENT_TOPDOWN 0x04000 /* Count Topdown slots/metrics events */ -#define PERF_X86_EVENT_PEBS_STLAT 0x08000 /* st+stlat data address sampling */ -#define PERF_X86_EVENT_AMD_BRS 0x10000 /* AMD Branch Sampling */ -#define PERF_X86_EVENT_PEBS_LAT_HYBRID 0x20000 /* ld and st lat for hybrid */ +enum { +#include "perf_event_flags.h" +}; + +#undef PERF_ARCH + +#define PERF_ARCH(name, val) \ + static_assert((PERF_X86_EVENT_##name & PERF_EVENT_FLAG_ARCH) == \ + PERF_X86_EVENT_##name); + +#include "perf_event_flags.h" + +#undef PERF_ARCH static inline bool is_topdown_count(struct perf_event *event) { @@ -272,6 +270,10 @@ struct cpu_hw_events { u64 active_pebs_data_cfg; int pebs_record_size; + /* Intel Fixed counter configuration */ + u64 fixed_ctrl_val; + u64 active_fixed_ctrl_val; + /* * Intel LBR bits */ @@ -745,6 +747,8 @@ struct x86_pmu { void (*add)(struct perf_event *); void (*del)(struct perf_event *); void (*read)(struct perf_event *event); + int (*set_period)(struct perf_event *event); + u64 (*update)(struct perf_event *event); int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; @@ -780,8 +784,7 @@ struct x86_pmu { struct event_constraint *event_constraints; struct x86_pmu_quirk *quirks; - int perfctr_second_write; - u64 (*limit_period)(struct perf_event *event, u64 l); + void (*limit_period)(struct perf_event *event, s64 *l); /* PMI handler bits */ unsigned int late_ack :1, @@ -889,8 +892,6 @@ struct x86_pmu { * Intel perf metrics */ int num_topdown_events; - u64 (*update_topdown_event)(struct perf_event *event); - int (*set_topdown_event_period)(struct perf_event *event); /* * perf task context (i.e. struct perf_event_context::task_ctx_data) @@ -1044,6 +1045,9 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\ struct pmu *x86_get_pmu(unsigned int cpu); extern struct x86_pmu x86_pmu __read_mostly; +DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period); +DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update); + static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx) { if (static_cpu_has(X86_FEATURE_ARCH_LBR)) @@ -1059,6 +1063,7 @@ static inline bool x86_pmu_has_lbr_callstack(void) } DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); +DECLARE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); int x86_perf_event_set_period(struct perf_event *event); @@ -1210,6 +1215,70 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) regs->ip = ip; } +/* + * x86control flow change classification + * x86control flow changes include branches, interrupts, traps, faults + */ +enum { + X86_BR_NONE = 0, /* unknown */ + + X86_BR_USER = 1 << 0, /* branch target is user */ + X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ + + X86_BR_CALL = 1 << 2, /* call */ + X86_BR_RET = 1 << 3, /* return */ + X86_BR_SYSCALL = 1 << 4, /* syscall */ + X86_BR_SYSRET = 1 << 5, /* syscall return */ + X86_BR_INT = 1 << 6, /* sw interrupt */ + X86_BR_IRET = 1 << 7, /* return from interrupt */ + X86_BR_JCC = 1 << 8, /* conditional */ + X86_BR_JMP = 1 << 9, /* jump */ + X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ + X86_BR_IND_CALL = 1 << 11,/* indirect calls */ + X86_BR_ABORT = 1 << 12,/* transaction abort */ + X86_BR_IN_TX = 1 << 13,/* in transaction */ + X86_BR_NO_TX = 1 << 14,/* not in transaction */ + X86_BR_ZERO_CALL = 1 << 15,/* zero length call */ + X86_BR_CALL_STACK = 1 << 16,/* call stack */ + X86_BR_IND_JMP = 1 << 17,/* indirect jump */ + + X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */ + +}; + +#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) +#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX) + +#define X86_BR_ANY \ + (X86_BR_CALL |\ + X86_BR_RET |\ + X86_BR_SYSCALL |\ + X86_BR_SYSRET |\ + X86_BR_INT |\ + X86_BR_IRET |\ + X86_BR_JCC |\ + X86_BR_JMP |\ + X86_BR_IRQ |\ + X86_BR_ABORT |\ + X86_BR_IND_CALL |\ + X86_BR_IND_JMP |\ + X86_BR_ZERO_CALL) + +#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) + +#define X86_BR_ANY_CALL \ + (X86_BR_CALL |\ + X86_BR_IND_CALL |\ + X86_BR_ZERO_CALL |\ + X86_BR_SYSCALL |\ + X86_BR_IRQ |\ + X86_BR_INT) + +int common_branch_type(int type); +int branch_type(unsigned long from, unsigned long to, int abort); +int branch_type_fused(unsigned long from, unsigned long to, int abort, + int *offset); + ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event); ssize_t intel_event_sysfs_show(char *page, u64 config); @@ -1232,7 +1301,20 @@ static inline bool fixed_counter_disabled(int i, struct pmu *pmu) int amd_pmu_init(void); +int amd_pmu_lbr_init(void); +void amd_pmu_lbr_reset(void); +void amd_pmu_lbr_read(void); +void amd_pmu_lbr_add(struct perf_event *event); +void amd_pmu_lbr_del(struct perf_event *event); +void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); +void amd_pmu_lbr_enable_all(void); +void amd_pmu_lbr_disable_all(void); +int amd_pmu_lbr_hw_config(struct perf_event *event); + #ifdef CONFIG_PERF_EVENTS_AMD_BRS + +#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */ + int amd_brs_init(void); void amd_brs_disable(void); void amd_brs_enable(void); @@ -1241,7 +1323,7 @@ void amd_brs_disable_all(void); void amd_brs_drain(void); void amd_brs_lopwr_init(void); void amd_brs_disable_all(void); -int amd_brs_setup_filter(struct perf_event *event); +int amd_brs_hw_config(struct perf_event *event); void amd_brs_reset(void); static inline void amd_pmu_brs_add(struct perf_event *event) @@ -1277,7 +1359,7 @@ static inline void amd_brs_enable(void) {} static inline void amd_brs_drain(void) {} static inline void amd_brs_lopwr_init(void) {} static inline void amd_brs_disable_all(void) {} -static inline int amd_brs_setup_filter(struct perf_event *event) +static inline int amd_brs_hw_config(struct perf_event *event) { return 0; } diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h new file mode 100644 index 000000000000..1dc19b9b4426 --- /dev/null +++ b/arch/x86/events/perf_event_flags.h @@ -0,0 +1,22 @@ + +/* + * struct hw_perf_event.flags flags + */ +PERF_ARCH(PEBS_LDLAT, 0x00001) /* ld+ldlat data address sampling */ +PERF_ARCH(PEBS_ST, 0x00002) /* st data address sampling */ +PERF_ARCH(PEBS_ST_HSW, 0x00004) /* haswell style datala, store */ +PERF_ARCH(PEBS_LD_HSW, 0x00008) /* haswell style datala, load */ +PERF_ARCH(PEBS_NA_HSW, 0x00010) /* haswell style datala, unknown */ +PERF_ARCH(EXCL, 0x00020) /* HT exclusivity on counter */ +PERF_ARCH(DYNAMIC, 0x00040) /* dynamic alloc'd constraint */ + /* 0x00080 */ +PERF_ARCH(EXCL_ACCT, 0x00100) /* accounted EXCL event */ +PERF_ARCH(AUTO_RELOAD, 0x00200) /* use PEBS auto-reload */ +PERF_ARCH(LARGE_PEBS, 0x00400) /* use large PEBS */ +PERF_ARCH(PEBS_VIA_PT, 0x00800) /* use PT buffer for PEBS */ +PERF_ARCH(PAIR, 0x01000) /* Large Increment per Cycle */ +PERF_ARCH(LBR_SELECT, 0x02000) /* Save/Restore MSR_LBR_SELECT */ +PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */ +PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */ +PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */ +PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */ diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c new file mode 100644 index 000000000000..76b1f8bb0fd5 --- /dev/null +++ b/arch/x86/events/utils.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <asm/insn.h> + +#include "perf_event.h" + +static int decode_branch_type(struct insn *insn) +{ + int ext; + + if (insn_get_opcode(insn)) + return X86_BR_ABORT; + + switch (insn->opcode.bytes[0]) { + case 0xf: + switch (insn->opcode.bytes[1]) { + case 0x05: /* syscall */ + case 0x34: /* sysenter */ + return X86_BR_SYSCALL; + case 0x07: /* sysret */ + case 0x35: /* sysexit */ + return X86_BR_SYSRET; + case 0x80 ... 0x8f: /* conditional */ + return X86_BR_JCC; + } + return X86_BR_NONE; + case 0x70 ... 0x7f: /* conditional */ + return X86_BR_JCC; + case 0xc2: /* near ret */ + case 0xc3: /* near ret */ + case 0xca: /* far ret */ + case 0xcb: /* far ret */ + return X86_BR_RET; + case 0xcf: /* iret */ + return X86_BR_IRET; + case 0xcc ... 0xce: /* int */ + return X86_BR_INT; + case 0xe8: /* call near rel */ + if (insn_get_immediate(insn) || insn->immediate1.value == 0) { + /* zero length call */ + return X86_BR_ZERO_CALL; + } + fallthrough; + case 0x9a: /* call far absolute */ + return X86_BR_CALL; + case 0xe0 ... 0xe3: /* loop jmp */ + return X86_BR_JCC; + case 0xe9 ... 0xeb: /* jmp */ + return X86_BR_JMP; + case 0xff: /* call near absolute, call far absolute ind */ + if (insn_get_modrm(insn)) + return X86_BR_ABORT; + + ext = (insn->modrm.bytes[0] >> 3) & 0x7; + switch (ext) { + case 2: /* near ind call */ + case 3: /* far ind call */ + return X86_BR_IND_CALL; + case 4: + case 5: + return X86_BR_IND_JMP; + } + return X86_BR_NONE; + } + + return X86_BR_NONE; +} + +/* + * return the type of control flow change at address "from" + * instruction is not necessarily a branch (in case of interrupt). + * + * The branch type returned also includes the priv level of the + * target of the control flow change (X86_BR_USER, X86_BR_KERNEL). + * + * If a branch type is unknown OR the instruction cannot be + * decoded (e.g., text page not present), then X86_BR_NONE is + * returned. + * + * While recording branches, some processors can report the "from" + * address to be that of an instruction preceding the actual branch + * when instruction fusion occurs. If fusion is expected, attempt to + * find the type of the first branch instruction within the next + * MAX_INSN_SIZE bytes and if found, provide the offset between the + * reported "from" address and the actual branch instruction address. + */ +static int get_branch_type(unsigned long from, unsigned long to, int abort, + bool fused, int *offset) +{ + struct insn insn; + void *addr; + int bytes_read, bytes_left, insn_offset; + int ret = X86_BR_NONE; + int to_plm, from_plm; + u8 buf[MAX_INSN_SIZE]; + int is64 = 0; + + /* make sure we initialize offset */ + if (offset) + *offset = 0; + + to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; + from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER; + + /* + * maybe zero if lbr did not fill up after a reset by the time + * we get a PMU interrupt + */ + if (from == 0 || to == 0) + return X86_BR_NONE; + + if (abort) + return X86_BR_ABORT | to_plm; + + if (from_plm == X86_BR_USER) { + /* + * can happen if measuring at the user level only + * and we interrupt in a kernel thread, e.g., idle. + */ + if (!current->mm) + return X86_BR_NONE; + + /* may fail if text not present */ + bytes_left = copy_from_user_nmi(buf, (void __user *)from, + MAX_INSN_SIZE); + bytes_read = MAX_INSN_SIZE - bytes_left; + if (!bytes_read) + return X86_BR_NONE; + + addr = buf; + } else { + /* + * The LBR logs any address in the IP, even if the IP just + * faulted. This means userspace can control the from address. + * Ensure we don't blindly read any address by validating it is + * a known text address. + */ + if (kernel_text_address(from)) { + addr = (void *)from; + /* + * Assume we can get the maximum possible size + * when grabbing kernel data. This is not + * _strictly_ true since we could possibly be + * executing up next to a memory hole, but + * it is very unlikely to be a problem. + */ + bytes_read = MAX_INSN_SIZE; + } else { + return X86_BR_NONE; + } + } + + /* + * decoder needs to know the ABI especially + * on 64-bit systems running 32-bit apps + */ +#ifdef CONFIG_X86_64 + is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs()); +#endif + insn_init(&insn, addr, bytes_read, is64); + ret = decode_branch_type(&insn); + insn_offset = 0; + + /* Check for the possibility of branch fusion */ + while (fused && ret == X86_BR_NONE) { + /* Check for decoding errors */ + if (insn_get_length(&insn) || !insn.length) + break; + + insn_offset += insn.length; + bytes_read -= insn.length; + if (bytes_read < 0) + break; + + insn_init(&insn, addr + insn_offset, bytes_read, is64); + ret = decode_branch_type(&insn); + } + + if (offset) + *offset = insn_offset; + + /* + * interrupts, traps, faults (and thus ring transition) may + * occur on any instructions. Thus, to classify them correctly, + * we need to first look at the from and to priv levels. If they + * are different and to is in the kernel, then it indicates + * a ring transition. If the from instruction is not a ring + * transition instr (syscall, systenter, int), then it means + * it was a irq, trap or fault. + * + * we have no way of detecting kernel to kernel faults. + */ + if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL + && ret != X86_BR_SYSCALL && ret != X86_BR_INT) + ret = X86_BR_IRQ; + + /* + * branch priv level determined by target as + * is done by HW when LBR_SELECT is implemented + */ + if (ret != X86_BR_NONE) + ret |= to_plm; + + return ret; +} + +int branch_type(unsigned long from, unsigned long to, int abort) +{ + return get_branch_type(from, to, abort, false, NULL); +} + +int branch_type_fused(unsigned long from, unsigned long to, int abort, + int *offset) +{ + return get_branch_type(from, to, abort, true, offset); +} + +#define X86_BR_TYPE_MAP_MAX 16 + +static int branch_map[X86_BR_TYPE_MAP_MAX] = { + PERF_BR_CALL, /* X86_BR_CALL */ + PERF_BR_RET, /* X86_BR_RET */ + PERF_BR_SYSCALL, /* X86_BR_SYSCALL */ + PERF_BR_SYSRET, /* X86_BR_SYSRET */ + PERF_BR_UNKNOWN, /* X86_BR_INT */ + PERF_BR_ERET, /* X86_BR_IRET */ + PERF_BR_COND, /* X86_BR_JCC */ + PERF_BR_UNCOND, /* X86_BR_JMP */ + PERF_BR_IRQ, /* X86_BR_IRQ */ + PERF_BR_IND_CALL, /* X86_BR_IND_CALL */ + PERF_BR_UNKNOWN, /* X86_BR_ABORT */ + PERF_BR_UNKNOWN, /* X86_BR_IN_TX */ + PERF_BR_NO_TX, /* X86_BR_NO_TX */ + PERF_BR_CALL, /* X86_BR_ZERO_CALL */ + PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */ + PERF_BR_IND, /* X86_BR_IND_JMP */ +}; + +int common_branch_type(int type) +{ + int i; + + type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */ + + if (type) { + i = __ffs(type); + if (i < X86_BR_TYPE_MAP_MAX) + return branch_map[i]; + } + + return PERF_BR_UNKNOWN; +} diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 3de6d8b53367..29774126e931 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -426,7 +426,7 @@ void __init hyperv_init(void) * 1. Register the guest ID * 2. Enable the hypercall and register the hypercall page */ - guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); + guest_id = hv_generate_guest_id(LINUX_VERSION_CODE); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */ @@ -459,13 +459,13 @@ void __init hyperv_init(void) wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); pg = vmalloc_to_page(hv_hypercall_pg); - dst = kmap(pg); + dst = kmap_local_page(pg); src = memremap(hypercall_msr.guest_physical_address << PAGE_SHIFT, PAGE_SIZE, MEMREMAP_WB); BUG_ON(!(src && dst)); memcpy(dst, src, HV_HYP_PAGE_SIZE); memunmap(src); - kunmap(pg); + kunmap_local(dst); } else { hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd-ibs.h index f3eb098d63d4..cb2a5e113daa 100644 --- a/arch/x86/include/asm/amd-ibs.h +++ b/arch/x86/include/asm/amd-ibs.h @@ -6,6 +6,22 @@ #include <asm/msr-index.h> +/* IBS_OP_DATA2 DataSrc */ +#define IBS_DATA_SRC_LOC_CACHE 2 +#define IBS_DATA_SRC_DRAM 3 +#define IBS_DATA_SRC_REM_CACHE 4 +#define IBS_DATA_SRC_IO 7 + +/* IBS_OP_DATA2 DataSrc Extension */ +#define IBS_DATA_SRC_EXT_LOC_CACHE 1 +#define IBS_DATA_SRC_EXT_NEAR_CCX_CACHE 2 +#define IBS_DATA_SRC_EXT_DRAM 3 +#define IBS_DATA_SRC_EXT_FAR_CCX_CACHE 5 +#define IBS_DATA_SRC_EXT_PMEM 6 +#define IBS_DATA_SRC_EXT_IO 7 +#define IBS_DATA_SRC_EXT_EXT_MEM 8 +#define IBS_DATA_SRC_EXT_PEER_AGENT_MEM 12 + /* * IBS Hardware MSRs */ diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h index bca625a60186..6df6ece8a28e 100644 --- a/arch/x86/include/asm/checksum.h +++ b/arch/x86/include/asm/checksum.h @@ -1,9 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1 -#define HAVE_CSUM_COPY_USER -#define _HAVE_ARCH_CSUM_AND_COPY -#ifdef CONFIG_X86_32 -# include <asm/checksum_32.h> +#ifdef CONFIG_GENERIC_CSUM +# include <asm-generic/checksum.h> #else -# include <asm/checksum_64.h> +# define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1 +# define HAVE_CSUM_COPY_USER +# define _HAVE_ARCH_CSUM_AND_COPY +# ifdef CONFIG_X86_32 +# include <asm/checksum_32.h> +# else +# include <asm/checksum_64.h> +# endif #endif diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ef4775c6db01..b71f4f2ecdd5 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -/* FREE! ( 3*32+17) */ +#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index b5ef474be858..908d99b127d3 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -23,7 +23,6 @@ #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #ifndef __ASSEMBLY__ -extern atomic_t modifying_ftrace_code; extern void __fentry__(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index a1f0e90d0818..0bc931cd0698 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -44,10 +44,7 @@ struct arch_hw_breakpoint { /* Total number of available HW breakpoint registers */ #define HBP_NUM 4 -static inline int hw_breakpoint_slots(int type) -{ - return HBP_NUM; -} +#define hw_breakpoint_slots(type) (HBP_NUM) struct perf_event_attr; struct perf_event; diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 0a9407dc0859..3089ec352743 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -138,6 +138,9 @@ #define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) #define HV_X64_NESTED_MSR_BITMAP BIT(19) +/* Nested features #2. These are HYPERV_CPUID_NESTED_FEATURES.EBX bits. */ +#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL BIT(0) + /* * This is specific to AMD and specifies that enlightened TLB flush is * supported. If guest opts in to this feature, ASID invalidations only @@ -546,7 +549,7 @@ struct hv_enlightened_vmcs { u64 guest_rip; u32 hv_clean_fields; - u32 hv_padding_32; + u32 padding32_1; u32 hv_synthetic_controls; struct { u32 nested_flush_hypercall:1; @@ -554,14 +557,25 @@ struct hv_enlightened_vmcs { u32 reserved:30; } __packed hv_enlightenments_control; u32 hv_vp_id; - + u32 padding32_2; u64 hv_vm_id; u64 partition_assist_page; u64 padding64_4[4]; u64 guest_bndcfgs; - u64 padding64_5[7]; + u64 guest_ia32_perf_global_ctrl; + u64 guest_ia32_s_cet; + u64 guest_ssp; + u64 guest_ia32_int_ssp_table_addr; + u64 guest_ia32_lbr_ctl; + u64 padding64_5[2]; u64 xss_exit_bitmap; - u64 padding64_6[7]; + u64 encls_exiting_bitmap; + u64 host_ia32_perf_global_ctrl; + u64 tsc_multiplier; + u64 host_ia32_s_cet; + u64 host_ssp; + u64 host_ia32_int_ssp_table_addr; + u64 padding64_6; } __packed; #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h new file mode 100644 index 000000000000..8fa6ac0e2d76 --- /dev/null +++ b/arch/x86/include/asm/kmsan.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * x86 KMSAN support. + * + * Copyright (C) 2022, Google LLC + * Author: Alexander Potapenko <glider@google.com> + */ + +#ifndef _ASM_X86_KMSAN_H +#define _ASM_X86_KMSAN_H + +#ifndef MODULE + +#include <asm/cpu_entry_area.h> +#include <asm/processor.h> +#include <linux/mmzone.h> + +DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow); +DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin); + +/* + * Functions below are declared in the header to make sure they are inlined. + * They all are called from kmsan_get_metadata() for every memory access in + * the kernel, so speed is important here. + */ + +/* + * Compute metadata addresses for the CPU entry area on x86. + */ +static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin) +{ + unsigned long addr64 = (unsigned long)addr; + char *metadata_array; + unsigned long off; + int cpu; + + if ((addr64 < CPU_ENTRY_AREA_BASE) || + (addr64 >= (CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE))) + return NULL; + cpu = (addr64 - CPU_ENTRY_AREA_BASE) / CPU_ENTRY_AREA_SIZE; + off = addr64 - (unsigned long)get_cpu_entry_area(cpu); + if ((off < 0) || (off >= CPU_ENTRY_AREA_SIZE)) + return NULL; + metadata_array = is_origin ? cpu_entry_area_origin : + cpu_entry_area_shadow; + return &per_cpu(metadata_array[off], cpu); +} + +/* + * Taken from arch/x86/mm/physaddr.h to avoid using an instrumented version. + */ +static inline bool kmsan_phys_addr_valid(unsigned long addr) +{ + if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT)) + return !(addr >> boot_cpu_data.x86_phys_bits); + else + return true; +} + +/* + * Taken from arch/x86/mm/physaddr.c to avoid using an instrumented version. + */ +static inline bool kmsan_virt_addr_valid(void *addr) +{ + unsigned long x = (unsigned long)addr; + unsigned long y = x - __START_KERNEL_map; + + /* use the carry flag to determine if x was < __START_KERNEL_map */ + if (unlikely(x > y)) { + x = y + phys_base; + + if (y >= KERNEL_IMAGE_SIZE) + return false; + } else { + x = y + (__START_KERNEL_map - PAGE_OFFSET); + + /* carry flag will be set if starting x was >= PAGE_OFFSET */ + if ((x > y) || !kmsan_phys_addr_valid(x)) + return false; + } + + return pfn_valid(x >> PAGE_SHIFT); +} + +#endif /* !MODULE */ + +#endif /* _ASM_X86_KMSAN_H */ diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 71ea2eab43d5..a2e9317aad49 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -50,8 +50,6 @@ extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); -extern void arch_kprobe_override_function(struct pt_regs *regs); - /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the original instruction */ diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 51f777071584..82ba4a564e58 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -67,7 +67,7 @@ KVM_X86_OP(get_interrupt_shadow) KVM_X86_OP(patch_hypercall) KVM_X86_OP(inject_irq) KVM_X86_OP(inject_nmi) -KVM_X86_OP(queue_exception) +KVM_X86_OP(inject_exception) KVM_X86_OP(cancel_injection) KVM_X86_OP(interrupt_allowed) KVM_X86_OP(nmi_allowed) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index aa381ab69a19..7551b6f9c31c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -615,6 +615,8 @@ struct kvm_vcpu_hv { u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */ u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */ u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ + u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */ + u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */ } cpuid_cache; }; @@ -639,6 +641,16 @@ struct kvm_vcpu_xen { struct timer_list poll_timer; }; +struct kvm_queued_exception { + bool pending; + bool injected; + bool has_error_code; + u8 vector; + u32 error_code; + unsigned long payload; + bool has_payload; +}; + struct kvm_vcpu_arch { /* * rip and regs accesses must go through @@ -738,16 +750,12 @@ struct kvm_vcpu_arch { u8 event_exit_inst_len; - struct kvm_queued_exception { - bool pending; - bool injected; - bool has_error_code; - u8 nr; - u32 error_code; - unsigned long payload; - bool has_payload; - u8 nested_apf; - } exception; + bool exception_from_userspace; + + /* Exceptions to be injected to the guest. */ + struct kvm_queued_exception exception; + /* Exception VM-Exits to be synthesized to L1. */ + struct kvm_queued_exception exception_vmexit; struct kvm_queued_interrupt { bool injected; @@ -858,7 +866,6 @@ struct kvm_vcpu_arch { u32 id; bool send_user_only; u32 host_apf_flags; - unsigned long nested_apf_token; bool delivery_as_pf_vmexit; bool pageready_pending; } apf; @@ -1273,8 +1280,8 @@ struct kvm_arch { bool tdp_mmu_enabled; /* - * List of struct kvm_mmu_pages being used as roots. - * All struct kvm_mmu_pages in the list should have + * List of kvm_mmu_page structs being used as roots. + * All kvm_mmu_page structs in the list should have * tdp_mmu_page set. * * For reads, this list is protected by: @@ -1293,8 +1300,8 @@ struct kvm_arch { struct list_head tdp_mmu_roots; /* - * List of struct kvmp_mmu_pages not being used as roots. - * All struct kvm_mmu_pages in the list should have + * List of kvm_mmu_page structs not being used as roots. + * All kvm_mmu_page structs in the list should have * tdp_mmu_page set and a tdp_mmu_root_count of 0. */ struct list_head tdp_mmu_pages; @@ -1304,9 +1311,9 @@ struct kvm_arch { * is held in read mode: * - tdp_mmu_roots (above) * - tdp_mmu_pages (above) - * - the link field of struct kvm_mmu_pages used by the TDP MMU + * - the link field of kvm_mmu_page structs used by the TDP MMU * - lpage_disallowed_mmu_pages - * - the lpage_disallowed_link field of struct kvm_mmu_pages used + * - the lpage_disallowed_link field of kvm_mmu_page structs used * by the TDP MMU * It is acceptable, but not necessary, to acquire this lock when * the thread holds the MMU lock in write mode. @@ -1524,7 +1531,7 @@ struct kvm_x86_ops { unsigned char *hypercall_addr); void (*inject_irq)(struct kvm_vcpu *vcpu, bool reinjected); void (*inject_nmi)(struct kvm_vcpu *vcpu); - void (*queue_exception)(struct kvm_vcpu *vcpu); + void (*inject_exception)(struct kvm_vcpu *vcpu); void (*cancel_injection)(struct kvm_vcpu *vcpu); int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection); int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); @@ -1634,10 +1641,10 @@ struct kvm_x86_ops { struct kvm_x86_nested_ops { void (*leave_nested)(struct kvm_vcpu *vcpu); + bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector, + u32 error_code); int (*check_events)(struct kvm_vcpu *vcpu); - bool (*handle_page_fault_workaround)(struct kvm_vcpu *vcpu, - struct x86_exception *fault); - bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); + bool (*has_events)(struct kvm_vcpu *vcpu); void (*triple_fault)(struct kvm_vcpu *vcpu); int (*get_state)(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, @@ -1863,7 +1870,7 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long pay void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); -bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, +void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1e086b37a307..10ac52705892 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -590,6 +590,9 @@ #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 +/* AMD Last Branch Record MSRs */ +#define MSR_AMD64_LBR_SELECT 0xc000010e + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 @@ -761,6 +764,8 @@ #define MSR_AMD_DBG_EXTN_CFG 0xc000010f #define MSR_AMD_SAMP_BR_FROM 0xc0010300 +#define DBG_EXTN_CFG_LBRV2EN BIT_ULL(6) + #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index baa70451b8df..198e03e59ca1 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -8,6 +8,8 @@ #include <asm/cpufeatures.h> #include <asm/alternative.h> +#include <linux/kmsan-checks.h> + /* duplicated to the one in bootmem.h */ extern unsigned long max_pfn; extern unsigned long phys_base; @@ -47,6 +49,11 @@ void clear_page_erms(void *page); static inline void clear_page(void *page) { + /* + * Clean up KMSAN metadata for the page being cleared. The assembly call + * below clobbers @page, so we perform unpoisoning before it. + */ + kmsan_unpoison_memory(page, PAGE_SIZE); alternative_call_2(clear_page_orig, clear_page_rep, X86_FEATURE_REP_GOOD, clear_page_erms, X86_FEATURE_ERMS, diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index f6fc8dd51ef4..9ac46dbe57d4 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -207,7 +207,8 @@ union cpuid_0x80000022_ebx { struct { /* Number of Core Performance Counters */ unsigned int num_core_pmc:4; - unsigned int reserved:6; + /* Number of available LBR Stack Entries */ + unsigned int lbr_v2_stack_sz:6; /* Number of Data Fabric Counters */ unsigned int num_df_pmc:6; } split; diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index e896ebef8c24..28421a887209 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -256,10 +256,10 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp) /* We always extract/encode the offset by shifting it all the way up, and then down again */ #define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS) -#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) -#define __swp_type(x) (((x).val) & 0x1f) -#define __swp_offset(x) ((x).val >> 5) -#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) +#define __swp_type(x) (((x).val) & ((1UL << SWP_TYPE_BITS) - 1)) +#define __swp_offset(x) ((x).val >> SWP_TYPE_BITS) +#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << SWP_TYPE_BITS}) /* * Normally, __swp_entry() converts from arch-independent swp_entry_t to diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 44e2d6f1dbaa..5059799bebe3 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -815,7 +815,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) static inline int pmd_bad(pmd_t pmd) { - return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; + return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) != + (_KERNPG_TABLE & ~_PAGE_ACCESSED); } static inline unsigned long pages_to_mb(unsigned long npg) @@ -1431,10 +1432,10 @@ static inline bool arch_has_pfn_modify_check(void) return boot_cpu_has_bug(X86_BUG_L1TF); } -#define arch_faults_on_old_pte arch_faults_on_old_pte -static inline bool arch_faults_on_old_pte(void) +#define arch_has_hw_pte_young arch_has_hw_pte_young +static inline bool arch_has_hw_pte_young(void) { - return false; + return true; } #ifdef CONFIG_PAGE_TABLE_CHECK diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 70e360a2e5fb..04f36063ad54 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -139,7 +139,52 @@ extern unsigned int ptrs_per_p4d; # define VMEMMAP_START __VMEMMAP_BASE_L4 #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ -#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) +/* + * End of the region for which vmalloc page tables are pre-allocated. + * For non-KMSAN builds, this is the same as VMALLOC_END. + * For KMSAN builds, VMALLOC_START..VMEMORY_END is 4 times bigger than + * VMALLOC_START..VMALLOC_END (see below). + */ +#define VMEMORY_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) + +#ifndef CONFIG_KMSAN +#define VMALLOC_END VMEMORY_END +#else +/* + * In KMSAN builds vmalloc area is four times smaller, and the remaining 3/4 + * are used to keep the metadata for virtual pages. The memory formerly + * belonging to vmalloc area is now laid out as follows: + * + * 1st quarter: VMALLOC_START to VMALLOC_END - new vmalloc area + * 2nd quarter: KMSAN_VMALLOC_SHADOW_START to + * VMALLOC_END+KMSAN_VMALLOC_SHADOW_OFFSET - vmalloc area shadow + * 3rd quarter: KMSAN_VMALLOC_ORIGIN_START to + * VMALLOC_END+KMSAN_VMALLOC_ORIGIN_OFFSET - vmalloc area origins + * 4th quarter: KMSAN_MODULES_SHADOW_START to KMSAN_MODULES_ORIGIN_START + * - shadow for modules, + * KMSAN_MODULES_ORIGIN_START to + * KMSAN_MODULES_ORIGIN_START + MODULES_LEN - origins for modules. + */ +#define VMALLOC_QUARTER_SIZE ((VMALLOC_SIZE_TB << 40) >> 2) +#define VMALLOC_END (VMALLOC_START + VMALLOC_QUARTER_SIZE - 1) + +/* + * vmalloc metadata addresses are calculated by adding shadow/origin offsets + * to vmalloc address. + */ +#define KMSAN_VMALLOC_SHADOW_OFFSET VMALLOC_QUARTER_SIZE +#define KMSAN_VMALLOC_ORIGIN_OFFSET (VMALLOC_QUARTER_SIZE << 1) + +#define KMSAN_VMALLOC_SHADOW_START (VMALLOC_START + KMSAN_VMALLOC_SHADOW_OFFSET) +#define KMSAN_VMALLOC_ORIGIN_START (VMALLOC_START + KMSAN_VMALLOC_ORIGIN_OFFSET) + +/* + * The shadow/origin for modules are placed one by one in the last 1/4 of + * vmalloc space. + */ +#define KMSAN_MODULES_SHADOW_START (VMALLOC_END + KMSAN_VMALLOC_ORIGIN_OFFSET + 1) +#define KMSAN_MODULES_ORIGIN_START (KMSAN_MODULES_SHADOW_START + MODULES_LEN) +#endif /* CONFIG_KMSAN */ #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 356308c73951..67c9d73b31fa 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -587,9 +587,6 @@ static inline void load_sp0(unsigned long sp0) #endif /* CONFIG_PARAVIRT_XXL */ -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - unsigned long __get_wchan(struct task_struct *p); /* diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 892fd8c3a6f7..60ece592b220 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -12,7 +12,7 @@ */ #ifdef CONFIG_64BIT -PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); +__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); #define __pv_queued_spin_unlock __pv_queued_spin_unlock #define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock" #define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath" @@ -20,9 +20,10 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); /* * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock * which combines the registers saving trunk and the body of the following - * C code: + * C code. Note that it puts the code in the .spinlock.text section which + * is equivalent to adding __lockfunc in the C code: * - * void __pv_queued_spin_unlock(struct qspinlock *lock) + * void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock) * { * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); * @@ -36,7 +37,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); * rsi = lockval (second argument) * rdx = internal variable (set to 0) */ -asm (".pushsection .text;" +asm (".pushsection .spinlock.text;" ".globl " PV_UNLOCK ";" ".type " PV_UNLOCK ", @function;" ".align 4,0x90;" @@ -65,8 +66,8 @@ asm (".pushsection .text;" #else /* CONFIG_64BIT */ -extern void __pv_queued_spin_unlock(struct qspinlock *lock); -PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock); +extern void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock); +__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock, ".spinlock.text"); #endif /* CONFIG_64BIT */ #endif diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 6a9ccc1b2be5..64df897c0ee3 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_SPARSEMEM_H #define _ASM_X86_SPARSEMEM_H +#include <linux/types.h> + #ifdef CONFIG_SPARSEMEM /* * generic non-linear memory support: diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 6e450827f677..3b87d889b6e1 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -11,11 +11,23 @@ function. */ #define __HAVE_ARCH_MEMCPY 1 +#if defined(__SANITIZE_MEMORY__) +#undef memcpy +void *__msan_memcpy(void *dst, const void *src, size_t size); +#define memcpy __msan_memcpy +#else extern void *memcpy(void *to, const void *from, size_t len); +#endif extern void *__memcpy(void *to, const void *from, size_t len); #define __HAVE_ARCH_MEMSET +#if defined(__SANITIZE_MEMORY__) +extern void *__msan_memset(void *s, int c, size_t n); +#undef memset +#define memset __msan_memset +#else void *memset(void *s, int c, size_t n); +#endif void *__memset(void *s, int c, size_t n); #define __HAVE_ARCH_MEMSET16 @@ -55,7 +67,13 @@ static inline void *memset64(uint64_t *s, uint64_t v, size_t n) } #define __HAVE_ARCH_MEMMOVE +#if defined(__SANITIZE_MEMORY__) +#undef memmove +void *__msan_memmove(void *dest, const void *src, size_t len); +#define memmove __msan_memmove +#else void *memmove(void *dest, const void *src, size_t count); +#endif void *__memmove(void *dest, const void *src, size_t count); int memcmp(const void *cs, const void *ct, size_t count); @@ -64,8 +82,7 @@ char *strcpy(char *dest, const char *src); char *strcat(char *dest, const char *src); int strcmp(const char *cs, const char *ct); -#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) - +#if (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)) /* * For files that not instrumented (e.g. mm/slub.c) we * should use not instrumented version of mem* functions. @@ -73,7 +90,9 @@ int strcmp(const char *cs, const char *ct); #undef memcpy #define memcpy(dst, src, len) __memcpy(dst, src, len) +#undef memmove #define memmove(dst, src, len) __memmove(dst, src, len) +#undef memset #define memset(s, c, n) __memset(s, c, n) #ifndef __NO_FORTIFY diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 1ec6a9ea2328..8bc614cfe21b 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -5,6 +5,7 @@ * User space memory access functions */ #include <linux/compiler.h> +#include <linux/instrumented.h> #include <linux/kasan-checks.h> #include <linux/string.h> #include <asm/asm.h> @@ -103,6 +104,7 @@ extern int __get_user_bad(void); : "=a" (__ret_gu), "=r" (__val_gu), \ ASM_CALL_CONSTRAINT \ : "0" (ptr), "i" (sizeof(*(ptr)))); \ + instrument_get_user(__val_gu); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ __builtin_expect(__ret_gu, 0); \ }) @@ -192,9 +194,11 @@ extern void __put_user_nocheck_8(void); int __ret_pu; \ void __user *__ptr_pu; \ register __typeof__(*(ptr)) __val_pu asm("%"_ASM_AX); \ - __chk_user_ptr(ptr); \ - __ptr_pu = (ptr); \ - __val_pu = (x); \ + __typeof__(*(ptr)) __x = (x); /* eval x once */ \ + __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \ + __chk_user_ptr(__ptr); \ + __ptr_pu = __ptr; \ + __val_pu = __x; \ asm volatile("call __" #fn "_%P[size]" \ : "=c" (__ret_pu), \ ASM_CALL_CONSTRAINT \ @@ -202,6 +206,7 @@ extern void __put_user_nocheck_8(void); "r" (__val_pu), \ [size] "i" (sizeof(*(ptr))) \ :"ebx"); \ + instrument_put_user(__x, __ptr, sizeof(*(ptr))); \ __builtin_expect(__ret_pu, 0); \ }) @@ -248,23 +253,25 @@ extern void __put_user_nocheck_8(void); #define __put_user_size(x, ptr, size, label) \ do { \ + __typeof__(*(ptr)) __x = (x); /* eval x once */ \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ - __put_user_goto(x, ptr, "b", "iq", label); \ + __put_user_goto(__x, ptr, "b", "iq", label); \ break; \ case 2: \ - __put_user_goto(x, ptr, "w", "ir", label); \ + __put_user_goto(__x, ptr, "w", "ir", label); \ break; \ case 4: \ - __put_user_goto(x, ptr, "l", "ir", label); \ + __put_user_goto(__x, ptr, "l", "ir", label); \ break; \ case 8: \ - __put_user_goto_u64(x, ptr, label); \ + __put_user_goto_u64(__x, ptr, label); \ break; \ default: \ __put_user_bad(); \ } \ + instrument_put_user(__x, ptr, size); \ } while (0) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT @@ -305,6 +312,7 @@ do { \ default: \ (x) = __get_user_bad(); \ } \ + instrument_get_user(x); \ } while (0) #define __get_user_asm(x, addr, itype, ltype, label) \ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index c371ef695fcc..498dc600bd5c 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -309,7 +309,7 @@ enum vmcs_field { GUEST_LDTR_AR_BYTES = 0x00004820, GUEST_TR_AR_BYTES = 0x00004822, GUEST_INTERRUPTIBILITY_INFO = 0x00004824, - GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_ACTIVITY_STATE = 0x00004826, GUEST_SYSENTER_CS = 0x0000482A, VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, HOST_IA32_SYSENTER_CS = 0x00004c00, diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 1286a73ebdbc..f901658d9f7c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -3,10 +3,6 @@ # Makefile for the linux kernel. # -extra-y := head_$(BITS).o -extra-y += head$(BITS).o -extra-y += ebda.o -extra-y += platform-quirks.o extra-y += vmlinux.lds CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) @@ -33,6 +29,8 @@ KASAN_SANITIZE_sev.o := n # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. KCSAN_SANITIZE := n +KMSAN_SANITIZE_head$(BITS).o := n +KMSAN_SANITIZE_nmi.o := n # If instrumentation of this dir is enabled, boot hangs during first second. # Probably could be more selective here, but note that files related to irqs, @@ -42,7 +40,11 @@ KCOV_INSTRUMENT := n CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace -obj-y := process_$(BITS).o signal.o +obj-y += head_$(BITS).o +obj-y += head$(BITS).o +obj-y += ebda.o +obj-y += platform-quirks.o +obj-y += process_$(BITS).o signal.o obj-$(CONFIG_COMPAT) += signal_compat.o obj-y += traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o dumpstack.o nmi.o diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 9661e3e802be..f10a921ee756 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -12,6 +12,7 @@ endif # If these files are instrumented, boot hangs during the first second. KCOV_INSTRUMENT_common.o := n KCOV_INSTRUMENT_perf_event.o := n +KMSAN_SANITIZE_common.o := n # As above, instrumenting secondary CPU boot code causes boot hangs. KCSAN_SANITIZE_common.o := n diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 48276c0e479d..860b60273df3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -503,7 +503,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; /* A random value per boot for bit slice [12:upper_bit) */ - va_align.bits = get_random_int() & va_align.mask; + va_align.bits = get_random_u32() & va_align.mask; } if (cpu_has(c, X86_FEATURE_MWAITX)) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index fd44b54c90d5..fc01f81f6e2a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -45,6 +45,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index b3dba35f466e..0bf6779187dd 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -177,6 +177,12 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, } } +/* + * This function reads pointers from the stack and dereferences them. The + * pointers may not have their KMSAN shadow set up properly, which may result + * in false positive reports. Disable instrumentation to avoid those. + */ +__no_kmsan_checks static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, const char *log_lvl) { diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 4c3c27b6aea3..eb8bc82846b9 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -59,8 +59,6 @@ DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -#define stack_addr(regs) ((unsigned long *)regs->sp) - #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index b1abf663417c..c032edcd3d95 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -53,7 +53,7 @@ static unsigned long int get_module_load_offset(void) */ if (module_load_offset == 0) module_load_offset = - (get_random_int() % 1024 + 1) * PAGE_SIZE; + (prandom_u32_max(1024) + 1) * PAGE_SIZE; mutex_unlock(&module_kaslr_mutex); } return module_load_offset; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 58a6ea472db9..c21b7347a26d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -965,7 +965,7 @@ early_param("idle", idle_setup); unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; + sp -= prandom_u32_max(8192); return sp & ~0xf; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 1962008fe743..6b3418bff326 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -553,6 +553,7 @@ void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp, bool x32) * Kprobes not supported here. Set the probe on schedule instead. * Function graph tracer not supported too. */ +__no_kmsan_checks __visible __notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f24227bc3220..3f3ea0287f69 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1316,7 +1316,7 @@ static void __init smp_sanity_check(void) nr++; } - nr_cpu_ids = 8; + set_nr_cpu_ids(8); } #endif @@ -1569,7 +1569,7 @@ __init void prefill_possible_map(void) possible = i; } - nr_cpu_ids = possible; + set_nr_cpu_ids(possible); pr_info("Allowing %d CPUs, %d hotplug CPUs\n", possible, max_t(int, possible - num_processors, 0)); diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 3bacd935f840..4c1bcb6053fc 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -95,7 +95,7 @@ void __init tboot_probe(void) static pgd_t *tboot_pg_dir; static struct mm_struct tboot_mm = { - .mm_rb = RB_ROOT, + .mm_mt = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, tboot_mm.mmap_lock), .pgd = swapper_pg_dir, .mm_users = ATOMIC_INIT(2), .mm_count = ATOMIC_INIT(1), diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 8e1c50c86e5d..d8ba93778ae3 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -183,6 +183,16 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp) } #endif +/* + * While walking the stack, KMSAN may stomp on stale locals from other + * functions that were marked as uninitialized upon function exit, and + * now hold the call frame information for the current function (e.g. the frame + * pointer). Because KMSAN does not specifically mark call frames as + * initialized, false positive reports are possible. To prevent such reports, + * we mark the functions scanning the stack (here and below) with + * __no_kmsan_checks. + */ +__no_kmsan_checks static bool update_stack_state(struct unwind_state *state, unsigned long *next_bp) { @@ -250,6 +260,7 @@ static bool update_stack_state(struct unwind_state *state, return true; } +__no_kmsan_checks bool unwind_next_frame(struct unwind_state *state) { struct pt_regs *regs; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index e3cbd7706136..67be7f217e37 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -28,7 +28,8 @@ config KVM select HAVE_KVM_IRQCHIP select HAVE_KVM_PFNCACHE select HAVE_KVM_IRQFD - select HAVE_KVM_DIRTY_RING + select HAVE_KVM_DIRTY_RING_TSO + select HAVE_KVM_DIRTY_RING_ACQ_REL select IRQ_BYPASS_MANAGER select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_IRQ_ROUTING diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2796dde06302..7065462378e2 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -311,6 +311,15 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime); +static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent) +{ + struct kvm_cpuid_entry2 *entry; + + entry = cpuid_entry2_find(entries, nent, HYPERV_CPUID_INTERFACE, + KVM_CPUID_INDEX_NOT_SIGNIFICANT); + return entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX; +} + static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; @@ -346,7 +355,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.cr4_guest_rsvd_bits = __cr4_reserved_bits(guest_cpuid_has, vcpu); - kvm_hv_set_cpuid(vcpu); + kvm_hv_set_cpuid(vcpu, kvm_cpuid_has_hyperv(vcpu->arch.cpuid_entries, + vcpu->arch.cpuid_nent)); /* Invoke the vendor callback only after the above state is updated. */ static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu); @@ -409,6 +419,12 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, return 0; } + if (kvm_cpuid_has_hyperv(e2, nent)) { + r = kvm_hv_vcpu_init(vcpu); + if (r) + return r; + } + r = kvm_check_cpuid(vcpu, e2, nent); if (r) return r; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index aacb28c83e43..3b27622d4642 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1137,9 +1137,11 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) static void decode_register_operand(struct x86_emulate_ctxt *ctxt, struct operand *op) { - unsigned reg = ctxt->modrm_reg; + unsigned int reg; - if (!(ctxt->d & ModRM)) + if (ctxt->d & ModRM) + reg = ctxt->modrm_reg; + else reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3); if (ctxt->d & Sse) { @@ -1953,7 +1955,7 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - if (ctxt->modrm_reg == VCPU_SREG_SS) + if (seg == VCPU_SREG_SS) ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; if (ctxt->op_bytes > 2) rsp_increment(ctxt, ctxt->op_bytes - 2); @@ -3645,13 +3647,10 @@ static int em_wrmsr(struct x86_emulate_ctxt *ctxt) | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32); r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data); - if (r == X86EMUL_IO_NEEDED) - return r; - - if (r > 0) + if (r == X86EMUL_PROPAGATE_FAULT) return emulate_gp(ctxt, 0); - return r < 0 ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; + return r; } static int em_rdmsr(struct x86_emulate_ctxt *ctxt) @@ -3662,15 +3661,14 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt) r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data); - if (r == X86EMUL_IO_NEEDED) - return r; - - if (r) + if (r == X86EMUL_PROPAGATE_FAULT) return emulate_gp(ctxt, 0); - *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data; - *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32; - return X86EMUL_CONTINUE; + if (r == X86EMUL_CONTINUE) { + *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data; + *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32; + } + return r; } static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment) @@ -4171,8 +4169,7 @@ static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) ctxt->ops->get_dr(ctxt, 7, &dr7); - /* Check if DR7.Global_Enable is set */ - return dr7 & (1 << 13); + return dr7 & DR7_GD; } static int check_dr_read(struct x86_emulate_ctxt *ctxt) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index ed804447589c..0adf4a437e85 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -38,9 +38,6 @@ #include "irq.h" #include "fpu.h" -/* "Hv#1" signature */ -#define HYPERV_CPUID_SIGNATURE_EAX 0x31237648 - #define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64) static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, @@ -934,11 +931,14 @@ static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index) stimer_prepare_msg(stimer); } -static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) +int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) { - struct kvm_vcpu_hv *hv_vcpu; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); int i; + if (hv_vcpu) + return 0; + hv_vcpu = kzalloc(sizeof(struct kvm_vcpu_hv), GFP_KERNEL_ACCOUNT); if (!hv_vcpu) return -ENOMEM; @@ -962,11 +962,9 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages) struct kvm_vcpu_hv_synic *synic; int r; - if (!to_hv_vcpu(vcpu)) { - r = kvm_hv_vcpu_init(vcpu); - if (r) - return r; - } + r = kvm_hv_vcpu_init(vcpu); + if (r) + return r; synic = to_hv_synic(vcpu); @@ -1660,10 +1658,8 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) if (!host && !vcpu->arch.hyperv_enabled) return 1; - if (!to_hv_vcpu(vcpu)) { - if (kvm_hv_vcpu_init(vcpu)) - return 1; - } + if (kvm_hv_vcpu_init(vcpu)) + return 1; if (kvm_hv_msr_partition_wide(msr)) { int r; @@ -1683,10 +1679,8 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) if (!host && !vcpu->arch.hyperv_enabled) return 1; - if (!to_hv_vcpu(vcpu)) { - if (kvm_hv_vcpu_init(vcpu)) - return 1; - } + if (kvm_hv_vcpu_init(vcpu)) + return 1; if (kvm_hv_msr_partition_wide(msr)) { int r; @@ -1987,49 +1981,49 @@ ret_success: return HV_STATUS_SUCCESS; } -void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu) +void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu, bool hyperv_enabled) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct kvm_cpuid_entry2 *entry; - struct kvm_vcpu_hv *hv_vcpu; - entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE); - if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) { - vcpu->arch.hyperv_enabled = true; - } else { - vcpu->arch.hyperv_enabled = false; + vcpu->arch.hyperv_enabled = hyperv_enabled; + + if (!hv_vcpu) { + /* + * KVM should have already allocated kvm_vcpu_hv if Hyper-V is + * enabled in CPUID. + */ + WARN_ON_ONCE(vcpu->arch.hyperv_enabled); return; } - if (!to_hv_vcpu(vcpu) && kvm_hv_vcpu_init(vcpu)) - return; + memset(&hv_vcpu->cpuid_cache, 0, sizeof(hv_vcpu->cpuid_cache)); - hv_vcpu = to_hv_vcpu(vcpu); + if (!vcpu->arch.hyperv_enabled) + return; entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES); if (entry) { hv_vcpu->cpuid_cache.features_eax = entry->eax; hv_vcpu->cpuid_cache.features_ebx = entry->ebx; hv_vcpu->cpuid_cache.features_edx = entry->edx; - } else { - hv_vcpu->cpuid_cache.features_eax = 0; - hv_vcpu->cpuid_cache.features_ebx = 0; - hv_vcpu->cpuid_cache.features_edx = 0; } entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO); if (entry) { hv_vcpu->cpuid_cache.enlightenments_eax = entry->eax; hv_vcpu->cpuid_cache.enlightenments_ebx = entry->ebx; - } else { - hv_vcpu->cpuid_cache.enlightenments_eax = 0; - hv_vcpu->cpuid_cache.enlightenments_ebx = 0; } entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES); if (entry) hv_vcpu->cpuid_cache.syndbg_cap_eax = entry->eax; - else - hv_vcpu->cpuid_cache.syndbg_cap_eax = 0; + + entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_NESTED_FEATURES); + if (entry) { + hv_vcpu->cpuid_cache.nested_eax = entry->eax; + hv_vcpu->cpuid_cache.nested_ebx = entry->ebx; + } } int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce) @@ -2552,7 +2546,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, case HYPERV_CPUID_NESTED_FEATURES: ent->eax = evmcs_ver; ent->eax |= HV_X64_NESTED_MSR_BITMAP; - + ent->ebx |= HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL; break; case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS: diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index da2737f2a956..1030b1b50552 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -23,6 +23,9 @@ #include <linux/kvm_host.h> +/* "Hv#1" signature */ +#define HYPERV_CPUID_SIGNATURE_EAX 0x31237648 + /* * The #defines related to the synthetic debugger are required by KDNet, but * they are not documented in the Hyper-V TLFS because the synthetic debugger @@ -141,7 +144,8 @@ void kvm_hv_request_tsc_page_update(struct kvm *kvm); void kvm_hv_init_vm(struct kvm *kvm); void kvm_hv_destroy_vm(struct kvm *kvm); -void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu); +int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu, bool hyperv_enabled); int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce); int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9dda989a1cf0..d7639d126e6c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -3025,17 +3025,8 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u8 sipi_vector; int r; - unsigned long pe; - if (!lapic_in_kernel(vcpu)) - return 0; - - /* - * Read pending events before calling the check_events - * callback. - */ - pe = smp_load_acquire(&apic->pending_events); - if (!pe) + if (!kvm_apic_has_pending_init_or_sipi(vcpu)) return 0; if (is_guest_mode(vcpu)) { @@ -3043,38 +3034,31 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu) if (r < 0) return r == -EBUSY ? 0 : r; /* - * If an event has happened and caused a vmexit, - * we know INITs are latched and therefore - * we will not incorrectly deliver an APIC - * event instead of a vmexit. + * Continue processing INIT/SIPI even if a nested VM-Exit + * occurred, e.g. pending SIPIs should be dropped if INIT+SIPI + * are blocked as a result of transitioning to VMX root mode. */ } /* - * INITs are latched while CPU is in specific states - * (SMM, VMX root mode, SVM with GIF=0). - * Because a CPU cannot be in these states immediately - * after it has processed an INIT signal (and thus in - * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs - * and leave the INIT pending. + * INITs are blocked while CPU is in specific states (SMM, VMX root + * mode, SVM with GIF=0), while SIPIs are dropped if the CPU isn't in + * wait-for-SIPI (WFS). */ - if (kvm_vcpu_latch_init(vcpu)) { + if (!kvm_apic_init_sipi_allowed(vcpu)) { WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); - if (test_bit(KVM_APIC_SIPI, &pe)) - clear_bit(KVM_APIC_SIPI, &apic->pending_events); + clear_bit(KVM_APIC_SIPI, &apic->pending_events); return 0; } - if (test_bit(KVM_APIC_INIT, &pe)) { - clear_bit(KVM_APIC_INIT, &apic->pending_events); + if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) { kvm_vcpu_reset(vcpu, true); if (kvm_vcpu_is_bsp(apic->vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; else vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; } - if (test_bit(KVM_APIC_SIPI, &pe)) { - clear_bit(KVM_APIC_SIPI, &apic->pending_events); + if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) { if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { /* evaluate pending_events before reading the vector */ smp_rmb(); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 117a46df5cc1..a5ac4a5a5179 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -7,6 +7,7 @@ #include <linux/kvm_host.h> #include "hyperv.h" +#include "kvm_cache_regs.h" #define KVM_APIC_INIT 0 #define KVM_APIC_SIPI 1 @@ -223,11 +224,17 @@ static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu) return lapic_in_kernel(vcpu) && vcpu->arch.apic->apicv_active; } -static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) +static inline bool kvm_apic_has_pending_init_or_sipi(struct kvm_vcpu *vcpu) { return lapic_in_kernel(vcpu) && vcpu->arch.apic->pending_events; } +static inline bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu) +{ + return !is_smm(vcpu) && + !static_call(kvm_x86_apic_init_signal_blocked)(vcpu); +} + static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) { return (irq->delivery_mode == APIC_DM_LOWEST || diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 3552e6af3684..6f81539061d6 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1667,6 +1667,18 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr) percpu_counter_add(&kvm_total_used_mmu_pages, nr); } +static void kvm_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + kvm_mod_used_mmu_pages(kvm, +1); + kvm_account_pgtable_pages((void *)sp->spt, +1); +} + +static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + kvm_mod_used_mmu_pages(kvm, -1); + kvm_account_pgtable_pages((void *)sp->spt, -1); +} + static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp) { MMU_WARN_ON(!is_empty_shadow_page(sp->spt)); @@ -2124,7 +2136,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm, */ sp->mmu_valid_gen = kvm->arch.mmu_valid_gen; list_add(&sp->link, &kvm->arch.active_mmu_pages); - kvm_mod_used_mmu_pages(kvm, +1); + kvm_account_mmu_page(kvm, sp); sp->gfn = gfn; sp->role = role; @@ -2458,7 +2470,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, list_add(&sp->link, invalid_list); else list_move(&sp->link, invalid_list); - kvm_mod_used_mmu_pages(kvm, -1); + kvm_unaccount_mmu_page(kvm, sp); } else { /* * Remove the active root from the active page list, the root @@ -4292,7 +4304,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, vcpu->arch.l1tf_flush_l1d = true; if (!flags) { - trace_kvm_page_fault(fault_address, error_code); + trace_kvm_page_fault(vcpu, fault_address, error_code); if (kvm_event_needs_reinjection(vcpu)) kvm_mmu_unprotect_page_virt(vcpu, fault_address); @@ -6704,10 +6716,12 @@ int kvm_mmu_vendor_module_init(void) ret = register_shrinker(&mmu_shrinker, "x86-mmu"); if (ret) - goto out; + goto out_shrinker; return 0; +out_shrinker: + percpu_counter_destroy(&kvm_total_used_mmu_pages); out: mmu_destroy_caches(); return ret; diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 39e0205e7300..5ab5f94dcb6f 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -472,7 +472,7 @@ error: #if PTTYPE == PTTYPE_EPT /* - * Use PFERR_RSVD_MASK in error_code to to tell if EPT + * Use PFERR_RSVD_MASK in error_code to tell if EPT * misconfiguration requires to be injected. The detection is * done by is_rsvd_bits_set() above. * diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index bf2ccf9debca..672f0432d777 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -372,6 +372,16 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, } } +static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + kvm_account_pgtable_pages((void *)sp->spt, +1); +} + +static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + kvm_account_pgtable_pages((void *)sp->spt, -1); +} + /** * tdp_mmu_unlink_sp() - Remove a shadow page from the list of used pages * @@ -384,6 +394,7 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp, bool shared) { + tdp_unaccount_mmu_page(kvm, sp); if (shared) spin_lock(&kvm->arch.tdp_mmu_pages_lock); else @@ -1132,6 +1143,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, if (account_nx) account_huge_nx_page(kvm, sp); spin_unlock(&kvm->arch.tdp_mmu_pages_lock); + tdp_account_mmu_page(kvm, sp); return 0; } diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 02f9e4f245bd..d9b9a0f0db17 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -106,9 +106,19 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) return; if (pmc->perf_event && pmc->perf_event->attr.precise_ip) { - /* Indicate PEBS overflow PMI to guest. */ - skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, - (unsigned long *)&pmu->global_status); + if (!in_pmi) { + /* + * TODO: KVM is currently _choosing_ to not generate records + * for emulated instructions, avoiding BUFFER_OVF PMI when + * there are no records. Strictly speaking, it should be done + * as well in the right context to improve sampling accuracy. + */ + skip_pmi = true; + } else { + /* Indicate PEBS overflow PMI to guest. */ + skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, + (unsigned long *)&pmu->global_status); + } } else { __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); } @@ -227,8 +237,8 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc) get_sample_period(pmc, pmc->counter))) return false; - if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) && - pmc->perf_event->attr.precise_ip) + if (test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) != + (!!pmc->perf_event->attr.precise_ip)) return false; /* reuse perf_event to serve as pmc_reprogram_counter() does*/ diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 76dcc8a3e849..4c620999d230 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -55,28 +55,6 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, nested_svm_vmexit(svm); } -static bool nested_svm_handle_page_fault_workaround(struct kvm_vcpu *vcpu, - struct x86_exception *fault) -{ - struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb *vmcb = svm->vmcb; - - WARN_ON(!is_guest_mode(vcpu)); - - if (vmcb12_is_intercept(&svm->nested.ctl, - INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) && - !WARN_ON_ONCE(svm->nested.nested_run_pending)) { - vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR; - vmcb->control.exit_code_hi = 0; - vmcb->control.exit_info_1 = fault->error_code; - vmcb->control.exit_info_2 = fault->address; - nested_svm_vmexit(svm); - return true; - } - - return false; -} - static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) { struct vcpu_svm *svm = to_svm(vcpu); @@ -468,7 +446,7 @@ static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm, unsigned int nr; if (vcpu->arch.exception.injected) { - nr = vcpu->arch.exception.nr; + nr = vcpu->arch.exception.vector; exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT; if (vcpu->arch.exception.has_error_code) { @@ -781,11 +759,15 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, struct vcpu_svm *svm = to_svm(vcpu); int ret; - trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa, - vmcb12->save.rip, - vmcb12->control.int_ctl, - vmcb12->control.event_inj, - vmcb12->control.nested_ctl); + trace_kvm_nested_vmenter(svm->vmcb->save.rip, + vmcb12_gpa, + vmcb12->save.rip, + vmcb12->control.int_ctl, + vmcb12->control.event_inj, + vmcb12->control.nested_ctl, + vmcb12->control.nested_cr3, + vmcb12->save.cr3, + KVM_ISA_SVM); trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff, vmcb12->control.intercepts[INTERCEPT_CR] >> 16, @@ -1304,44 +1286,46 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu) return 0; } -static bool nested_exit_on_exception(struct vcpu_svm *svm) +static bool nested_svm_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector, + u32 error_code) { - unsigned int nr = svm->vcpu.arch.exception.nr; + struct vcpu_svm *svm = to_svm(vcpu); - return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(nr)); + return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector)); } -static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm) +static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu) { - unsigned int nr = svm->vcpu.arch.exception.nr; + struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit; + struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; + vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->vector; vmcb->control.exit_code_hi = 0; - if (svm->vcpu.arch.exception.has_error_code) - vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code; + if (ex->has_error_code) + vmcb->control.exit_info_1 = ex->error_code; /* * EXITINFO2 is undefined for all exception intercepts other * than #PF. */ - if (nr == PF_VECTOR) { - if (svm->vcpu.arch.exception.nested_apf) - vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; - else if (svm->vcpu.arch.exception.has_payload) - vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload; + if (ex->vector == PF_VECTOR) { + if (ex->has_payload) + vmcb->control.exit_info_2 = ex->payload; else - vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; - } else if (nr == DB_VECTOR) { - /* See inject_pending_event. */ - kvm_deliver_exception_payload(&svm->vcpu); - if (svm->vcpu.arch.dr7 & DR7_GD) { - svm->vcpu.arch.dr7 &= ~DR7_GD; - kvm_update_dr7(&svm->vcpu); + vmcb->control.exit_info_2 = vcpu->arch.cr2; + } else if (ex->vector == DB_VECTOR) { + /* See kvm_check_and_inject_events(). */ + kvm_deliver_exception_payload(vcpu, ex); + + if (vcpu->arch.dr7 & DR7_GD) { + vcpu->arch.dr7 &= ~DR7_GD; + kvm_update_dr7(vcpu); } - } else - WARN_ON(svm->vcpu.arch.exception.has_payload); + } else { + WARN_ON(ex->has_payload); + } nested_svm_vmexit(svm); } @@ -1353,10 +1337,22 @@ static inline bool nested_exit_on_init(struct vcpu_svm *svm) static int svm_check_nested_events(struct kvm_vcpu *vcpu) { - struct vcpu_svm *svm = to_svm(vcpu); - bool block_nested_events = - kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending; struct kvm_lapic *apic = vcpu->arch.apic; + struct vcpu_svm *svm = to_svm(vcpu); + /* + * Only a pending nested run blocks a pending exception. If there is a + * previously injected event, the pending exception occurred while said + * event was being delivered and thus needs to be handled. + */ + bool block_nested_exceptions = svm->nested.nested_run_pending; + /* + * New events (not exceptions) are only recognized at instruction + * boundaries. If an event needs reinjection, then KVM is handling a + * VM-Exit that occurred _during_ instruction execution; new events are + * blocked until the instruction completes. + */ + bool block_nested_events = block_nested_exceptions || + kvm_event_needs_reinjection(vcpu); if (lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &apic->pending_events)) { @@ -1368,18 +1364,16 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) return 0; } - if (vcpu->arch.exception.pending) { - /* - * Only a pending nested run can block a pending exception. - * Otherwise an injected NMI/interrupt should either be - * lost or delivered to the nested hypervisor in the EXITINTINFO - * vmcb field, while delivering the pending exception. - */ - if (svm->nested.nested_run_pending) + if (vcpu->arch.exception_vmexit.pending) { + if (block_nested_exceptions) return -EBUSY; - if (!nested_exit_on_exception(svm)) - return 0; - nested_svm_inject_exception_vmexit(svm); + nested_svm_inject_exception_vmexit(vcpu); + return 0; + } + + if (vcpu->arch.exception.pending) { + if (block_nested_exceptions) + return -EBUSY; return 0; } @@ -1720,8 +1714,8 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) struct kvm_x86_nested_ops svm_nested_ops = { .leave_nested = svm_leave_nested, + .is_exception_vmexit = nested_svm_is_exception_vmexit, .check_events = svm_check_nested_events, - .handle_page_fault_workaround = nested_svm_handle_page_fault_workaround, .triple_fault = nested_svm_triple_fault, .get_nested_state_pages = svm_get_nested_state_pages, .get_state = svm_get_nested_state, diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index f24613a108c5..b68956299fa8 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -23,107 +23,52 @@ enum pmu_type { PMU_TYPE_EVNTSEL, }; -enum index { - INDEX_ZERO = 0, - INDEX_ONE, - INDEX_TWO, - INDEX_THREE, - INDEX_FOUR, - INDEX_FIVE, - INDEX_ERROR, -}; - -static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type) +static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) { - struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); + unsigned int num_counters = pmu->nr_arch_gp_counters; - if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { - if (type == PMU_TYPE_COUNTER) - return MSR_F15H_PERF_CTR; - else - return MSR_F15H_PERF_CTL; - } else { - if (type == PMU_TYPE_COUNTER) - return MSR_K7_PERFCTR0; - else - return MSR_K7_EVNTSEL0; - } -} + if (pmc_idx >= num_counters) + return NULL; -static enum index msr_to_index(u32 msr) -{ - switch (msr) { - case MSR_F15H_PERF_CTL0: - case MSR_F15H_PERF_CTR0: - case MSR_K7_EVNTSEL0: - case MSR_K7_PERFCTR0: - return INDEX_ZERO; - case MSR_F15H_PERF_CTL1: - case MSR_F15H_PERF_CTR1: - case MSR_K7_EVNTSEL1: - case MSR_K7_PERFCTR1: - return INDEX_ONE; - case MSR_F15H_PERF_CTL2: - case MSR_F15H_PERF_CTR2: - case MSR_K7_EVNTSEL2: - case MSR_K7_PERFCTR2: - return INDEX_TWO; - case MSR_F15H_PERF_CTL3: - case MSR_F15H_PERF_CTR3: - case MSR_K7_EVNTSEL3: - case MSR_K7_PERFCTR3: - return INDEX_THREE; - case MSR_F15H_PERF_CTL4: - case MSR_F15H_PERF_CTR4: - return INDEX_FOUR; - case MSR_F15H_PERF_CTL5: - case MSR_F15H_PERF_CTR5: - return INDEX_FIVE; - default: - return INDEX_ERROR; - } + return &pmu->gp_counters[array_index_nospec(pmc_idx, num_counters)]; } static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, enum pmu_type type) { struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); + unsigned int idx; if (!vcpu->kvm->arch.enable_pmu) return NULL; switch (msr) { - case MSR_F15H_PERF_CTL0: - case MSR_F15H_PERF_CTL1: - case MSR_F15H_PERF_CTL2: - case MSR_F15H_PERF_CTL3: - case MSR_F15H_PERF_CTL4: - case MSR_F15H_PERF_CTL5: + case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5: if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) return NULL; - fallthrough; + /* + * Each PMU counter has a pair of CTL and CTR MSRs. CTLn + * MSRs (accessed via EVNTSEL) are even, CTRn MSRs are odd. + */ + idx = (unsigned int)((msr - MSR_F15H_PERF_CTL0) / 2); + if (!(msr & 0x1) != (type == PMU_TYPE_EVNTSEL)) + return NULL; + break; case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: if (type != PMU_TYPE_EVNTSEL) return NULL; + idx = msr - MSR_K7_EVNTSEL0; break; - case MSR_F15H_PERF_CTR0: - case MSR_F15H_PERF_CTR1: - case MSR_F15H_PERF_CTR2: - case MSR_F15H_PERF_CTR3: - case MSR_F15H_PERF_CTR4: - case MSR_F15H_PERF_CTR5: - if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) - return NULL; - fallthrough; case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: if (type != PMU_TYPE_COUNTER) return NULL; + idx = msr - MSR_K7_PERFCTR0; break; default: return NULL; } - return &pmu->gp_counters[msr_to_index(msr)]; + return amd_pmc_idx_to_pmc(pmu, idx); } static bool amd_hw_event_available(struct kvm_pmc *pmc) @@ -139,22 +84,6 @@ static bool amd_pmc_is_enabled(struct kvm_pmc *pmc) return true; } -static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) -{ - unsigned int base = get_msr_base(pmu, PMU_TYPE_COUNTER); - struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); - - if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { - /* - * The idx is contiguous. The MSRs are not. The counter MSRs - * are interleaved with the event select MSRs. - */ - pmc_idx *= 2; - } - - return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER); -} - static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -168,15 +97,7 @@ static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, unsigned int idx, u64 *mask) { - struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - struct kvm_pmc *counters; - - idx &= ~(3u << 30); - if (idx >= pmu->nr_arch_gp_counters) - return NULL; - counters = pmu->gp_counters; - - return &counters[idx]; + return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30)); } static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f3813dbacb9f..58f0077d9357 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -461,24 +461,22 @@ static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu) return 0; } -static void svm_queue_exception(struct kvm_vcpu *vcpu) +static void svm_inject_exception(struct kvm_vcpu *vcpu) { + struct kvm_queued_exception *ex = &vcpu->arch.exception; struct vcpu_svm *svm = to_svm(vcpu); - unsigned nr = vcpu->arch.exception.nr; - bool has_error_code = vcpu->arch.exception.has_error_code; - u32 error_code = vcpu->arch.exception.error_code; - kvm_deliver_exception_payload(vcpu); + kvm_deliver_exception_payload(vcpu, ex); - if (kvm_exception_is_soft(nr) && + if (kvm_exception_is_soft(ex->vector) && svm_update_soft_interrupt_rip(vcpu)) return; - svm->vmcb->control.event_inj = nr + svm->vmcb->control.event_inj = ex->vector | SVM_EVTINJ_VALID - | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) + | (ex->has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | SVM_EVTINJ_TYPE_EXEPT; - svm->vmcb->control.event_inj_err = error_code; + svm->vmcb->control.event_inj_err = ex->error_code; } static void svm_init_erratum_383(void) @@ -1975,7 +1973,7 @@ static int npf_interception(struct kvm_vcpu *vcpu) u64 fault_address = svm->vmcb->control.exit_info_2; u64 error_code = svm->vmcb->control.exit_info_1; - trace_kvm_page_fault(fault_address, error_code); + trace_kvm_page_fault(vcpu, fault_address, error_code); return kvm_mmu_page_fault(vcpu, fault_address, error_code, static_cpu_has(X86_FEATURE_DECODEASSISTS) ? svm->vmcb->control.insn_bytes : NULL, @@ -2341,7 +2339,8 @@ void svm_set_gif(struct vcpu_svm *svm, bool value) enable_gif(svm); if (svm->vcpu.arch.smi_pending || svm->vcpu.arch.nmi_pending || - kvm_cpu_has_injectable_intr(&svm->vcpu)) + kvm_cpu_has_injectable_intr(&svm->vcpu) || + kvm_apic_has_pending_init_or_sipi(&svm->vcpu)) kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); } else { disable_gif(svm); @@ -3522,7 +3521,7 @@ void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, /* Note, this is called iff the local APIC is in-kernel. */ if (!READ_ONCE(vcpu->arch.apic->apicv_active)) { - /* Process the interrupt via inject_pending_event */ + /* Process the interrupt via kvm_check_and_inject_events(). */ kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); return; @@ -4697,15 +4696,7 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - /* - * TODO: Last condition latch INIT signals on vCPU when - * vCPU is in guest-mode and vmcb12 defines intercept on INIT. - * To properly emulate the INIT intercept, - * svm_check_nested_events() should call nested_svm_vmexit() - * if an INIT signal is pending. - */ - return !gif_set(svm) || - (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT)); + return !gif_set(svm); } static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) @@ -4798,7 +4789,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .patch_hypercall = svm_patch_hypercall, .inject_irq = svm_inject_irq, .inject_nmi = svm_inject_nmi, - .queue_exception = svm_queue_exception, + .inject_exception = svm_inject_exception, .cancel_injection = svm_cancel_injection, .interrupt_allowed = svm_interrupt_allowed, .nmi_allowed = svm_nmi_allowed, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 2120d7c060a9..bc25589ad588 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -394,20 +394,25 @@ TRACE_EVENT(kvm_inj_exception, * Tracepoint for page fault. */ TRACE_EVENT(kvm_page_fault, - TP_PROTO(unsigned long fault_address, unsigned int error_code), - TP_ARGS(fault_address, error_code), + TP_PROTO(struct kvm_vcpu *vcpu, u64 fault_address, u64 error_code), + TP_ARGS(vcpu, fault_address, error_code), TP_STRUCT__entry( - __field( unsigned long, fault_address ) - __field( unsigned int, error_code ) + __field( unsigned int, vcpu_id ) + __field( unsigned long, guest_rip ) + __field( u64, fault_address ) + __field( u64, error_code ) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->guest_rip = kvm_rip_read(vcpu); __entry->fault_address = fault_address; __entry->error_code = error_code; ), - TP_printk("address %lx error_code %x", + TP_printk("vcpu %u rip 0x%lx address 0x%016llx error_code 0x%llx", + __entry->vcpu_id, __entry->guest_rip, __entry->fault_address, __entry->error_code) ); @@ -589,10 +594,12 @@ TRACE_EVENT(kvm_pv_eoi, /* * Tracepoint for nested VMRUN */ -TRACE_EVENT(kvm_nested_vmrun, +TRACE_EVENT(kvm_nested_vmenter, TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl, - __u32 event_inj, bool npt), - TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt), + __u32 event_inj, bool tdp_enabled, __u64 guest_tdp_pgd, + __u64 guest_cr3, __u32 isa), + TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, tdp_enabled, + guest_tdp_pgd, guest_cr3, isa), TP_STRUCT__entry( __field( __u64, rip ) @@ -600,7 +607,9 @@ TRACE_EVENT(kvm_nested_vmrun, __field( __u64, nested_rip ) __field( __u32, int_ctl ) __field( __u32, event_inj ) - __field( bool, npt ) + __field( bool, tdp_enabled ) + __field( __u64, guest_pgd ) + __field( __u32, isa ) ), TP_fast_assign( @@ -609,14 +618,24 @@ TRACE_EVENT(kvm_nested_vmrun, __entry->nested_rip = nested_rip; __entry->int_ctl = int_ctl; __entry->event_inj = event_inj; - __entry->npt = npt; - ), - - TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " - "event_inj: 0x%08x npt: %s", - __entry->rip, __entry->vmcb, __entry->nested_rip, - __entry->int_ctl, __entry->event_inj, - __entry->npt ? "on" : "off") + __entry->tdp_enabled = tdp_enabled; + __entry->guest_pgd = tdp_enabled ? guest_tdp_pgd : guest_cr3; + __entry->isa = isa; + ), + + TP_printk("rip: 0x%016llx %s: 0x%016llx nested_rip: 0x%016llx " + "int_ctl: 0x%08x event_inj: 0x%08x nested_%s=%s %s: 0x%016llx", + __entry->rip, + __entry->isa == KVM_ISA_VMX ? "vmcs" : "vmcb", + __entry->vmcb, + __entry->nested_rip, + __entry->int_ctl, + __entry->event_inj, + __entry->isa == KVM_ISA_VMX ? "ept" : "npt", + __entry->tdp_enabled ? "y" : "n", + !__entry->tdp_enabled ? "guest_cr3" : + __entry->isa == KVM_ISA_VMX ? "nested_eptp" : "nested_cr3", + __entry->guest_pgd) ); TRACE_EVENT(kvm_nested_intercepts, diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index c5e5dfef69c7..87c4e46daf37 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -65,6 +65,7 @@ struct vmcs_config { u64 cpu_based_3rd_exec_ctrl; u32 vmexit_ctrl; u32 vmentry_ctrl; + u64 misc; struct nested_vmx_msrs nested; }; extern struct vmcs_config vmcs_config; @@ -82,7 +83,8 @@ static inline bool cpu_has_vmx_basic_inout(void) static inline bool cpu_has_virtual_nmis(void) { - return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS && + vmcs_config.cpu_based_exec_ctrl & CPU_BASED_NMI_WINDOW_EXITING; } static inline bool cpu_has_vmx_preemption_timer(void) @@ -224,11 +226,8 @@ static inline bool cpu_has_vmx_vmfunc(void) static inline bool cpu_has_vmx_shadow_vmcs(void) { - u64 vmx_msr; - /* check if the cpu supports writing r/o exit information fields */ - rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); - if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) + if (!(vmcs_config.misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) return false; return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -370,10 +369,7 @@ static inline bool cpu_has_vmx_invvpid_global(void) static inline bool cpu_has_vmx_intel_pt(void) { - u64 vmx_msr; - - rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); - return (vmx_msr & MSR_IA32_VMX_MISC_INTEL_PT) && + return (vmcs_config.misc & MSR_IA32_VMX_MISC_INTEL_PT) && (vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA) && (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL); } diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index 6a61b1ae7942..d8b23c96d627 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -10,6 +10,8 @@ #include "vmx.h" #include "trace.h" +#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK + DEFINE_STATIC_KEY_FALSE(enable_evmcs); #define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x) @@ -28,6 +30,8 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), EVMCS1_FIELD(HOST_IA32_EFER, host_ia32_efer, HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), EVMCS1_FIELD(HOST_CR0, host_cr0, HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), EVMCS1_FIELD(HOST_CR3, host_cr3, @@ -78,6 +82,8 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), EVMCS1_FIELD(GUEST_IA32_EFER, guest_ia32_efer, HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), EVMCS1_FIELD(GUEST_PDPTR0, guest_pdptr0, HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), EVMCS1_FIELD(GUEST_PDPTR1, guest_pdptr1, @@ -126,6 +132,28 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), EVMCS1_FIELD(XSS_EXIT_BITMAP, xss_exit_bitmap, HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2), + EVMCS1_FIELD(ENCLS_EXITING_BITMAP, encls_exiting_bitmap, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2), + EVMCS1_FIELD(TSC_MULTIPLIER, tsc_multiplier, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2), + /* + * Not used by KVM: + * + * EVMCS1_FIELD(0x00006828, guest_ia32_s_cet, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + * EVMCS1_FIELD(0x0000682A, guest_ssp, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC), + * EVMCS1_FIELD(0x0000682C, guest_ia32_int_ssp_table_addr, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + * EVMCS1_FIELD(0x00002816, guest_ia32_lbr_ctl, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + * EVMCS1_FIELD(0x00006C18, host_ia32_s_cet, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + * EVMCS1_FIELD(0x00006C1A, host_ssp, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + * EVMCS1_FIELD(0x00006C1C, host_ia32_int_ssp_table_addr, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + */ /* 64 bit read only */ EVMCS1_FIELD(GUEST_PHYSICAL_ADDRESS, guest_physical_address, @@ -294,19 +322,6 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { }; const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1); -#if IS_ENABLED(CONFIG_HYPERV) -__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) -{ - vmcs_conf->cpu_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_EXEC_CTRL; - vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL; - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC; - vmcs_conf->cpu_based_3rd_exec_ctrl = 0; - - vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; - vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; -} -#endif - bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa) { struct hv_vp_assist_page assist_page; @@ -334,6 +349,9 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) * versions: lower 8 bits is the minimal version, higher 8 bits is the * maximum supported version. KVM supports versions from 1 to * KVM_EVMCS_VERSION. + * + * Note, do not check the Hyper-V is fully enabled in guest CPUID, this + * helper is used to _get_ the vCPU's supported CPUID. */ if (kvm_cpu_cap_get(X86_FEATURE_VMX) && (!vcpu || to_vmx(vcpu)->nested.enlightened_vmcs_enabled)) @@ -342,10 +360,67 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) return 0; } -void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata) +enum evmcs_revision { + EVMCSv1_LEGACY, + NR_EVMCS_REVISIONS, +}; + +enum evmcs_ctrl_type { + EVMCS_EXIT_CTRLS, + EVMCS_ENTRY_CTRLS, + EVMCS_2NDEXEC, + EVMCS_PINCTRL, + EVMCS_VMFUNC, + NR_EVMCS_CTRLS, +}; + +static const u32 evmcs_unsupported_ctrls[NR_EVMCS_CTRLS][NR_EVMCS_REVISIONS] = { + [EVMCS_EXIT_CTRLS] = { + [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL, + }, + [EVMCS_ENTRY_CTRLS] = { + [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_VMENTRY_CTRL, + }, + [EVMCS_2NDEXEC] = { + [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_2NDEXEC, + }, + [EVMCS_PINCTRL] = { + [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_PINCTRL, + }, + [EVMCS_VMFUNC] = { + [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_VMFUNC, + }, +}; + +static u32 evmcs_get_unsupported_ctls(enum evmcs_ctrl_type ctrl_type) +{ + enum evmcs_revision evmcs_rev = EVMCSv1_LEGACY; + + return evmcs_unsupported_ctrls[ctrl_type][evmcs_rev]; +} + +static bool evmcs_has_perf_global_ctrl(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + + /* + * PERF_GLOBAL_CTRL has a quirk where some Windows guests may fail to + * boot if a PV CPUID feature flag is not also set. Treat the fields + * as unsupported if the flag is not set in guest CPUID. This should + * be called only for guest accesses, and all guest accesses should be + * gated on Hyper-V being enabled and initialized. + */ + if (WARN_ON_ONCE(!hv_vcpu)) + return false; + + return hv_vcpu->cpuid_cache.nested_ebx & HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL; +} + +void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) { u32 ctl_low = (u32)*pdata; u32 ctl_high = (u32)(*pdata >> 32); + u32 unsupported_ctrls; /* * Hyper-V 2016 and 2019 try using these features even when eVMCS @@ -354,77 +429,70 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata) switch (msr_index) { case MSR_IA32_VMX_EXIT_CTLS: case MSR_IA32_VMX_TRUE_EXIT_CTLS: - ctl_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; + unsupported_ctrls = evmcs_get_unsupported_ctls(EVMCS_EXIT_CTRLS); + if (!evmcs_has_perf_global_ctrl(vcpu)) + unsupported_ctrls |= VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + ctl_high &= ~unsupported_ctrls; break; case MSR_IA32_VMX_ENTRY_CTLS: case MSR_IA32_VMX_TRUE_ENTRY_CTLS: - ctl_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; + unsupported_ctrls = evmcs_get_unsupported_ctls(EVMCS_ENTRY_CTRLS); + if (!evmcs_has_perf_global_ctrl(vcpu)) + unsupported_ctrls |= VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + ctl_high &= ~unsupported_ctrls; break; case MSR_IA32_VMX_PROCBASED_CTLS2: - ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC; + ctl_high &= ~evmcs_get_unsupported_ctls(EVMCS_2NDEXEC); break; case MSR_IA32_VMX_TRUE_PINBASED_CTLS: case MSR_IA32_VMX_PINBASED_CTLS: - ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL; + ctl_high &= ~evmcs_get_unsupported_ctls(EVMCS_PINCTRL); break; case MSR_IA32_VMX_VMFUNC: - ctl_low &= ~EVMCS1_UNSUPPORTED_VMFUNC; + ctl_low &= ~evmcs_get_unsupported_ctls(EVMCS_VMFUNC); break; } *pdata = ctl_low | ((u64)ctl_high << 32); } +static bool nested_evmcs_is_valid_controls(enum evmcs_ctrl_type ctrl_type, + u32 val) +{ + return !(val & evmcs_get_unsupported_ctls(ctrl_type)); +} + int nested_evmcs_check_controls(struct vmcs12 *vmcs12) { - int ret = 0; - u32 unsupp_ctl; - - unsupp_ctl = vmcs12->pin_based_vm_exec_control & - EVMCS1_UNSUPPORTED_PINCTRL; - if (unsupp_ctl) { - trace_kvm_nested_vmenter_failed( - "eVMCS: unsupported pin-based VM-execution controls", - unsupp_ctl); - ret = -EINVAL; - } + if (CC(!nested_evmcs_is_valid_controls(EVMCS_PINCTRL, + vmcs12->pin_based_vm_exec_control))) + return -EINVAL; - unsupp_ctl = vmcs12->secondary_vm_exec_control & - EVMCS1_UNSUPPORTED_2NDEXEC; - if (unsupp_ctl) { - trace_kvm_nested_vmenter_failed( - "eVMCS: unsupported secondary VM-execution controls", - unsupp_ctl); - ret = -EINVAL; - } + if (CC(!nested_evmcs_is_valid_controls(EVMCS_2NDEXEC, + vmcs12->secondary_vm_exec_control))) + return -EINVAL; - unsupp_ctl = vmcs12->vm_exit_controls & - EVMCS1_UNSUPPORTED_VMEXIT_CTRL; - if (unsupp_ctl) { - trace_kvm_nested_vmenter_failed( - "eVMCS: unsupported VM-exit controls", - unsupp_ctl); - ret = -EINVAL; - } + if (CC(!nested_evmcs_is_valid_controls(EVMCS_EXIT_CTRLS, + vmcs12->vm_exit_controls))) + return -EINVAL; - unsupp_ctl = vmcs12->vm_entry_controls & - EVMCS1_UNSUPPORTED_VMENTRY_CTRL; - if (unsupp_ctl) { - trace_kvm_nested_vmenter_failed( - "eVMCS: unsupported VM-entry controls", - unsupp_ctl); - ret = -EINVAL; - } + if (CC(!nested_evmcs_is_valid_controls(EVMCS_ENTRY_CTRLS, + vmcs12->vm_entry_controls))) + return -EINVAL; - unsupp_ctl = vmcs12->vm_function_control & EVMCS1_UNSUPPORTED_VMFUNC; - if (unsupp_ctl) { - trace_kvm_nested_vmenter_failed( - "eVMCS: unsupported VM-function controls", - unsupp_ctl); - ret = -EINVAL; - } + /* + * VM-Func controls are 64-bit, but KVM currently doesn't support any + * controls in bits 63:32, i.e. dropping those bits on the consistency + * check is intentional. + */ + if (WARN_ON_ONCE(vmcs12->vm_function_control >> 32)) + return -EINVAL; - return ret; + if (CC(!nested_evmcs_is_valid_controls(EVMCS_VMFUNC, + vmcs12->vm_function_control))) + return -EINVAL; + + return 0; } int nested_enable_evmcs(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index f886a8ff0342..6f746ef3c038 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -42,8 +42,6 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs); * PLE_GAP = 0x00004020, * PLE_WINDOW = 0x00004022, * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, - * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, - * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, * * Currently unsupported in KVM: * GUEST_IA32_RTIT_CTL = 0x00002814, @@ -61,9 +59,8 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs); SECONDARY_EXEC_TSC_SCALING | \ SECONDARY_EXEC_PAUSE_LOOP_EXITING) #define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \ - (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \ - VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) -#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) + (VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) +#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (0) #define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING) struct evmcs_field { @@ -212,7 +209,6 @@ static inline void evmcs_load(u64 phys_addr) vp_ap->enlighten_vmentry = 1; } -__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf); #else /* !IS_ENABLED(CONFIG_HYPERV) */ static __always_inline void evmcs_write64(unsigned long field, u64 value) {} static inline void evmcs_write32(unsigned long field, u32 value) {} @@ -243,7 +239,7 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa); uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu); int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); -void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata); +void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int nested_evmcs_check_controls(struct vmcs12 *vmcs12); #endif /* __KVM_X86_VMX_EVMCS_H */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 7eaf96064cb0..0c62352dda6a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -439,61 +439,22 @@ static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, return inequality ^ bit; } - -/* - * KVM wants to inject page-faults which it got to the guest. This function - * checks whether in a nested guest, we need to inject them to L1 or L2. - */ -static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual) +static bool nested_vmx_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector, + u32 error_code) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - unsigned int nr = vcpu->arch.exception.nr; - bool has_payload = vcpu->arch.exception.has_payload; - unsigned long payload = vcpu->arch.exception.payload; - - if (nr == PF_VECTOR) { - if (vcpu->arch.exception.nested_apf) { - *exit_qual = vcpu->arch.apf.nested_apf_token; - return 1; - } - if (nested_vmx_is_page_fault_vmexit(vmcs12, - vcpu->arch.exception.error_code)) { - *exit_qual = has_payload ? payload : vcpu->arch.cr2; - return 1; - } - } else if (vmcs12->exception_bitmap & (1u << nr)) { - if (nr == DB_VECTOR) { - if (!has_payload) { - payload = vcpu->arch.dr6; - payload &= ~DR6_BT; - payload ^= DR6_ACTIVE_LOW; - } - *exit_qual = payload; - } else - *exit_qual = 0; - return 1; - } - return 0; -} - -static bool nested_vmx_handle_page_fault_workaround(struct kvm_vcpu *vcpu, - struct x86_exception *fault) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - - WARN_ON(!is_guest_mode(vcpu)); + /* + * Drop bits 31:16 of the error code when performing the #PF mask+match + * check. All VMCS fields involved are 32 bits, but Intel CPUs never + * set bits 31:16 and VMX disallows setting bits 31:16 in the injected + * error code. Including the to-be-dropped bits in the check might + * result in an "impossible" or missed exit from L1's perspective. + */ + if (vector == PF_VECTOR) + return nested_vmx_is_page_fault_vmexit(vmcs12, (u16)error_code); - if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) && - !WARN_ON_ONCE(to_vmx(vcpu)->nested.nested_run_pending)) { - vmcs12->vm_exit_intr_error_code = fault->error_code; - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, - PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | - INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, - fault->address); - return true; - } - return false; + return (vmcs12->exception_bitmap & (1u << vector)); } static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu, @@ -1607,6 +1568,10 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields vmcs12->guest_rflags = evmcs->guest_rflags; vmcs12->guest_interruptibility_info = evmcs->guest_interruptibility_info; + /* + * Not present in struct vmcs12: + * vmcs12->guest_ssp = evmcs->guest_ssp; + */ } if (unlikely(!(hv_clean_fields & @@ -1653,6 +1618,13 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields vmcs12->host_fs_selector = evmcs->host_fs_selector; vmcs12->host_gs_selector = evmcs->host_gs_selector; vmcs12->host_tr_selector = evmcs->host_tr_selector; + vmcs12->host_ia32_perf_global_ctrl = evmcs->host_ia32_perf_global_ctrl; + /* + * Not present in struct vmcs12: + * vmcs12->host_ia32_s_cet = evmcs->host_ia32_s_cet; + * vmcs12->host_ssp = evmcs->host_ssp; + * vmcs12->host_ia32_int_ssp_table_addr = evmcs->host_ia32_int_ssp_table_addr; + */ } if (unlikely(!(hv_clean_fields & @@ -1720,6 +1692,8 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields vmcs12->tsc_offset = evmcs->tsc_offset; vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr; vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap; + vmcs12->encls_exiting_bitmap = evmcs->encls_exiting_bitmap; + vmcs12->tsc_multiplier = evmcs->tsc_multiplier; } if (unlikely(!(hv_clean_fields & @@ -1767,6 +1741,13 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs; vmcs12->guest_activity_state = evmcs->guest_activity_state; vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs; + vmcs12->guest_ia32_perf_global_ctrl = evmcs->guest_ia32_perf_global_ctrl; + /* + * Not present in struct vmcs12: + * vmcs12->guest_ia32_s_cet = evmcs->guest_ia32_s_cet; + * vmcs12->guest_ia32_lbr_ctl = evmcs->guest_ia32_lbr_ctl; + * vmcs12->guest_ia32_int_ssp_table_addr = evmcs->guest_ia32_int_ssp_table_addr; + */ } /* @@ -1869,12 +1850,23 @@ static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx) * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count; * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count; * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count; + * evmcs->guest_ia32_perf_global_ctrl = vmcs12->guest_ia32_perf_global_ctrl; + * evmcs->host_ia32_perf_global_ctrl = vmcs12->host_ia32_perf_global_ctrl; + * evmcs->encls_exiting_bitmap = vmcs12->encls_exiting_bitmap; + * evmcs->tsc_multiplier = vmcs12->tsc_multiplier; * * Not present in struct vmcs12: * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx; * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi; * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi; * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip; + * evmcs->host_ia32_s_cet = vmcs12->host_ia32_s_cet; + * evmcs->host_ssp = vmcs12->host_ssp; + * evmcs->host_ia32_int_ssp_table_addr = vmcs12->host_ia32_int_ssp_table_addr; + * evmcs->guest_ia32_s_cet = vmcs12->guest_ia32_s_cet; + * evmcs->guest_ia32_lbr_ctl = vmcs12->guest_ia32_lbr_ctl; + * evmcs->guest_ia32_int_ssp_table_addr = vmcs12->guest_ia32_int_ssp_table_addr; + * evmcs->guest_ssp = vmcs12->guest_ssp; */ evmcs->guest_es_selector = vmcs12->guest_es_selector; @@ -1982,7 +1974,7 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld( bool evmcs_gpa_changed = false; u64 evmcs_gpa; - if (likely(!vmx->nested.enlightened_vmcs_enabled)) + if (likely(!guest_cpuid_has_evmcs(vcpu))) return EVMPTRLD_DISABLED; if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) { @@ -2328,9 +2320,14 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate * on the related bits (if supported by the CPU) in the hope that * we can avoid VMWrites during vmx_set_efer(). + * + * Similarly, take vmcs01's PERF_GLOBAL_CTRL in the hope that if KVM is + * loading PERF_GLOBAL_CTRL via the VMCS for L1, then KVM will want to + * do the same for L2. */ exec_control = __vm_entry_controls_get(vmcs01); - exec_control |= vmcs12->vm_entry_controls; + exec_control |= (vmcs12->vm_entry_controls & + ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER); if (cpu_has_load_ia32_efer()) { if (guest_efer & EFER_LMA) @@ -2863,7 +2860,7 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu, nested_check_vm_entry_controls(vcpu, vmcs12)) return -EINVAL; - if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled) + if (guest_cpuid_has_evmcs(vcpu)) return nested_evmcs_check_controls(vmcs12); return 0; @@ -3145,7 +3142,7 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu) * L2 was running), map it here to make sure vmcs12 changes are * properly reflected. */ - if (vmx->nested.enlightened_vmcs_enabled && + if (guest_cpuid_has_evmcs(vcpu) && vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) { enum nested_evmptrld_status evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, false); @@ -3364,12 +3361,24 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, }; u32 failed_index; + trace_kvm_nested_vmenter(kvm_rip_read(vcpu), + vmx->nested.current_vmptr, + vmcs12->guest_rip, + vmcs12->guest_intr_status, + vmcs12->vm_entry_intr_info_field, + vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_ENABLE_EPT, + vmcs12->ept_pointer, + vmcs12->guest_cr3, + KVM_ISA_VMX); + kvm_service_local_tlb_flush_requests(vcpu); evaluate_pending_interrupts = exec_controls_get(vmx) & (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING); if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); + if (!evaluate_pending_interrupts) + evaluate_pending_interrupts |= kvm_apic_has_pending_init_or_sipi(vcpu); if (!vmx->nested.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) @@ -3450,18 +3459,10 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, } /* - * If L1 had a pending IRQ/NMI until it executed - * VMLAUNCH/VMRESUME which wasn't delivered because it was - * disallowed (e.g. interrupts disabled), L0 needs to - * evaluate if this pending event should cause an exit from L2 - * to L1 or delivered directly to L2 (e.g. In case L1 don't - * intercept EXTERNAL_INTERRUPT). - * - * Usually this would be handled by the processor noticing an - * IRQ/NMI window request, or checking RVI during evaluation of - * pending virtual interrupts. However, this setting was done - * on VMCS01 and now VMCS02 is active instead. Thus, we force L0 - * to perform pending event evaluation by requesting a KVM_REQ_EVENT. + * Re-evaluate pending events if L1 had a pending IRQ/NMI/INIT/SIPI + * when it executed VMLAUNCH/VMRESUME, as entering non-root mode can + * effectively unblock various events, e.g. INIT/SIPI cause VM-Exit + * unconditionally. */ if (unlikely(evaluate_pending_interrupts)) kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -3718,7 +3719,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, is_double_fault(exit_intr_info))) { vmcs12->idt_vectoring_info_field = 0; } else if (vcpu->arch.exception.injected) { - nr = vcpu->arch.exception.nr; + nr = vcpu->arch.exception.vector; idt_vectoring = nr | VECTORING_INFO_VALID_MASK; if (kvm_exception_is_soft(nr)) { @@ -3819,19 +3820,40 @@ mmio_needed: return -ENXIO; } -static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, - unsigned long exit_qual) +static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu) { + struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit; + u32 intr_info = ex->vector | INTR_INFO_VALID_MASK; struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - unsigned int nr = vcpu->arch.exception.nr; - u32 intr_info = nr | INTR_INFO_VALID_MASK; + unsigned long exit_qual; + + if (ex->has_payload) { + exit_qual = ex->payload; + } else if (ex->vector == PF_VECTOR) { + exit_qual = vcpu->arch.cr2; + } else if (ex->vector == DB_VECTOR) { + exit_qual = vcpu->arch.dr6; + exit_qual &= ~DR6_BT; + exit_qual ^= DR6_ACTIVE_LOW; + } else { + exit_qual = 0; + } - if (vcpu->arch.exception.has_error_code) { - vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code; + if (ex->has_error_code) { + /* + * Intel CPUs do not generate error codes with bits 31:16 set, + * and more importantly VMX disallows setting bits 31:16 in the + * injected error code for VM-Entry. Drop the bits to mimic + * hardware and avoid inducing failure on nested VM-Entry if L1 + * chooses to inject the exception back to L2. AMD CPUs _do_ + * generate "full" 32-bit error codes, so KVM allows userspace + * to inject exception error codes with bits 31:16 set. + */ + vmcs12->vm_exit_intr_error_code = (u16)ex->error_code; intr_info |= INTR_INFO_DELIVER_CODE_MASK; } - if (kvm_exception_is_soft(nr)) + if (kvm_exception_is_soft(ex->vector)) intr_info |= INTR_TYPE_SOFT_EXCEPTION; else intr_info |= INTR_TYPE_HARD_EXCEPTION; @@ -3844,16 +3866,39 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, } /* - * Returns true if a debug trap is pending delivery. + * Returns true if a debug trap is (likely) pending delivery. Infer the class + * of a #DB (trap-like vs. fault-like) from the exception payload (to-be-DR6). + * Using the payload is flawed because code breakpoints (fault-like) and data + * breakpoints (trap-like) set the same bits in DR6 (breakpoint detected), i.e. + * this will return false positives if a to-be-injected code breakpoint #DB is + * pending (from KVM's perspective, but not "pending" across an instruction + * boundary). ICEBP, a.k.a. INT1, is also not reflected here even though it + * too is trap-like. * - * In KVM, debug traps bear an exception payload. As such, the class of a #DB - * exception may be inferred from the presence of an exception payload. + * KVM "works" despite these flaws as ICEBP isn't currently supported by the + * emulator, Monitor Trap Flag is not marked pending on intercepted #DBs (the + * #DB has already happened), and MTF isn't marked pending on code breakpoints + * from the emulator (because such #DBs are fault-like and thus don't trigger + * actions that fire on instruction retire). + */ +static unsigned long vmx_get_pending_dbg_trap(struct kvm_queued_exception *ex) +{ + if (!ex->pending || ex->vector != DB_VECTOR) + return 0; + + /* General Detect #DBs are always fault-like. */ + return ex->payload & ~DR6_BD; +} + +/* + * Returns true if there's a pending #DB exception that is lower priority than + * a pending Monitor Trap Flag VM-Exit. TSS T-flag #DBs are not emulated by + * KVM, but could theoretically be injected by userspace. Note, this code is + * imperfect, see above. */ -static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu) +static bool vmx_is_low_priority_db_trap(struct kvm_queued_exception *ex) { - return vcpu->arch.exception.pending && - vcpu->arch.exception.nr == DB_VECTOR && - vcpu->arch.exception.payload; + return vmx_get_pending_dbg_trap(ex) & ~DR6_BT; } /* @@ -3865,9 +3910,11 @@ static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu) */ static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu) { - if (vmx_pending_dbg_trap(vcpu)) - vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, - vcpu->arch.exception.payload); + unsigned long pending_dbg; + + pending_dbg = vmx_get_pending_dbg_trap(&vcpu->arch.exception); + if (pending_dbg) + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, pending_dbg); } static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu) @@ -3876,21 +3923,113 @@ static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu) to_vmx(vcpu)->nested.preemption_timer_expired; } +static bool vmx_has_nested_events(struct kvm_vcpu *vcpu) +{ + return nested_vmx_preemption_timer_pending(vcpu) || + to_vmx(vcpu)->nested.mtf_pending; +} + +/* + * Per the Intel SDM's table "Priority Among Concurrent Events", with minor + * edits to fill in missing examples, e.g. #DB due to split-lock accesses, + * and less minor edits to splice in the priority of VMX Non-Root specific + * events, e.g. MTF and NMI/INTR-window exiting. + * + * 1 Hardware Reset and Machine Checks + * - RESET + * - Machine Check + * + * 2 Trap on Task Switch + * - T flag in TSS is set (on task switch) + * + * 3 External Hardware Interventions + * - FLUSH + * - STOPCLK + * - SMI + * - INIT + * + * 3.5 Monitor Trap Flag (MTF) VM-exit[1] + * + * 4 Traps on Previous Instruction + * - Breakpoints + * - Trap-class Debug Exceptions (#DB due to TF flag set, data/I-O + * breakpoint, or #DB due to a split-lock access) + * + * 4.3 VMX-preemption timer expired VM-exit + * + * 4.6 NMI-window exiting VM-exit[2] + * + * 5 Nonmaskable Interrupts (NMI) + * + * 5.5 Interrupt-window exiting VM-exit and Virtual-interrupt delivery + * + * 6 Maskable Hardware Interrupts + * + * 7 Code Breakpoint Fault + * + * 8 Faults from Fetching Next Instruction + * - Code-Segment Limit Violation + * - Code Page Fault + * - Control protection exception (missing ENDBRANCH at target of indirect + * call or jump) + * + * 9 Faults from Decoding Next Instruction + * - Instruction length > 15 bytes + * - Invalid Opcode + * - Coprocessor Not Available + * + *10 Faults on Executing Instruction + * - Overflow + * - Bound error + * - Invalid TSS + * - Segment Not Present + * - Stack fault + * - General Protection + * - Data Page Fault + * - Alignment Check + * - x86 FPU Floating-point exception + * - SIMD floating-point exception + * - Virtualization exception + * - Control protection exception + * + * [1] Per the "Monitor Trap Flag" section: System-management interrupts (SMIs), + * INIT signals, and higher priority events take priority over MTF VM exits. + * MTF VM exits take priority over debug-trap exceptions and lower priority + * events. + * + * [2] Debug-trap exceptions and higher priority events take priority over VM exits + * caused by the VMX-preemption timer. VM exits caused by the VMX-preemption + * timer take priority over VM exits caused by the "NMI-window exiting" + * VM-execution control and lower priority events. + * + * [3] Debug-trap exceptions and higher priority events take priority over VM exits + * caused by "NMI-window exiting". VM exits caused by this control take + * priority over non-maskable interrupts (NMIs) and lower priority events. + * + * [4] Virtual-interrupt delivery has the same priority as that of VM exits due to + * the 1-setting of the "interrupt-window exiting" VM-execution control. Thus, + * non-maskable interrupts (NMIs) and higher priority events take priority over + * delivery of a virtual interrupt; delivery of a virtual interrupt takes + * priority over external interrupts and lower priority events. + */ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long exit_qual; - bool block_nested_events = - vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); - bool mtf_pending = vmx->nested.mtf_pending; struct kvm_lapic *apic = vcpu->arch.apic; - + struct vcpu_vmx *vmx = to_vmx(vcpu); /* - * Clear the MTF state. If a higher priority VM-exit is delivered first, - * this state is discarded. + * Only a pending nested run blocks a pending exception. If there is a + * previously injected event, the pending exception occurred while said + * event was being delivered and thus needs to be handled. */ - if (!block_nested_events) - vmx->nested.mtf_pending = false; + bool block_nested_exceptions = vmx->nested.nested_run_pending; + /* + * New events (not exceptions) are only recognized at instruction + * boundaries. If an event needs reinjection, then KVM is handling a + * VM-Exit that occurred _during_ instruction execution; new events are + * blocked until the instruction completes. + */ + bool block_nested_events = block_nested_exceptions || + kvm_event_needs_reinjection(vcpu); if (lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &apic->pending_events)) { @@ -3900,6 +4039,9 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) clear_bit(KVM_APIC_INIT, &apic->pending_events); if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED) nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); + + /* MTF is discarded if the vCPU is in WFS. */ + vmx->nested.mtf_pending = false; return 0; } @@ -3909,31 +4051,41 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) return -EBUSY; clear_bit(KVM_APIC_SIPI, &apic->pending_events); - if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0, apic->sipi_vector & 0xFFUL); - return 0; + return 0; + } + /* Fallthrough, the SIPI is completely ignored. */ } /* - * Process any exceptions that are not debug traps before MTF. + * Process exceptions that are higher priority than Monitor Trap Flag: + * fault-like exceptions, TSS T flag #DB (not emulated by KVM, but + * could theoretically come in from userspace), and ICEBP (INT1). * - * Note that only a pending nested run can block a pending exception. - * Otherwise an injected NMI/interrupt should either be - * lost or delivered to the nested hypervisor in the IDT_VECTORING_INFO, - * while delivering the pending exception. + * TODO: SMIs have higher priority than MTF and trap-like #DBs (except + * for TSS T flag #DBs). KVM also doesn't save/restore pending MTF + * across SMI/RSM as it should; that needs to be addressed in order to + * prioritize SMI over MTF and trap-like #DBs. */ - - if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) { - if (vmx->nested.nested_run_pending) + if (vcpu->arch.exception_vmexit.pending && + !vmx_is_low_priority_db_trap(&vcpu->arch.exception_vmexit)) { + if (block_nested_exceptions) return -EBUSY; - if (!nested_vmx_check_exception(vcpu, &exit_qual)) - goto no_vmexit; - nested_vmx_inject_exception_vmexit(vcpu, exit_qual); + + nested_vmx_inject_exception_vmexit(vcpu); return 0; } - if (mtf_pending) { + if (vcpu->arch.exception.pending && + !vmx_is_low_priority_db_trap(&vcpu->arch.exception)) { + if (block_nested_exceptions) + return -EBUSY; + goto no_vmexit; + } + + if (vmx->nested.mtf_pending) { if (block_nested_events) return -EBUSY; nested_vmx_update_pending_dbg(vcpu); @@ -3941,15 +4093,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) return 0; } - if (vcpu->arch.exception.pending) { - if (vmx->nested.nested_run_pending) + if (vcpu->arch.exception_vmexit.pending) { + if (block_nested_exceptions) return -EBUSY; - if (!nested_vmx_check_exception(vcpu, &exit_qual)) - goto no_vmexit; - nested_vmx_inject_exception_vmexit(vcpu, exit_qual); + + nested_vmx_inject_exception_vmexit(vcpu); return 0; } + if (vcpu->arch.exception.pending) { + if (block_nested_exceptions) + return -EBUSY; + goto no_vmexit; + } + if (nested_vmx_preemption_timer_pending(vcpu)) { if (block_nested_events) return -EBUSY; @@ -4255,14 +4412,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL); } - - /* - * Drop what we picked up for L2 via vmx_complete_interrupts. It is - * preserved above and would only end up incorrectly in L1. - */ - vcpu->arch.nmi_injected = false; - kvm_clear_exception_queue(vcpu); - kvm_clear_interrupt_queue(vcpu); } /* @@ -4538,6 +4687,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + /* Pending MTF traps are discarded on VM-Exit. */ + vmx->nested.mtf_pending = false; + /* trying to cancel vmlaunch/vmresume is a bug */ WARN_ON_ONCE(vmx->nested.nested_run_pending); @@ -4602,6 +4754,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, WARN_ON_ONCE(nested_early_check); } + /* + * Drop events/exceptions that were queued for re-injection to L2 + * (picked up via vmx_complete_interrupts()), as well as exceptions + * that were pending for L2. Note, this must NOT be hoisted above + * prepare_vmcs12(), events/exceptions queued for re-injection need to + * be captured in vmcs12 (see vmcs12_save_pending_event()). + */ + vcpu->arch.nmi_injected = false; + kvm_clear_exception_queue(vcpu); + kvm_clear_interrupt_queue(vcpu); + vmx_switch_vmcs(vcpu, &vmx->vmcs01); /* Update any VMCS fields that might have changed while L2 ran */ @@ -5030,8 +5193,8 @@ static int handle_vmxoff(struct kvm_vcpu *vcpu) free_nested(vcpu); - /* Process a latched INIT during time CPU was in VMX operation */ - kvm_make_request(KVM_REQ_EVENT, vcpu); + if (kvm_apic_has_pending_init_or_sipi(vcpu)) + kvm_make_request(KVM_REQ_EVENT, vcpu); return nested_vmx_succeed(vcpu); } @@ -5067,7 +5230,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) * state. It is possible that the area will stay mapped as * vmx->nested.hv_evmcs but this shouldn't be a problem. */ - if (likely(!vmx->nested.enlightened_vmcs_enabled || + if (likely(!guest_cpuid_has_evmcs(vcpu) || !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) { if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vcpu); @@ -6463,6 +6626,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (ret) goto error_guest_mode; + if (vmx->nested.mtf_pending) + kvm_make_request(KVM_REQ_EVENT, vcpu); + return 0; error_guest_mode: @@ -6522,8 +6688,10 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void) * bit in the high half is on if the corresponding bit in the control field * may be on. See also vmx_control_verify(). */ -void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) +void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) { + struct nested_vmx_msrs *msrs = &vmcs_conf->nested; + /* * Note that as a general rule, the high half of the MSRs (bits in * the control fields which may be 1) should be initialized by the @@ -6540,11 +6708,10 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) */ /* pin-based controls */ - rdmsr(MSR_IA32_VMX_PINBASED_CTLS, - msrs->pinbased_ctls_low, - msrs->pinbased_ctls_high); - msrs->pinbased_ctls_low |= + msrs->pinbased_ctls_low = PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; + + msrs->pinbased_ctls_high = vmcs_conf->pin_based_exec_ctrl; msrs->pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | @@ -6555,50 +6722,47 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) PIN_BASED_VMX_PREEMPTION_TIMER; /* exit controls */ - rdmsr(MSR_IA32_VMX_EXIT_CTLS, - msrs->exit_ctls_low, - msrs->exit_ctls_high); msrs->exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; + msrs->exit_ctls_high = vmcs_conf->vmexit_ctrl; msrs->exit_ctls_high &= #ifdef CONFIG_X86_64 VM_EXIT_HOST_ADDR_SPACE_SIZE | #endif VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | - VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + VM_EXIT_CLEAR_BNDCFGS; msrs->exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | - VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT | + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; /* We support free control of debug control saving. */ msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; /* entry controls */ - rdmsr(MSR_IA32_VMX_ENTRY_CTLS, - msrs->entry_ctls_low, - msrs->entry_ctls_high); msrs->entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; + + msrs->entry_ctls_high = vmcs_conf->vmentry_ctrl; msrs->entry_ctls_high &= #ifdef CONFIG_X86_64 VM_ENTRY_IA32E_MODE | #endif - VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS | - VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; msrs->entry_ctls_high |= - (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); + (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER | + VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); /* We support free control of debug control loading. */ msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; /* cpu-based controls */ - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, - msrs->procbased_ctls_low, - msrs->procbased_ctls_high); msrs->procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; + + msrs->procbased_ctls_high = vmcs_conf->cpu_based_exec_ctrl; msrs->procbased_ctls_high &= CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING | @@ -6632,12 +6796,9 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) * depend on CPUID bits, they are added later by * vmx_vcpu_after_set_cpuid. */ - if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, - msrs->secondary_ctls_low, - msrs->secondary_ctls_high); - msrs->secondary_ctls_low = 0; + + msrs->secondary_ctls_high = vmcs_conf->cpu_based_2nd_exec_ctrl; msrs->secondary_ctls_high &= SECONDARY_EXEC_DESC | SECONDARY_EXEC_ENABLE_RDTSCP | @@ -6717,10 +6878,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) msrs->secondary_ctls_high |= SECONDARY_EXEC_ENCLS_EXITING; /* miscellaneous data */ - rdmsr(MSR_IA32_VMX_MISC, - msrs->misc_low, - msrs->misc_high); - msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA; + msrs->misc_low = (u32)vmcs_conf->misc & VMX_MISC_SAVE_EFER_LMA; msrs->misc_low |= MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | @@ -6814,9 +6972,9 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) struct kvm_x86_nested_ops vmx_nested_ops = { .leave_nested = vmx_leave_nested, + .is_exception_vmexit = nested_vmx_is_exception_vmexit, .check_events = vmx_check_nested_events, - .handle_page_fault_workaround = nested_vmx_handle_page_fault_workaround, - .hv_timer_pending = nested_vmx_preemption_timer_pending, + .has_events = vmx_has_nested_events, .triple_fault = nested_vmx_triple_fault, .get_state = vmx_get_nested_state, .set_state = vmx_set_nested_state, diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 88b00a7359e4..6312c9541c3c 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -17,7 +17,7 @@ enum nvmx_vmentry_status { }; void vmx_leave_nested(struct kvm_vcpu *vcpu); -void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps); +void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps); void nested_vmx_hardware_unsetup(void); __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); void nested_vmx_set_vmcs_shadowing_bitmap(void); diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index c399637a3a79..25b70a85bef5 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -68,15 +68,11 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) } } -/* function is called when global control register has been updated. */ -static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data) +static void reprogram_counters(struct kvm_pmu *pmu, u64 diff) { int bit; - u64 diff = pmu->global_ctrl ^ data; struct kvm_pmc *pmc; - pmu->global_ctrl = data; - for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) { pmc = intel_pmc_idx_to_pmc(pmu, bit); if (pmc) @@ -397,7 +393,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) struct kvm_pmc *pmc; u32 msr = msr_info->index; u64 data = msr_info->data; - u64 reserved_bits; + u64 reserved_bits, diff; switch (msr) { case MSR_CORE_PERF_FIXED_CTR_CTRL: @@ -418,7 +414,9 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (pmu->global_ctrl == data) return 0; if (kvm_valid_perf_global_ctrl(pmu, data)) { - global_ctrl_changed(pmu, data); + diff = pmu->global_ctrl ^ data; + pmu->global_ctrl = data; + reprogram_counters(pmu, diff); return 0; } break; @@ -433,7 +431,9 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (pmu->pebs_enable == data) return 0; if (!(data & pmu->pebs_enable_mask)) { + diff = pmu->pebs_enable ^ data; pmu->pebs_enable = data; + reprogram_counters(pmu, diff); return 0; } break; @@ -776,20 +776,23 @@ static void intel_pmu_cleanup(struct kvm_vcpu *vcpu) void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu) { struct kvm_pmc *pmc = NULL; - int bit; + int bit, hw_idx; for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX) { pmc = intel_pmc_idx_to_pmc(pmu, bit); if (!pmc || !pmc_speculative_in_use(pmc) || - !intel_pmc_is_enabled(pmc)) + !intel_pmc_is_enabled(pmc) || !pmc->perf_event) continue; - if (pmc->perf_event && pmc->idx != pmc->perf_event->hw.idx) { - pmu->host_cross_mapped_mask |= - BIT_ULL(pmc->perf_event->hw.idx); - } + /* + * A negative index indicates the event isn't mapped to a + * physical counter in the host, e.g. due to contention. + */ + hw_idx = pmc->perf_event->hw.idx; + if (hw_idx != pmc->idx && hw_idx > -1) + pmu->host_cross_mapped_mask |= BIT_ULL(hw_idx); } } diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index aba8cebdc587..8f95c7c01433 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -129,7 +129,7 @@ static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr) ex.address = gva; ex.error_code_valid = true; ex.nested_page_fault = false; - kvm_inject_page_fault(vcpu, &ex); + kvm_inject_emulated_page_fault(vcpu, &ex); } else { kvm_inject_gp(vcpu, 0); } diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 6de96b943804..8477d8bdd69c 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -189,13 +189,16 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) xor %ebx, %ebx .Lclear_regs: + /* Discard @regs. The register is irrelevant, it just can't be RBX. */ + pop %_ASM_AX + /* * Clear all general purpose registers except RSP and RBX to prevent * speculative use of the guest's values, even those that are reloaded * via the stack. In theory, an L1 cache miss when restoring registers * could lead to speculative execution with the guest's values. * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially - * free. RSP and RAX are exempt as RSP is restored by hardware during + * free. RSP and RBX are exempt as RSP is restored by hardware during * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return * value. */ @@ -216,9 +219,6 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) xor %r15d, %r15d #endif - /* "POP" @regs. */ - add $WORD_SIZE, %_ASM_SP - /* * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before * the first unbalanced RET after vmexit! @@ -234,7 +234,6 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\ X86_FEATURE_RSB_VMEXIT_LITE - pop %_ASM_ARG2 /* @flags */ pop %_ASM_ARG1 /* @vmx */ @@ -293,22 +292,13 @@ SYM_FUNC_START(vmread_error_trampoline) push %r10 push %r11 #endif -#ifdef CONFIG_X86_64 + /* Load @field and @fault to arg1 and arg2 respectively. */ - mov 3*WORD_SIZE(%rbp), %_ASM_ARG2 - mov 2*WORD_SIZE(%rbp), %_ASM_ARG1 -#else - /* Parameters are passed on the stack for 32-bit (see asmlinkage). */ - push 3*WORD_SIZE(%ebp) - push 2*WORD_SIZE(%ebp) -#endif + mov 3*WORD_SIZE(%_ASM_BP), %_ASM_ARG2 + mov 2*WORD_SIZE(%_ASM_BP), %_ASM_ARG1 call vmread_error -#ifndef CONFIG_X86_64 - add $8, %esp -#endif - /* Zero out @fault, which will be popped into the result register. */ _ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c9b49a09e6b5..9dba04b6b019 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -439,7 +439,7 @@ do { \ pr_warn_ratelimited(fmt); \ } while (0) -asmlinkage void vmread_error(unsigned long field, bool fault) +void vmread_error(unsigned long field, bool fault) { if (fault) kvm_spurious_fault(); @@ -864,7 +864,7 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) return flags; } -static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, +static __always_inline void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { vm_entry_controls_clearbit(vmx, entry); @@ -922,7 +922,7 @@ skip_guest: vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); } -static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, +static __always_inline void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit, unsigned long guest_val_vmcs, unsigned long host_val_vmcs, u64 guest_val, u64 host_val) @@ -1652,17 +1652,25 @@ static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu) /* * Per the SDM, MTF takes priority over debug-trap exceptions besides - * T-bit traps. As instruction emulation is completed (i.e. at the - * instruction boundary), any #DB exception pending delivery must be a - * debug-trap. Record the pending MTF state to be delivered in + * TSS T-bit traps and ICEBP (INT1). KVM doesn't emulate T-bit traps + * or ICEBP (in the emulator proper), and skipping of ICEBP after an + * intercepted #DB deliberately avoids single-step #DB and MTF updates + * as ICEBP is higher priority than both. As instruction emulation is + * completed at this point (i.e. KVM is at the instruction boundary), + * any #DB exception pending delivery must be a debug-trap of lower + * priority than MTF. Record the pending MTF state to be delivered in * vmx_check_nested_events(). */ if (nested_cpu_has_mtf(vmcs12) && (!vcpu->arch.exception.pending || - vcpu->arch.exception.nr == DB_VECTOR)) + vcpu->arch.exception.vector == DB_VECTOR) && + (!vcpu->arch.exception_vmexit.pending || + vcpu->arch.exception_vmexit.vector == DB_VECTOR)) { vmx->nested.mtf_pending = true; - else + kvm_make_request(KVM_REQ_EVENT, vcpu); + } else { vmx->nested.mtf_pending = false; + } } static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu) @@ -1684,32 +1692,40 @@ static void vmx_clear_hlt(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); } -static void vmx_queue_exception(struct kvm_vcpu *vcpu) +static void vmx_inject_exception(struct kvm_vcpu *vcpu) { + struct kvm_queued_exception *ex = &vcpu->arch.exception; + u32 intr_info = ex->vector | INTR_INFO_VALID_MASK; struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned nr = vcpu->arch.exception.nr; - bool has_error_code = vcpu->arch.exception.has_error_code; - u32 error_code = vcpu->arch.exception.error_code; - u32 intr_info = nr | INTR_INFO_VALID_MASK; - kvm_deliver_exception_payload(vcpu); + kvm_deliver_exception_payload(vcpu, ex); - if (has_error_code) { - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); + if (ex->has_error_code) { + /* + * Despite the error code being architecturally defined as 32 + * bits, and the VMCS field being 32 bits, Intel CPUs and thus + * VMX don't actually supporting setting bits 31:16. Hardware + * will (should) never provide a bogus error code, but AMD CPUs + * do generate error codes with bits 31:16 set, and so KVM's + * ABI lets userspace shove in arbitrary 32-bit values. Drop + * the upper bits to avoid VM-Fail, losing information that + * does't really exist is preferable to killing the VM. + */ + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)ex->error_code); intr_info |= INTR_INFO_DELIVER_CODE_MASK; } if (vmx->rmode.vm86_active) { int inc_eip = 0; - if (kvm_exception_is_soft(nr)) + if (kvm_exception_is_soft(ex->vector)) inc_eip = vcpu->arch.event_exit_inst_len; - kvm_inject_realmode_interrupt(vcpu, nr, inc_eip); + kvm_inject_realmode_interrupt(vcpu, ex->vector, inc_eip); return; } WARN_ON_ONCE(vmx->emulation_required); - if (kvm_exception_is_soft(nr)) { + if (kvm_exception_is_soft(ex->vector)) { vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, vmx->vcpu.arch.event_exit_inst_len); intr_info |= INTR_TYPE_SOFT_EXCEPTION; @@ -1930,9 +1946,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) * sanity checking and refuse to boot. Filter all unsupported * features out. */ - if (!msr_info->host_initiated && - vmx->nested.enlightened_vmcs_enabled) - nested_evmcs_filter_control_msr(msr_info->index, + if (!msr_info->host_initiated && guest_cpuid_has_evmcs(vcpu)) + nested_evmcs_filter_control_msr(vcpu, msr_info->index, &msr_info->data); break; case MSR_IA32_RTIT_CTL: @@ -2494,6 +2509,30 @@ static bool cpu_has_sgx(void) return cpuid_eax(0) >= 0x12 && (cpuid_eax(0x12) & BIT(0)); } +/* + * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they + * can't be used due to errata where VM Exit may incorrectly clear + * IA32_PERF_GLOBAL_CTRL[34:32]. Work around the errata by using the + * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL. + */ +static bool cpu_has_perf_global_ctrl_bug(void) +{ + if (boot_cpu_data.x86 == 0x6) { + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_NEHALEM_EP: /* AAK155 */ + case INTEL_FAM6_NEHALEM: /* AAP115 */ + case INTEL_FAM6_WESTMERE: /* AAT100 */ + case INTEL_FAM6_WESTMERE_EP: /* BC86,AAY89,BD102 */ + case INTEL_FAM6_NEHALEM_EX: /* BA97 */ + return true; + default: + break; + } + } + + return false; +} + static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr, u32 *result) { @@ -2526,13 +2565,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, struct vmx_capability *vmx_cap) { u32 vmx_msr_low, vmx_msr_high; - u32 min, opt, min2, opt2; u32 _pin_based_exec_control = 0; u32 _cpu_based_exec_control = 0; u32 _cpu_based_2nd_exec_control = 0; u64 _cpu_based_3rd_exec_control = 0; u32 _vmexit_control = 0; u32 _vmentry_control = 0; + u64 misc_msr; int i; /* @@ -2552,64 +2591,17 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, }; memset(vmcs_conf, 0, sizeof(*vmcs_conf)); - min = CPU_BASED_HLT_EXITING | -#ifdef CONFIG_X86_64 - CPU_BASED_CR8_LOAD_EXITING | - CPU_BASED_CR8_STORE_EXITING | -#endif - CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING | - CPU_BASED_UNCOND_IO_EXITING | - CPU_BASED_MOV_DR_EXITING | - CPU_BASED_USE_TSC_OFFSETTING | - CPU_BASED_MWAIT_EXITING | - CPU_BASED_MONITOR_EXITING | - CPU_BASED_INVLPG_EXITING | - CPU_BASED_RDPMC_EXITING; - - opt = CPU_BASED_TPR_SHADOW | - CPU_BASED_USE_MSR_BITMAPS | - CPU_BASED_ACTIVATE_SECONDARY_CONTROLS | - CPU_BASED_ACTIVATE_TERTIARY_CONTROLS; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, - &_cpu_based_exec_control) < 0) + + if (adjust_vmx_controls(KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL, + KVM_OPTIONAL_VMX_CPU_BASED_VM_EXEC_CONTROL, + MSR_IA32_VMX_PROCBASED_CTLS, + &_cpu_based_exec_control)) return -EIO; -#ifdef CONFIG_X86_64 - if (_cpu_based_exec_control & CPU_BASED_TPR_SHADOW) - _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING & - ~CPU_BASED_CR8_STORE_EXITING; -#endif if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { - min2 = 0; - opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | - SECONDARY_EXEC_WBINVD_EXITING | - SECONDARY_EXEC_ENABLE_VPID | - SECONDARY_EXEC_ENABLE_EPT | - SECONDARY_EXEC_UNRESTRICTED_GUEST | - SECONDARY_EXEC_PAUSE_LOOP_EXITING | - SECONDARY_EXEC_DESC | - SECONDARY_EXEC_ENABLE_RDTSCP | - SECONDARY_EXEC_ENABLE_INVPCID | - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_SHADOW_VMCS | - SECONDARY_EXEC_XSAVES | - SECONDARY_EXEC_RDSEED_EXITING | - SECONDARY_EXEC_RDRAND_EXITING | - SECONDARY_EXEC_ENABLE_PML | - SECONDARY_EXEC_TSC_SCALING | - SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | - SECONDARY_EXEC_PT_USE_GPA | - SECONDARY_EXEC_PT_CONCEAL_VMX | - SECONDARY_EXEC_ENABLE_VMFUNC | - SECONDARY_EXEC_BUS_LOCK_DETECTION | - SECONDARY_EXEC_NOTIFY_VM_EXITING; - if (cpu_has_sgx()) - opt2 |= SECONDARY_EXEC_ENCLS_EXITING; - if (adjust_vmx_controls(min2, opt2, + if (adjust_vmx_controls(KVM_REQUIRED_VMX_SECONDARY_VM_EXEC_CONTROL, + KVM_OPTIONAL_VMX_SECONDARY_VM_EXEC_CONTROL, MSR_IA32_VMX_PROCBASED_CTLS2, - &_cpu_based_2nd_exec_control) < 0) + &_cpu_based_2nd_exec_control)) return -EIO; } #ifndef CONFIG_X86_64 @@ -2627,13 +2619,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, &vmx_cap->ept, &vmx_cap->vpid); - if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { - /* CR3 accesses and invlpg don't need to cause VM Exits when EPT - enabled */ - _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING | - CPU_BASED_INVLPG_EXITING); - } else if (vmx_cap->ept) { + if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) && + vmx_cap->ept) { pr_warn_once("EPT CAP should not exist if not support " "1-setting enable EPT VM-execution control\n"); @@ -2653,32 +2640,24 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, vmx_cap->vpid = 0; } - if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) { - u64 opt3 = TERTIARY_EXEC_IPI_VIRT; + if (!cpu_has_sgx()) + _cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_ENCLS_EXITING; - _cpu_based_3rd_exec_control = adjust_vmx_controls64(opt3, + if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) + _cpu_based_3rd_exec_control = + adjust_vmx_controls64(KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL, MSR_IA32_VMX_PROCBASED_CTLS3); - } - min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT; -#ifdef CONFIG_X86_64 - min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; -#endif - opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | - VM_EXIT_LOAD_IA32_PAT | - VM_EXIT_LOAD_IA32_EFER | - VM_EXIT_CLEAR_BNDCFGS | - VM_EXIT_PT_CONCEAL_PIP | - VM_EXIT_CLEAR_IA32_RTIT_CTL; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, - &_vmexit_control) < 0) + if (adjust_vmx_controls(KVM_REQUIRED_VMX_VM_EXIT_CONTROLS, + KVM_OPTIONAL_VMX_VM_EXIT_CONTROLS, + MSR_IA32_VMX_EXIT_CTLS, + &_vmexit_control)) return -EIO; - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; - opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | - PIN_BASED_VMX_PREEMPTION_TIMER; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, - &_pin_based_exec_control) < 0) + if (adjust_vmx_controls(KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL, + KVM_OPTIONAL_VMX_PIN_BASED_VM_EXEC_CONTROL, + MSR_IA32_VMX_PINBASED_CTLS, + &_pin_based_exec_control)) return -EIO; if (cpu_has_broken_vmx_preemption_timer()) @@ -2687,15 +2666,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)) _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; - min = VM_ENTRY_LOAD_DEBUG_CONTROLS; - opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | - VM_ENTRY_LOAD_IA32_PAT | - VM_ENTRY_LOAD_IA32_EFER | - VM_ENTRY_LOAD_BNDCFGS | - VM_ENTRY_PT_CONCEAL_PIP | - VM_ENTRY_LOAD_IA32_RTIT_CTL; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, - &_vmentry_control) < 0) + if (adjust_vmx_controls(KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS, + KVM_OPTIONAL_VMX_VM_ENTRY_CONTROLS, + MSR_IA32_VMX_ENTRY_CTLS, + &_vmentry_control)) return -EIO; for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) { @@ -2715,30 +2689,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, _vmexit_control &= ~x_ctrl; } - /* - * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they - * can't be used due to an errata where VM Exit may incorrectly clear - * IA32_PERF_GLOBAL_CTRL[34:32]. Workaround the errata by using the - * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL. - */ - if (boot_cpu_data.x86 == 0x6) { - switch (boot_cpu_data.x86_model) { - case 26: /* AAK155 */ - case 30: /* AAP115 */ - case 37: /* AAT100 */ - case 44: /* BC86,AAY89,BD102 */ - case 46: /* BA97 */ - _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; - _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; - pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL " - "does not work properly. Using workaround\n"); - break; - default: - break; - } - } - - rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ @@ -2755,6 +2705,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, if (((vmx_msr_high >> 18) & 15) != 6) return -EIO; + rdmsrl(MSR_IA32_VMX_MISC, misc_msr); + vmcs_conf->size = vmx_msr_high & 0x1fff; vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; @@ -2766,11 +2718,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, vmcs_conf->cpu_based_3rd_exec_ctrl = _cpu_based_3rd_exec_control; vmcs_conf->vmexit_ctrl = _vmexit_control; vmcs_conf->vmentry_ctrl = _vmentry_control; - -#if IS_ENABLED(CONFIG_HYPERV) - if (enlightened_vmcs) - evmcs_sanitize_exec_ctrls(vmcs_conf); -#endif + vmcs_conf->misc = misc_msr; return 0; } @@ -3037,10 +2985,15 @@ int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) return 0; vcpu->arch.efer = efer; +#ifdef CONFIG_X86_64 if (efer & EFER_LMA) vm_entry_controls_setbit(vmx, VM_ENTRY_IA32E_MODE); else vm_entry_controls_clearbit(vmx, VM_ENTRY_IA32E_MODE); +#else + if (KVM_BUG_ON(efer & EFER_LMA, vcpu->kvm)) + return 1; +#endif vmx_setup_uret_msrs(vmx); return 0; @@ -4327,18 +4280,37 @@ static u32 vmx_vmentry_ctrl(void) if (vmx_pt_mode_is_system()) vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | VM_ENTRY_LOAD_IA32_RTIT_CTL); - /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ - return vmentry_ctrl & - ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); + /* + * IA32e mode, and loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically. + */ + vmentry_ctrl &= ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | + VM_ENTRY_LOAD_IA32_EFER | + VM_ENTRY_IA32E_MODE); + + if (cpu_has_perf_global_ctrl_bug()) + vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + + return vmentry_ctrl; } static u32 vmx_vmexit_ctrl(void) { u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; + /* + * Not used by KVM and never set in vmcs01 or vmcs02, but emulated for + * nested virtualization and thus allowed to be set in vmcs12. + */ + vmexit_ctrl &= ~(VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER | + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); + if (vmx_pt_mode_is_system()) vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | VM_EXIT_CLEAR_IA32_RTIT_CTL); + + if (cpu_has_perf_global_ctrl_bug()) + vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ return vmexit_ctrl & ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); @@ -4376,20 +4348,38 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_exec_ctrl; + /* + * Not used by KVM, but fully supported for nesting, i.e. are allowed in + * vmcs12 and propagated to vmcs02 when set in vmcs12. + */ + exec_control &= ~(CPU_BASED_RDTSC_EXITING | + CPU_BASED_USE_IO_BITMAPS | + CPU_BASED_MONITOR_TRAP_FLAG | + CPU_BASED_PAUSE_EXITING); + + /* INTR_WINDOW_EXITING and NMI_WINDOW_EXITING are toggled dynamically */ + exec_control &= ~(CPU_BASED_INTR_WINDOW_EXITING | + CPU_BASED_NMI_WINDOW_EXITING); + if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT) exec_control &= ~CPU_BASED_MOV_DR_EXITING; - if (!cpu_need_tpr_shadow(&vmx->vcpu)) { + if (!cpu_need_tpr_shadow(&vmx->vcpu)) exec_control &= ~CPU_BASED_TPR_SHADOW; + #ifdef CONFIG_X86_64 + if (exec_control & CPU_BASED_TPR_SHADOW) + exec_control &= ~(CPU_BASED_CR8_LOAD_EXITING | + CPU_BASED_CR8_STORE_EXITING); + else exec_control |= CPU_BASED_CR8_STORE_EXITING | CPU_BASED_CR8_LOAD_EXITING; #endif - } - if (!enable_ept) - exec_control |= CPU_BASED_CR3_STORE_EXITING | - CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_INVLPG_EXITING; + /* No need to intercept CR3 access or INVPLG when using EPT. */ + if (enable_ept) + exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING | + CPU_BASED_INVLPG_EXITING); if (kvm_mwait_in_guest(vmx->vcpu.kvm)) exec_control &= ~(CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING); @@ -5155,8 +5145,10 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) * instruction. ICEBP generates a trap-like #DB, but * despite its interception control being tied to #DB, * is an instruction intercept, i.e. the VM-Exit occurs - * on the ICEBP itself. Note, skipping ICEBP also - * clears STI and MOVSS blocking. + * on the ICEBP itself. Use the inner "skip" helper to + * avoid single-step #DB and MTF updates, as ICEBP is + * higher priority. Note, skipping ICEBP still clears + * STI and MOVSS blocking. * * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS * if single-step is enabled in RFLAGS and STI or MOVSS @@ -5638,7 +5630,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - trace_kvm_page_fault(gpa, exit_qualification); + trace_kvm_page_fault(vcpu, gpa, exit_qualification); /* Is it a read fault? */ error_code = (exit_qualification & EPT_VIOLATION_ACC_READ) @@ -5710,7 +5702,7 @@ static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); return vmx->emulation_required && !vmx->rmode.vm86_active && - (vcpu->arch.exception.pending || vcpu->arch.exception.injected); + (kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected); } static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) @@ -7430,7 +7422,7 @@ static int __init vmx_check_processor_compat(void) if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) return -EIO; if (nested) - nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept); + nested_vmx_setup_ctls_msrs(&vmcs_conf, vmx_cap.ept); if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", smp_processor_id()); @@ -8070,7 +8062,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .patch_hypercall = vmx_patch_hypercall, .inject_irq = vmx_inject_irq, .inject_nmi = vmx_inject_nmi, - .queue_exception = vmx_queue_exception, + .inject_exception = vmx_inject_exception, .cancel_injection = vmx_cancel_injection, .interrupt_allowed = vmx_interrupt_allowed, .nmi_allowed = vmx_nmi_allowed, @@ -8227,6 +8219,10 @@ static __init int hardware_setup(void) if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0) return -EIO; + if (cpu_has_perf_global_ctrl_bug()) + pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL " + "does not work properly. Using workaround\n"); + if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); @@ -8341,11 +8337,9 @@ static __init int hardware_setup(void) if (enable_preemption_timer) { u64 use_timer_freq = 5000ULL * 1000 * 1000; - u64 vmx_msr; - rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); cpu_preemption_timer_multi = - vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; + vmcs_config.misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; if (tsc_khz) use_timer_freq = (u64)tsc_khz * 1000; @@ -8381,8 +8375,7 @@ static __init int hardware_setup(void) setup_default_sgx_lepubkeyhash(); if (nested) { - nested_vmx_setup_ctls_msrs(&vmcs_config.nested, - vmx_capability.ept); + nested_vmx_setup_ctls_msrs(&vmcs_config, vmx_capability.ept); r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers); if (r) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 24d58c2ffaa3..a3da84f4ea45 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -477,29 +477,145 @@ static inline u8 vmx_get_rvi(void) return vmcs_read16(GUEST_INTR_STATUS) & 0xff; } -#define BUILD_CONTROLS_SHADOW(lname, uname, bits) \ -static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \ -{ \ - if (vmx->loaded_vmcs->controls_shadow.lname != val) { \ - vmcs_write##bits(uname, val); \ - vmx->loaded_vmcs->controls_shadow.lname = val; \ - } \ -} \ -static inline u##bits __##lname##_controls_get(struct loaded_vmcs *vmcs) \ -{ \ - return vmcs->controls_shadow.lname; \ -} \ -static inline u##bits lname##_controls_get(struct vcpu_vmx *vmx) \ -{ \ - return __##lname##_controls_get(vmx->loaded_vmcs); \ -} \ -static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u##bits val) \ -{ \ - lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \ -} \ -static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##bits val) \ -{ \ - lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \ +#define __KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \ + (VM_ENTRY_LOAD_DEBUG_CONTROLS) +#ifdef CONFIG_X86_64 + #define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \ + (__KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS | \ + VM_ENTRY_IA32E_MODE) +#else + #define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \ + __KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS +#endif +#define KVM_OPTIONAL_VMX_VM_ENTRY_CONTROLS \ + (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | \ + VM_ENTRY_LOAD_IA32_PAT | \ + VM_ENTRY_LOAD_IA32_EFER | \ + VM_ENTRY_LOAD_BNDCFGS | \ + VM_ENTRY_PT_CONCEAL_PIP | \ + VM_ENTRY_LOAD_IA32_RTIT_CTL) + +#define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \ + (VM_EXIT_SAVE_DEBUG_CONTROLS | \ + VM_EXIT_ACK_INTR_ON_EXIT) +#ifdef CONFIG_X86_64 + #define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \ + (__KVM_REQUIRED_VMX_VM_EXIT_CONTROLS | \ + VM_EXIT_HOST_ADDR_SPACE_SIZE) +#else + #define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \ + __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS +#endif +#define KVM_OPTIONAL_VMX_VM_EXIT_CONTROLS \ + (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \ + VM_EXIT_SAVE_IA32_PAT | \ + VM_EXIT_LOAD_IA32_PAT | \ + VM_EXIT_SAVE_IA32_EFER | \ + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | \ + VM_EXIT_LOAD_IA32_EFER | \ + VM_EXIT_CLEAR_BNDCFGS | \ + VM_EXIT_PT_CONCEAL_PIP | \ + VM_EXIT_CLEAR_IA32_RTIT_CTL) + +#define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \ + (PIN_BASED_EXT_INTR_MASK | \ + PIN_BASED_NMI_EXITING) +#define KVM_OPTIONAL_VMX_PIN_BASED_VM_EXEC_CONTROL \ + (PIN_BASED_VIRTUAL_NMIS | \ + PIN_BASED_POSTED_INTR | \ + PIN_BASED_VMX_PREEMPTION_TIMER) + +#define __KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \ + (CPU_BASED_HLT_EXITING | \ + CPU_BASED_CR3_LOAD_EXITING | \ + CPU_BASED_CR3_STORE_EXITING | \ + CPU_BASED_UNCOND_IO_EXITING | \ + CPU_BASED_MOV_DR_EXITING | \ + CPU_BASED_USE_TSC_OFFSETTING | \ + CPU_BASED_MWAIT_EXITING | \ + CPU_BASED_MONITOR_EXITING | \ + CPU_BASED_INVLPG_EXITING | \ + CPU_BASED_RDPMC_EXITING | \ + CPU_BASED_INTR_WINDOW_EXITING) + +#ifdef CONFIG_X86_64 + #define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \ + (__KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL | \ + CPU_BASED_CR8_LOAD_EXITING | \ + CPU_BASED_CR8_STORE_EXITING) +#else + #define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \ + __KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL +#endif + +#define KVM_OPTIONAL_VMX_CPU_BASED_VM_EXEC_CONTROL \ + (CPU_BASED_RDTSC_EXITING | \ + CPU_BASED_TPR_SHADOW | \ + CPU_BASED_USE_IO_BITMAPS | \ + CPU_BASED_MONITOR_TRAP_FLAG | \ + CPU_BASED_USE_MSR_BITMAPS | \ + CPU_BASED_NMI_WINDOW_EXITING | \ + CPU_BASED_PAUSE_EXITING | \ + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS | \ + CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) + +#define KVM_REQUIRED_VMX_SECONDARY_VM_EXEC_CONTROL 0 +#define KVM_OPTIONAL_VMX_SECONDARY_VM_EXEC_CONTROL \ + (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \ + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | \ + SECONDARY_EXEC_WBINVD_EXITING | \ + SECONDARY_EXEC_ENABLE_VPID | \ + SECONDARY_EXEC_ENABLE_EPT | \ + SECONDARY_EXEC_UNRESTRICTED_GUEST | \ + SECONDARY_EXEC_PAUSE_LOOP_EXITING | \ + SECONDARY_EXEC_DESC | \ + SECONDARY_EXEC_ENABLE_RDTSCP | \ + SECONDARY_EXEC_ENABLE_INVPCID | \ + SECONDARY_EXEC_APIC_REGISTER_VIRT | \ + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \ + SECONDARY_EXEC_SHADOW_VMCS | \ + SECONDARY_EXEC_XSAVES | \ + SECONDARY_EXEC_RDSEED_EXITING | \ + SECONDARY_EXEC_RDRAND_EXITING | \ + SECONDARY_EXEC_ENABLE_PML | \ + SECONDARY_EXEC_TSC_SCALING | \ + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | \ + SECONDARY_EXEC_PT_USE_GPA | \ + SECONDARY_EXEC_PT_CONCEAL_VMX | \ + SECONDARY_EXEC_ENABLE_VMFUNC | \ + SECONDARY_EXEC_BUS_LOCK_DETECTION | \ + SECONDARY_EXEC_NOTIFY_VM_EXITING | \ + SECONDARY_EXEC_ENCLS_EXITING) + +#define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL 0 +#define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL \ + (TERTIARY_EXEC_IPI_VIRT) + +#define BUILD_CONTROLS_SHADOW(lname, uname, bits) \ +static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \ +{ \ + if (vmx->loaded_vmcs->controls_shadow.lname != val) { \ + vmcs_write##bits(uname, val); \ + vmx->loaded_vmcs->controls_shadow.lname = val; \ + } \ +} \ +static inline u##bits __##lname##_controls_get(struct loaded_vmcs *vmcs) \ +{ \ + return vmcs->controls_shadow.lname; \ +} \ +static inline u##bits lname##_controls_get(struct vcpu_vmx *vmx) \ +{ \ + return __##lname##_controls_get(vmx->loaded_vmcs); \ +} \ +static __always_inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u##bits val) \ +{ \ + BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \ + lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \ +} \ +static __always_inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##bits val) \ +{ \ + BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \ + lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \ } BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32) BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32) @@ -626,4 +742,14 @@ static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu) return lapic_in_kernel(vcpu) && enable_ipiv; } +static inline bool guest_cpuid_has_evmcs(struct kvm_vcpu *vcpu) +{ + /* + * eVMCS is exposed to the guest if Hyper-V is enabled in CPUID and + * eVMCS has been explicitly enabled by userspace. + */ + return vcpu->arch.hyperv_enabled && + to_vmx(vcpu)->nested.enlightened_vmcs_enabled; +} + #endif /* __KVM_X86_VMX_H */ diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index 5cfc49ddb1b4..ec268df83ed6 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -10,7 +10,7 @@ #include "vmcs.h" #include "../x86.h" -asmlinkage void vmread_error(unsigned long field, bool fault); +void vmread_error(unsigned long field, bool fault); __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, bool fault); void vmwrite_error(unsigned long field, unsigned long value); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b0c47b41c264..4bd5f8a751de 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -173,8 +173,13 @@ bool __read_mostly enable_vmware_backdoor = false; module_param(enable_vmware_backdoor, bool, S_IRUGO); EXPORT_SYMBOL_GPL(enable_vmware_backdoor); -static bool __read_mostly force_emulation_prefix = false; -module_param(force_emulation_prefix, bool, S_IRUGO); +/* + * Flags to manipulate forced emulation behavior (any non-zero value will + * enable forced emulation). + */ +#define KVM_FEP_CLEAR_RFLAGS_RF BIT(1) +static int __read_mostly force_emulation_prefix; +module_param(force_emulation_prefix, int, 0644); int __read_mostly pi_inject_timer = -1; module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR); @@ -528,6 +533,7 @@ static int exception_class(int vector) #define EXCPT_TRAP 1 #define EXCPT_ABORT 2 #define EXCPT_INTERRUPT 3 +#define EXCPT_DB 4 static int exception_type(int vector) { @@ -538,8 +544,14 @@ static int exception_type(int vector) mask = 1 << vector; - /* #DB is trap, as instruction watchpoints are handled elsewhere */ - if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR))) + /* + * #DBs can be trap-like or fault-like, the caller must check other CPU + * state, e.g. DR6, to determine whether a #DB is a trap or fault. + */ + if (mask & (1 << DB_VECTOR)) + return EXCPT_DB; + + if (mask & ((1 << BP_VECTOR) | (1 << OF_VECTOR))) return EXCPT_TRAP; if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR))) @@ -549,16 +561,13 @@ static int exception_type(int vector) return EXCPT_FAULT; } -void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu) +void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu, + struct kvm_queued_exception *ex) { - unsigned nr = vcpu->arch.exception.nr; - bool has_payload = vcpu->arch.exception.has_payload; - unsigned long payload = vcpu->arch.exception.payload; - - if (!has_payload) + if (!ex->has_payload) return; - switch (nr) { + switch (ex->vector) { case DB_VECTOR: /* * "Certain debug exceptions may clear bit 0-3. The @@ -583,8 +592,8 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu) * So they need to be flipped for DR6. */ vcpu->arch.dr6 |= DR6_ACTIVE_LOW; - vcpu->arch.dr6 |= payload; - vcpu->arch.dr6 ^= payload & DR6_ACTIVE_LOW; + vcpu->arch.dr6 |= ex->payload; + vcpu->arch.dr6 ^= ex->payload & DR6_ACTIVE_LOW; /* * The #DB payload is defined as compatible with the 'pending @@ -595,15 +604,30 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu) vcpu->arch.dr6 &= ~BIT(12); break; case PF_VECTOR: - vcpu->arch.cr2 = payload; + vcpu->arch.cr2 = ex->payload; break; } - vcpu->arch.exception.has_payload = false; - vcpu->arch.exception.payload = 0; + ex->has_payload = false; + ex->payload = 0; } EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload); +static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vector, + bool has_error_code, u32 error_code, + bool has_payload, unsigned long payload) +{ + struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit; + + ex->vector = vector; + ex->injected = false; + ex->pending = true; + ex->has_error_code = has_error_code; + ex->error_code = error_code; + ex->has_payload = has_payload; + ex->payload = payload; +} + static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error, u32 error_code, bool has_payload, unsigned long payload, bool reinject) @@ -613,18 +637,31 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, kvm_make_request(KVM_REQ_EVENT, vcpu); + /* + * If the exception is destined for L2 and isn't being reinjected, + * morph it to a VM-Exit if L1 wants to intercept the exception. A + * previously injected exception is not checked because it was checked + * when it was original queued, and re-checking is incorrect if _L1_ + * injected the exception, in which case it's exempt from interception. + */ + if (!reinject && is_guest_mode(vcpu) && + kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, nr, error_code)) { + kvm_queue_exception_vmexit(vcpu, nr, has_error, error_code, + has_payload, payload); + return; + } + if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) { queue: if (reinject) { /* - * On vmentry, vcpu->arch.exception.pending is only - * true if an event injection was blocked by - * nested_run_pending. In that case, however, - * vcpu_enter_guest requests an immediate exit, - * and the guest shouldn't proceed far enough to - * need reinjection. + * On VM-Entry, an exception can be pending if and only + * if event injection was blocked by nested_run_pending. + * In that case, however, vcpu_enter_guest() requests an + * immediate exit, and the guest shouldn't proceed far + * enough to need reinjection. */ - WARN_ON_ONCE(vcpu->arch.exception.pending); + WARN_ON_ONCE(kvm_is_exception_pending(vcpu)); vcpu->arch.exception.injected = true; if (WARN_ON_ONCE(has_payload)) { /* @@ -639,17 +676,18 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, vcpu->arch.exception.injected = false; } vcpu->arch.exception.has_error_code = has_error; - vcpu->arch.exception.nr = nr; + vcpu->arch.exception.vector = nr; vcpu->arch.exception.error_code = error_code; vcpu->arch.exception.has_payload = has_payload; vcpu->arch.exception.payload = payload; if (!is_guest_mode(vcpu)) - kvm_deliver_exception_payload(vcpu); + kvm_deliver_exception_payload(vcpu, + &vcpu->arch.exception); return; } /* to check exception */ - prev_nr = vcpu->arch.exception.nr; + prev_nr = vcpu->arch.exception.vector; if (prev_nr == DF_VECTOR) { /* triple fault -> shutdown */ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); @@ -657,25 +695,22 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, } class1 = exception_class(prev_nr); class2 = exception_class(nr); - if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) - || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { + if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) || + (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { /* - * Generate double fault per SDM Table 5-5. Set - * exception.pending = true so that the double fault - * can trigger a nested vmexit. + * Synthesize #DF. Clear the previously injected or pending + * exception so as not to incorrectly trigger shutdown. */ - vcpu->arch.exception.pending = true; vcpu->arch.exception.injected = false; - vcpu->arch.exception.has_error_code = true; - vcpu->arch.exception.nr = DF_VECTOR; - vcpu->arch.exception.error_code = 0; - vcpu->arch.exception.has_payload = false; - vcpu->arch.exception.payload = 0; - } else + vcpu->arch.exception.pending = false; + + kvm_queue_exception_e(vcpu, DF_VECTOR, 0); + } else { /* replace previous exception with a new one in a hope that instruction re-execution will regenerate lost exception */ goto queue; + } } void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) @@ -729,20 +764,22 @@ static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err) void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { ++vcpu->stat.pf_guest; - vcpu->arch.exception.nested_apf = - is_guest_mode(vcpu) && fault->async_page_fault; - if (vcpu->arch.exception.nested_apf) { - vcpu->arch.apf.nested_apf_token = fault->address; - kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code); - } else { + + /* + * Async #PF in L2 is always forwarded to L1 as a VM-Exit regardless of + * whether or not L1 wants to intercept "regular" #PF. + */ + if (is_guest_mode(vcpu) && fault->async_page_fault) + kvm_queue_exception_vmexit(vcpu, PF_VECTOR, + true, fault->error_code, + true, fault->address); + else kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code, fault->address); - } } EXPORT_SYMBOL_GPL(kvm_inject_page_fault); -/* Returns true if the page fault was immediately morphed into a VM-Exit. */ -bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, +void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { struct kvm_mmu *fault_mmu; @@ -760,26 +797,7 @@ bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address, fault_mmu->root.hpa); - /* - * A workaround for KVM's bad exception handling. If KVM injected an - * exception into L2, and L2 encountered a #PF while vectoring the - * injected exception, manually check to see if L1 wants to intercept - * #PF, otherwise queuing the #PF will lead to #DF or a lost exception. - * In all other cases, defer the check to nested_ops->check_events(), - * which will correctly handle priority (this does not). Note, other - * exceptions, e.g. #GP, are theoretically affected, #PF is simply the - * most problematic, e.g. when L0 and L1 are both intercepting #PF for - * shadow paging. - * - * TODO: Rewrite exception handling to track injected and pending - * (VM-Exit) exceptions separately. - */ - if (unlikely(vcpu->arch.exception.injected && is_guest_mode(vcpu)) && - kvm_x86_ops.nested_ops->handle_page_fault_workaround(vcpu, fault)) - return true; - fault_mmu->inject_page_fault(vcpu, fault); - return false; } EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault); @@ -4841,7 +4859,7 @@ static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) return (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_accept_dm_intr(vcpu) && !kvm_event_needs_reinjection(vcpu) && - !vcpu->arch.exception.pending); + !kvm_is_exception_pending(vcpu)); } static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, @@ -5016,25 +5034,38 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { + struct kvm_queued_exception *ex; + process_nmi(vcpu); if (kvm_check_request(KVM_REQ_SMI, vcpu)) process_smi(vcpu); /* - * In guest mode, payload delivery should be deferred, - * so that the L1 hypervisor can intercept #PF before - * CR2 is modified (or intercept #DB before DR6 is - * modified under nVMX). Unless the per-VM capability, - * KVM_CAP_EXCEPTION_PAYLOAD, is set, we may not defer the delivery of - * an exception payload and handle after a KVM_GET_VCPU_EVENTS. Since we - * opportunistically defer the exception payload, deliver it if the - * capability hasn't been requested before processing a - * KVM_GET_VCPU_EVENTS. + * KVM's ABI only allows for one exception to be migrated. Luckily, + * the only time there can be two queued exceptions is if there's a + * non-exiting _injected_ exception, and a pending exiting exception. + * In that case, ignore the VM-Exiting exception as it's an extension + * of the injected exception. + */ + if (vcpu->arch.exception_vmexit.pending && + !vcpu->arch.exception.pending && + !vcpu->arch.exception.injected) + ex = &vcpu->arch.exception_vmexit; + else + ex = &vcpu->arch.exception; + + /* + * In guest mode, payload delivery should be deferred if the exception + * will be intercepted by L1, e.g. KVM should not modifying CR2 if L1 + * intercepts #PF, ditto for DR6 and #DBs. If the per-VM capability, + * KVM_CAP_EXCEPTION_PAYLOAD, is not set, userspace may or may not + * propagate the payload and so it cannot be safely deferred. Deliver + * the payload if the capability hasn't been requested. */ if (!vcpu->kvm->arch.exception_payload_enabled && - vcpu->arch.exception.pending && vcpu->arch.exception.has_payload) - kvm_deliver_exception_payload(vcpu); + ex->pending && ex->has_payload) + kvm_deliver_exception_payload(vcpu, ex); /* * The API doesn't provide the instruction length for software @@ -5042,26 +5073,25 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, * isn't advanced, we should expect to encounter the exception * again. */ - if (kvm_exception_is_soft(vcpu->arch.exception.nr)) { + if (kvm_exception_is_soft(ex->vector)) { events->exception.injected = 0; events->exception.pending = 0; } else { - events->exception.injected = vcpu->arch.exception.injected; - events->exception.pending = vcpu->arch.exception.pending; + events->exception.injected = ex->injected; + events->exception.pending = ex->pending; /* * For ABI compatibility, deliberately conflate * pending and injected exceptions when * KVM_CAP_EXCEPTION_PAYLOAD isn't enabled. */ if (!vcpu->kvm->arch.exception_payload_enabled) - events->exception.injected |= - vcpu->arch.exception.pending; + events->exception.injected |= ex->pending; } - events->exception.nr = vcpu->arch.exception.nr; - events->exception.has_error_code = vcpu->arch.exception.has_error_code; - events->exception.error_code = vcpu->arch.exception.error_code; - events->exception_has_payload = vcpu->arch.exception.has_payload; - events->exception_payload = vcpu->arch.exception.payload; + events->exception.nr = ex->vector; + events->exception.has_error_code = ex->has_error_code; + events->exception.error_code = ex->error_code; + events->exception_has_payload = ex->has_payload; + events->exception_payload = ex->payload; events->interrupt.injected = vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; @@ -5131,9 +5161,22 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, return -EINVAL; process_nmi(vcpu); + + /* + * Flag that userspace is stuffing an exception, the next KVM_RUN will + * morph the exception to a VM-Exit if appropriate. Do this only for + * pending exceptions, already-injected exceptions are not subject to + * intercpetion. Note, userspace that conflates pending and injected + * is hosed, and will incorrectly convert an injected exception into a + * pending exception, which in turn may cause a spurious VM-Exit. + */ + vcpu->arch.exception_from_userspace = events->exception.pending; + + vcpu->arch.exception_vmexit.pending = false; + vcpu->arch.exception.injected = events->exception.injected; vcpu->arch.exception.pending = events->exception.pending; - vcpu->arch.exception.nr = events->exception.nr; + vcpu->arch.exception.vector = events->exception.nr; vcpu->arch.exception.has_error_code = events->exception.has_error_code; vcpu->arch.exception.error_code = events->exception.error_code; vcpu->arch.exception.has_payload = events->exception_has_payload; @@ -7257,6 +7300,7 @@ static int kvm_can_emulate_insn(struct kvm_vcpu *vcpu, int emul_type, int handle_ud(struct kvm_vcpu *vcpu) { static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX }; + int fep_flags = READ_ONCE(force_emulation_prefix); int emul_type = EMULTYPE_TRAP_UD; char sig[5]; /* ud2; .ascii "kvm" */ struct x86_exception e; @@ -7264,10 +7308,12 @@ int handle_ud(struct kvm_vcpu *vcpu) if (unlikely(!kvm_can_emulate_insn(vcpu, emul_type, NULL, 0))) return 1; - if (force_emulation_prefix && + if (fep_flags && kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e) == 0 && memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) { + if (fep_flags & KVM_FEP_CLEAR_RFLAGS_RF) + kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) & ~X86_EFLAGS_RF); kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig)); emul_type = EMULTYPE_TRAP_UD_FORCED; } @@ -7933,14 +7979,20 @@ static int emulator_get_msr_with_filter(struct x86_emulate_ctxt *ctxt, int r; r = kvm_get_msr_with_filter(vcpu, msr_index, pdata); + if (r < 0) + return X86EMUL_UNHANDLEABLE; - if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_RDMSR, 0, - complete_emulated_rdmsr, r)) { - /* Bounce to user space */ - return X86EMUL_IO_NEEDED; + if (r) { + if (kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_RDMSR, 0, + complete_emulated_rdmsr, r)) + return X86EMUL_IO_NEEDED; + + trace_kvm_msr_read_ex(msr_index); + return X86EMUL_PROPAGATE_FAULT; } - return r; + trace_kvm_msr_read(msr_index, *pdata); + return X86EMUL_CONTINUE; } static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt, @@ -7950,14 +8002,20 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt, int r; r = kvm_set_msr_with_filter(vcpu, msr_index, data); + if (r < 0) + return X86EMUL_UNHANDLEABLE; - if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_WRMSR, data, - complete_emulated_msr_access, r)) { - /* Bounce to user space */ - return X86EMUL_IO_NEEDED; + if (r) { + if (kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_WRMSR, data, + complete_emulated_msr_access, r)) + return X86EMUL_IO_NEEDED; + + trace_kvm_msr_write_ex(msr_index, data); + return X86EMUL_PROPAGATE_FAULT; } - return r; + trace_kvm_msr_write(msr_index, data); + return X86EMUL_CONTINUE; } static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, @@ -8161,18 +8219,17 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) } } -static bool inject_emulated_exception(struct kvm_vcpu *vcpu) +static void inject_emulated_exception(struct kvm_vcpu *vcpu) { struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; - if (ctxt->exception.vector == PF_VECTOR) - return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception); - if (ctxt->exception.error_code_valid) + if (ctxt->exception.vector == PF_VECTOR) + kvm_inject_emulated_page_fault(vcpu, &ctxt->exception); + else if (ctxt->exception.error_code_valid) kvm_queue_exception_e(vcpu, ctxt->exception.vector, ctxt->exception.error_code); else kvm_queue_exception(vcpu, ctxt->exception.vector); - return false; } static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu) @@ -8548,8 +8605,46 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); -static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int *r) +static bool kvm_is_code_breakpoint_inhibited(struct kvm_vcpu *vcpu) { + u32 shadow; + + if (kvm_get_rflags(vcpu) & X86_EFLAGS_RF) + return true; + + /* + * Intel CPUs inhibit code #DBs when MOV/POP SS blocking is active, + * but AMD CPUs do not. MOV/POP SS blocking is rare, check that first + * to avoid the relatively expensive CPUID lookup. + */ + shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu); + return (shadow & KVM_X86_SHADOW_INT_MOV_SS) && + guest_cpuid_is_intel(vcpu); +} + +static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, + int emulation_type, int *r) +{ + WARN_ON_ONCE(emulation_type & EMULTYPE_NO_DECODE); + + /* + * Do not check for code breakpoints if hardware has already done the + * checks, as inferred from the emulation type. On NO_DECODE and SKIP, + * the instruction has passed all exception checks, and all intercepted + * exceptions that trigger emulation have lower priority than code + * breakpoints, i.e. the fact that the intercepted exception occurred + * means any code breakpoints have already been serviced. + * + * Note, KVM needs to check for code #DBs on EMULTYPE_TRAP_UD_FORCED as + * hardware has checked the RIP of the magic prefix, but not the RIP of + * the instruction being emulated. The intent of forced emulation is + * to behave as if KVM intercepted the instruction without an exception + * and without a prefix. + */ + if (emulation_type & (EMULTYPE_NO_DECODE | EMULTYPE_SKIP | + EMULTYPE_TRAP_UD | EMULTYPE_VMWARE_GP | EMULTYPE_PF)) + return false; + if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { struct kvm_run *kvm_run = vcpu->run; @@ -8569,7 +8664,7 @@ static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int *r) } if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) && - !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) { + !kvm_is_code_breakpoint_inhibited(vcpu)) { unsigned long eip = kvm_get_linear_rip(vcpu); u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, vcpu->arch.dr7, @@ -8671,8 +8766,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, * are fault-like and are higher priority than any faults on * the code fetch itself. */ - if (!(emulation_type & EMULTYPE_SKIP) && - kvm_vcpu_check_code_breakpoint(vcpu, &r)) + if (kvm_vcpu_check_code_breakpoint(vcpu, emulation_type, &r)) return r; r = x86_decode_emulated_instruction(vcpu, emulation_type, @@ -8770,8 +8864,7 @@ restart: if (ctxt->have_exception) { r = 1; - if (inject_emulated_exception(vcpu)) - return r; + inject_emulated_exception(vcpu); } else if (vcpu->arch.pio.count) { if (!vcpu->arch.pio.in) { /* FIXME: return into emulator if single-stepping. */ @@ -8801,6 +8894,12 @@ writeback: unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu); toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + + /* + * Note, EXCPT_DB is assumed to be fault-like as the emulator + * only supports code breakpoints and general detect #DB, both + * of which are fault-like. + */ if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) { kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS); @@ -9662,74 +9761,155 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu) static void kvm_inject_exception(struct kvm_vcpu *vcpu) { - trace_kvm_inj_exception(vcpu->arch.exception.nr, + trace_kvm_inj_exception(vcpu->arch.exception.vector, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code, vcpu->arch.exception.injected); if (vcpu->arch.exception.error_code && !is_protmode(vcpu)) vcpu->arch.exception.error_code = false; - static_call(kvm_x86_queue_exception)(vcpu); + static_call(kvm_x86_inject_exception)(vcpu); } -static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) +/* + * Check for any event (interrupt or exception) that is ready to be injected, + * and if there is at least one event, inject the event with the highest + * priority. This handles both "pending" events, i.e. events that have never + * been injected into the guest, and "injected" events, i.e. events that were + * injected as part of a previous VM-Enter, but weren't successfully delivered + * and need to be re-injected. + * + * Note, this is not guaranteed to be invoked on a guest instruction boundary, + * i.e. doesn't guarantee that there's an event window in the guest. KVM must + * be able to inject exceptions in the "middle" of an instruction, and so must + * also be able to re-inject NMIs and IRQs in the middle of an instruction. + * I.e. for exceptions and re-injected events, NOT invoking this on instruction + * boundaries is necessary and correct. + * + * For simplicity, KVM uses a single path to inject all events (except events + * that are injected directly from L1 to L2) and doesn't explicitly track + * instruction boundaries for asynchronous events. However, because VM-Exits + * that can occur during instruction execution typically result in KVM skipping + * the instruction or injecting an exception, e.g. instruction and exception + * intercepts, and because pending exceptions have higher priority than pending + * interrupts, KVM still honors instruction boundaries in most scenarios. + * + * But, if a VM-Exit occurs during instruction execution, and KVM does NOT skip + * the instruction or inject an exception, then KVM can incorrecty inject a new + * asynchrounous event if the event became pending after the CPU fetched the + * instruction (in the guest). E.g. if a page fault (#PF, #NPF, EPT violation) + * occurs and is resolved by KVM, a coincident NMI, SMI, IRQ, etc... can be + * injected on the restarted instruction instead of being deferred until the + * instruction completes. + * + * In practice, this virtualization hole is unlikely to be observed by the + * guest, and even less likely to cause functional problems. To detect the + * hole, the guest would have to trigger an event on a side effect of an early + * phase of instruction execution, e.g. on the instruction fetch from memory. + * And for it to be a functional problem, the guest would need to depend on the + * ordering between that side effect, the instruction completing, _and_ the + * delivery of the asynchronous event. + */ +static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu, + bool *req_immediate_exit) { + bool can_inject; int r; - bool can_inject = true; - /* try to reinject previous events if any */ + /* + * Process nested events first, as nested VM-Exit supercedes event + * re-injection. If there's an event queued for re-injection, it will + * be saved into the appropriate vmc{b,s}12 fields on nested VM-Exit. + */ + if (is_guest_mode(vcpu)) + r = kvm_check_nested_events(vcpu); + else + r = 0; - if (vcpu->arch.exception.injected) { - kvm_inject_exception(vcpu); - can_inject = false; - } /* - * Do not inject an NMI or interrupt if there is a pending - * exception. Exceptions and interrupts are recognized at - * instruction boundaries, i.e. the start of an instruction. - * Trap-like exceptions, e.g. #DB, have higher priority than - * NMIs and interrupts, i.e. traps are recognized before an - * NMI/interrupt that's pending on the same instruction. - * Fault-like exceptions, e.g. #GP and #PF, are the lowest - * priority, but are only generated (pended) during instruction - * execution, i.e. a pending fault-like exception means the - * fault occurred on the *previous* instruction and must be - * serviced prior to recognizing any new events in order to - * fully complete the previous instruction. + * Re-inject exceptions and events *especially* if immediate entry+exit + * to/from L2 is needed, as any event that has already been injected + * into L2 needs to complete its lifecycle before injecting a new event. + * + * Don't re-inject an NMI or interrupt if there is a pending exception. + * This collision arises if an exception occurred while vectoring the + * injected event, KVM intercepted said exception, and KVM ultimately + * determined the fault belongs to the guest and queues the exception + * for injection back into the guest. + * + * "Injected" interrupts can also collide with pending exceptions if + * userspace ignores the "ready for injection" flag and blindly queues + * an interrupt. In that case, prioritizing the exception is correct, + * as the exception "occurred" before the exit to userspace. Trap-like + * exceptions, e.g. most #DBs, have higher priority than interrupts. + * And while fault-like exceptions, e.g. #GP and #PF, are the lowest + * priority, they're only generated (pended) during instruction + * execution, and interrupts are recognized at instruction boundaries. + * Thus a pending fault-like exception means the fault occurred on the + * *previous* instruction and must be serviced prior to recognizing any + * new events in order to fully complete the previous instruction. */ - else if (!vcpu->arch.exception.pending) { - if (vcpu->arch.nmi_injected) { - static_call(kvm_x86_inject_nmi)(vcpu); - can_inject = false; - } else if (vcpu->arch.interrupt.injected) { - static_call(kvm_x86_inject_irq)(vcpu, true); - can_inject = false; - } - } + if (vcpu->arch.exception.injected) + kvm_inject_exception(vcpu); + else if (kvm_is_exception_pending(vcpu)) + ; /* see above */ + else if (vcpu->arch.nmi_injected) + static_call(kvm_x86_inject_nmi)(vcpu); + else if (vcpu->arch.interrupt.injected) + static_call(kvm_x86_inject_irq)(vcpu, true); + /* + * Exceptions that morph to VM-Exits are handled above, and pending + * exceptions on top of injected exceptions that do not VM-Exit should + * either morph to #DF or, sadly, override the injected exception. + */ WARN_ON_ONCE(vcpu->arch.exception.injected && vcpu->arch.exception.pending); /* - * Call check_nested_events() even if we reinjected a previous event - * in order for caller to determine if it should require immediate-exit - * from L2 to L1 due to pending L1 events which require exit - * from L2 to L1. + * Bail if immediate entry+exit to/from the guest is needed to complete + * nested VM-Enter or event re-injection so that a different pending + * event can be serviced (or if KVM needs to exit to userspace). + * + * Otherwise, continue processing events even if VM-Exit occurred. The + * VM-Exit will have cleared exceptions that were meant for L2, but + * there may now be events that can be injected into L1. */ - if (is_guest_mode(vcpu)) { - r = kvm_check_nested_events(vcpu); - if (r < 0) - goto out; - } + if (r < 0) + goto out; + + /* + * A pending exception VM-Exit should either result in nested VM-Exit + * or force an immediate re-entry and exit to/from L2, and exception + * VM-Exits cannot be injected (flag should _never_ be set). + */ + WARN_ON_ONCE(vcpu->arch.exception_vmexit.injected || + vcpu->arch.exception_vmexit.pending); + + /* + * New events, other than exceptions, cannot be injected if KVM needs + * to re-inject a previous event. See above comments on re-injecting + * for why pending exceptions get priority. + */ + can_inject = !kvm_event_needs_reinjection(vcpu); - /* try to inject new event if pending */ if (vcpu->arch.exception.pending) { - if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) + /* + * Fault-class exceptions, except #DBs, set RF=1 in the RFLAGS + * value pushed on the stack. Trap-like exception and all #DBs + * leave RF as-is (KVM follows Intel's behavior in this regard; + * AMD states that code breakpoint #DBs excplitly clear RF=0). + * + * Note, most versions of Intel's SDM and AMD's APM incorrectly + * describe the behavior of General Detect #DBs, which are + * fault-like. They do _not_ set RF, a la code breakpoints. + */ + if (exception_type(vcpu->arch.exception.vector) == EXCPT_FAULT) __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | X86_EFLAGS_RF); - if (vcpu->arch.exception.nr == DB_VECTOR) { - kvm_deliver_exception_payload(vcpu); + if (vcpu->arch.exception.vector == DB_VECTOR) { + kvm_deliver_exception_payload(vcpu, &vcpu->arch.exception); if (vcpu->arch.dr7 & DR7_GD) { vcpu->arch.dr7 &= ~DR7_GD; kvm_update_dr7(vcpu); @@ -9801,11 +9981,11 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) } if (is_guest_mode(vcpu) && - kvm_x86_ops.nested_ops->hv_timer_pending && - kvm_x86_ops.nested_ops->hv_timer_pending(vcpu)) + kvm_x86_ops.nested_ops->has_events && + kvm_x86_ops.nested_ops->has_events(vcpu)) *req_immediate_exit = true; - WARN_ON(vcpu->arch.exception.pending); + WARN_ON(kvm_is_exception_pending(vcpu)); return 0; out: @@ -10110,7 +10290,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) * When APICv gets disabled, we may still have injected interrupts * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was * still active when the interrupt got accepted. Make sure - * inject_pending_event() is called to check for that. + * kvm_check_and_inject_events() is called to check for that. */ if (!apic->apicv_active) kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -10407,7 +10587,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto out; } - r = inject_pending_event(vcpu, &req_immediate_exit); + r = kvm_check_and_inject_events(vcpu, &req_immediate_exit); if (r < 0) { r = 0; goto out; @@ -10646,10 +10826,26 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu) if (hv_timer) kvm_lapic_switch_to_hv_timer(vcpu); - if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) + /* + * If the vCPU is not runnable, a signal or another host event + * of some kind is pending; service it without changing the + * vCPU's activity state. + */ + if (!kvm_arch_vcpu_runnable(vcpu)) return 1; } + /* + * Evaluate nested events before exiting the halted state. This allows + * the halt state to be recorded properly in the VMCS12's activity + * state field (AMD does not have a similar field and a VM-Exit always + * causes a spurious wakeup from HLT). + */ + if (is_guest_mode(vcpu)) { + if (kvm_check_nested_events(vcpu) < 0) + return 0; + } + if (kvm_apic_accept_events(vcpu) < 0) return 0; switch(vcpu->arch.mp_state) { @@ -10673,9 +10869,6 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) - kvm_check_nested_events(vcpu); - return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted); } @@ -10824,6 +11017,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { + struct kvm_queued_exception *ex = &vcpu->arch.exception; struct kvm_run *kvm_run = vcpu->run; int r; @@ -10852,7 +11046,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) r = 0; goto out; } - kvm_clear_request(KVM_REQ_UNHALT, vcpu); r = -EAGAIN; if (signal_pending(current)) { r = -EINTR; @@ -10882,6 +11075,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) } } + /* + * If userspace set a pending exception and L2 is active, convert it to + * a pending VM-Exit if L1 wants to intercept the exception. + */ + if (vcpu->arch.exception_from_userspace && is_guest_mode(vcpu) && + kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, ex->vector, + ex->error_code)) { + kvm_queue_exception_vmexit(vcpu, ex->vector, + ex->has_error_code, ex->error_code, + ex->has_payload, ex->payload); + ex->injected = false; + ex->pending = false; + } + vcpu->arch.exception_from_userspace = false; + if (unlikely(vcpu->arch.complete_userspace_io)) { int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; vcpu->arch.complete_userspace_io = NULL; @@ -10988,6 +11196,7 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED); vcpu->arch.exception.pending = false; + vcpu->arch.exception_vmexit.pending = false; kvm_make_request(KVM_REQ_EVENT, vcpu); } @@ -11125,11 +11334,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, } /* - * KVM_MP_STATE_INIT_RECEIVED means the processor is in - * INIT state; latched init should be reported using - * KVM_SET_VCPU_EVENTS, so reject it here. + * Pending INITs are reported using KVM_SET_VCPU_EVENTS, disallow + * forcing the guest into INIT/SIPI if those events are supposed to be + * blocked. KVM prioritizes SMI over INIT, so reject INIT/SIPI state + * if an SMI is pending as well. */ - if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) && + if ((!kvm_apic_init_sipi_allowed(vcpu) || vcpu->arch.smi_pending) && (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED || mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED)) goto out; @@ -11368,7 +11578,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { r = -EBUSY; - if (vcpu->arch.exception.pending) + if (kvm_is_exception_pending(vcpu)) goto out; if (dbg->control & KVM_GUESTDBG_INJECT_DB) kvm_queue_exception(vcpu, DB_VECTOR); @@ -11750,8 +11960,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; /* - * To avoid have the INIT path from kvm_apic_has_events() that be - * called with loaded FPU and does not let userspace fix the state. + * All paths that lead to INIT are required to load the guest's + * FPU state (because most paths are buried in KVM_RUN). */ if (init_event) kvm_put_guest_fpu(vcpu); @@ -12080,6 +12290,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_page_track; + ret = static_call(kvm_x86_vm_init)(kvm); + if (ret) + goto out_uninit_mmu; + INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); atomic_set(&kvm->arch.noncoherent_dma_count, 0); @@ -12115,8 +12329,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_hv_init_vm(kvm); kvm_xen_init_vm(kvm); - return static_call(kvm_x86_vm_init)(kvm); + return 0; +out_uninit_mmu: + kvm_mmu_uninit_vm(kvm); out_page_track: kvm_page_track_cleanup(kvm); out: @@ -12589,13 +12805,14 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) if (!list_empty_careful(&vcpu->async_pf.done)) return true; - if (kvm_apic_has_events(vcpu)) + if (kvm_apic_has_pending_init_or_sipi(vcpu) && + kvm_apic_init_sipi_allowed(vcpu)) return true; if (vcpu->arch.pv.pv_unhalted) return true; - if (vcpu->arch.exception.pending) + if (kvm_is_exception_pending(vcpu)) return true; if (kvm_test_request(KVM_REQ_NMI, vcpu) || @@ -12617,16 +12834,13 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) return true; if (is_guest_mode(vcpu) && - kvm_x86_ops.nested_ops->hv_timer_pending && - kvm_x86_ops.nested_ops->hv_timer_pending(vcpu)) + kvm_x86_ops.nested_ops->has_events && + kvm_x86_ops.nested_ops->has_events(vcpu)) return true; if (kvm_xen_has_pending_events(vcpu)) return true; - if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) - return true; - return false; } @@ -12850,7 +13064,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) { if (unlikely(!lapic_in_kernel(vcpu) || kvm_event_needs_reinjection(vcpu) || - vcpu->arch.exception.pending)) + kvm_is_exception_pending(vcpu))) return false; if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu)) @@ -13401,7 +13615,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); -EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 1926d2cb8e79..829d3134c1eb 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -82,10 +82,18 @@ static inline unsigned int __shrink_ple_window(unsigned int val, void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu); int kvm_check_nested_events(struct kvm_vcpu *vcpu); +static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.exception.pending || + vcpu->arch.exception_vmexit.pending || + kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu); +} + static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) { vcpu->arch.exception.pending = false; vcpu->arch.exception.injected = false; + vcpu->arch.exception_vmexit.pending = false; } static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, @@ -267,11 +275,6 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) return !(kvm->arch.disabled_quirks & quirk); } -static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu) -{ - return is_smm(vcpu) || static_call(kvm_x86_apic_init_signal_blocked)(vcpu); -} - void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); u64 get_kvmclock_ns(struct kvm *kvm); @@ -286,7 +289,8 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, int handle_ud(struct kvm_vcpu *vcpu); -void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu); +void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu, + struct kvm_queued_exception *ex); void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu); u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 280cb5dc7341..93c628d3e3a9 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1065,7 +1065,6 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, del_timer(&vcpu->arch.xen.poll_timer); vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - kvm_clear_request(KVM_REQ_UNHALT, vcpu); } vcpu->arch.xen.poll_evtchn = 0; diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index f76747862bd2..7ba5f61d7273 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -65,7 +65,9 @@ ifneq ($(CONFIG_X86_CMPXCHG64),y) endif else obj-y += iomap_copy_64.o +ifneq ($(CONFIG_GENERIC_CSUM),y) lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o +endif lib-y += clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c index 3e2f33fc33de..e0411a3774d4 100644 --- a/arch/x86/lib/iomem.c +++ b/arch/x86/lib/iomem.c @@ -1,6 +1,7 @@ #include <linux/string.h> #include <linux/module.h> #include <linux/io.h> +#include <linux/kmsan-checks.h> #define movs(type,to,from) \ asm volatile("movs" type:"=&D" (to), "=&S" (from):"0" (to), "1" (from):"memory") @@ -37,6 +38,8 @@ static void string_memcpy_fromio(void *to, const volatile void __iomem *from, si n-=2; } rep_movs(to, (const void *)from, n); + /* KMSAN must treat values read from devices as initialized. */ + kmsan_unpoison_memory(to, n); } static void string_memcpy_toio(volatile void __iomem *to, const void *from, size_t n) @@ -44,6 +47,8 @@ static void string_memcpy_toio(volatile void __iomem *to, const void *from, size if (unlikely(!n)) return; + /* Make sure uninitialized memory isn't copied to devices. */ + kmsan_check_memory(from, n); /* Align any unaligned destination IO */ if (unlikely(1 & (unsigned long)to)) { movs("b", to, from); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 829c1409ffbd..c80febc44cd2 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -14,6 +14,8 @@ KASAN_SANITIZE_pgprot.o := n # Disable KCSAN entirely, because otherwise we get warnings that some functions # reference __initdata sections. KCSAN_SANITIZE := n +# Avoid recursion by not calling KMSAN hooks for CEA code. +KMSAN_SANITIZE_cpu_entry_area.o := n ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_mem_encrypt.o = -pg @@ -44,6 +46,9 @@ obj-$(CONFIG_HIGHMEM) += highmem_32.o KASAN_SANITIZE_kasan_init_$(BITS).o := n obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o +KMSAN_SANITIZE_kmsan_shadow.o := n +obj-$(CONFIG_KMSAN) += kmsan_shadow.o + obj-$(CONFIG_MMIOTRACE) += mmiotrace.o mmiotrace-y := kmmio.o pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a498ae1fbe66..7b0d4ab894c8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -260,7 +260,7 @@ static noinline int vmalloc_fault(unsigned long address) } NOKPROBE_SYMBOL(vmalloc_fault); -void arch_sync_kernel_mappings(unsigned long start, unsigned long end) +static void __arch_sync_kernel_mappings(unsigned long start, unsigned long end) { unsigned long addr; @@ -284,6 +284,27 @@ void arch_sync_kernel_mappings(unsigned long start, unsigned long end) } } +void arch_sync_kernel_mappings(unsigned long start, unsigned long end) +{ + __arch_sync_kernel_mappings(start, end); +#ifdef CONFIG_KMSAN + /* + * KMSAN maintains two additional metadata page mappings for the + * [VMALLOC_START, VMALLOC_END) range. These mappings start at + * KMSAN_VMALLOC_SHADOW_START and KMSAN_VMALLOC_ORIGIN_START and + * have to be synced together with the vmalloc memory mapping. + */ + if (start >= VMALLOC_START && end < VMALLOC_END) { + __arch_sync_kernel_mappings( + start - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START, + end - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START); + __arch_sync_kernel_mappings( + start - VMALLOC_START + KMSAN_VMALLOC_ORIGIN_START, + end - VMALLOC_START + KMSAN_VMALLOC_ORIGIN_START); + } +#endif +} + static bool low_pfn(unsigned long pfn) { return pfn < max_low_pfn; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 82a042c03824..9121bc1b9453 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1054,7 +1054,7 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) } #ifdef CONFIG_SWAP -unsigned long max_swapfile_size(void) +unsigned long arch_max_swapfile_size(void) { unsigned long pages; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0fe690ebc269..3f040c6e5d13 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -90,6 +90,12 @@ DEFINE_ENTRY(pud, pud, init) DEFINE_ENTRY(pmd, pmd, init) DEFINE_ENTRY(pte, pte, init) +static inline pgprot_t prot_sethuge(pgprot_t prot) +{ + WARN_ON_ONCE(pgprot_val(prot) & _PAGE_PAT); + + return __pgprot(pgprot_val(prot) | _PAGE_PSE); +} /* * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the @@ -557,9 +563,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, if (page_size_mask & (1<<PG_LEVEL_2M)) { pages++; spin_lock(&init_mm.page_table_lock); - set_pte_init((pte_t *)pmd, - pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT, - __pgprot(pgprot_val(prot) | _PAGE_PSE)), + set_pmd_init(pmd, + pfn_pmd(paddr >> PAGE_SHIFT, prot_sethuge(prot)), init); spin_unlock(&init_mm.page_table_lock); paddr_last = paddr_next; @@ -644,12 +649,8 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, if (page_size_mask & (1<<PG_LEVEL_1G)) { pages++; spin_lock(&init_mm.page_table_lock); - - prot = __pgprot(pgprot_val(prot) | _PAGE_PSE); - - set_pte_init((pte_t *)pud, - pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, - prot), + set_pud_init(pud, + pfn_pud(paddr >> PAGE_SHIFT, prot_sethuge(prot)), init); spin_unlock(&init_mm.page_table_lock); paddr_last = paddr_next; @@ -1287,7 +1288,7 @@ static void __init preallocate_vmalloc_pages(void) unsigned long addr; const char *lvl; - for (addr = VMALLOC_START; addr <= VMALLOC_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) { + for (addr = VMALLOC_START; addr <= VMEMORY_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) { pgd_t *pgd = pgd_offset_k(addr); p4d_t *p4d; pud_t *pud; diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 1ad0228f8ceb..78c5bc654cff 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -17,6 +17,7 @@ #include <linux/cc_platform.h> #include <linux/efi.h> #include <linux/pgtable.h> +#include <linux/kmsan.h> #include <asm/set_memory.h> #include <asm/e820/api.h> @@ -479,6 +480,8 @@ void iounmap(volatile void __iomem *addr) return; } + kmsan_iounmap_page_range((unsigned long)addr, + (unsigned long)addr + get_vm_area_size(p)); memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p)); /* Finally remove it */ diff --git a/arch/x86/mm/kmsan_shadow.c b/arch/x86/mm/kmsan_shadow.c new file mode 100644 index 000000000000..bee2ec4a3bfa --- /dev/null +++ b/arch/x86/mm/kmsan_shadow.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * x86-specific bits of KMSAN shadow implementation. + * + * Copyright (C) 2022 Google LLC + * Author: Alexander Potapenko <glider@google.com> + */ + +#include <asm/cpu_entry_area.h> +#include <linux/percpu-defs.h> + +/* + * Addresses within the CPU entry area (including e.g. exception stacks) do not + * have struct page entries corresponding to them, so they need separate + * handling. + * arch_kmsan_get_meta_or_null() (declared in the header) maps the addresses in + * CPU entry area to addresses in cpu_entry_area_shadow/cpu_entry_area_origin. + */ +DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow); +DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin); diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c index 0612a73638a8..423b21e80929 100644 --- a/arch/x86/mm/pat/cpa-test.c +++ b/arch/x86/mm/pat/cpa-test.c @@ -136,10 +136,10 @@ static int pageattr_test(void) failed += print_split(&sa); for (i = 0; i < NTEST; i++) { - unsigned long pfn = prandom_u32() % max_pfn_mapped; + unsigned long pfn = prandom_u32_max(max_pfn_mapped); addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT); - len[i] = prandom_u32() % NPAGES; + len[i] = prandom_u32_max(NPAGES); len[i] = min_t(unsigned long, len[i], max_pfn_mapped - pfn - 1); if (len[i] == 0) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 1abd5438f126..97342c42dda8 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -580,6 +580,46 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start, } /* + * Validate strict W^X semantics. + */ +static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start, + unsigned long pfn, unsigned long npg) +{ + unsigned long end; + + /* + * 32-bit has some unfixable W+X issues, like EFI code + * and writeable data being in the same page. Disable + * detection and enforcement there. + */ + if (IS_ENABLED(CONFIG_X86_32)) + return new; + + /* Only verify when NX is supported: */ + if (!(__supported_pte_mask & _PAGE_NX)) + return new; + + if (!((pgprot_val(old) ^ pgprot_val(new)) & (_PAGE_RW | _PAGE_NX))) + return new; + + if ((pgprot_val(new) & (_PAGE_RW | _PAGE_NX)) != _PAGE_RW) + return new; + + end = start + npg * PAGE_SIZE - 1; + WARN_ONCE(1, "CPA detected W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n", + (unsigned long long)pgprot_val(old), + (unsigned long long)pgprot_val(new), + start, end, pfn); + + /* + * For now, allow all permission change attempts by returning the + * attempted permissions. This can 'return old' to actively + * refuse the permission change at a later time. + */ + return new; +} + +/* * Lookup the page table entry for a virtual address in a specific pgd. * Return a pointer to the entry and the level of the mapping. */ @@ -885,6 +925,8 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address, new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages, psize, CPA_DETECT); + new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages); + /* * If there is a conflict, split the large page. * @@ -1525,6 +1567,7 @@ repeat: if (level == PG_LEVEL_4K) { pte_t new_pte; + pgprot_t old_prot = pte_pgprot(old_pte); pgprot_t new_prot = pte_pgprot(old_pte); unsigned long pfn = pte_pfn(old_pte); @@ -1536,6 +1579,8 @@ repeat: new_prot = static_protections(new_prot, address, pfn, 1, 0, CPA_PROTECT); + new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1); + new_prot = pgprot_clear_protnone_bits(new_prot); /* @@ -1944,7 +1989,7 @@ int set_mce_nospec(unsigned long pfn) return rc; } -static int set_memory_present(unsigned long *addr, int numpages) +static int set_memory_p(unsigned long *addr, int numpages) { return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0); } @@ -1954,7 +1999,7 @@ int clear_mce_nospec(unsigned long pfn) { unsigned long addr = (unsigned long) pfn_to_kaddr(pfn); - return set_memory_present(&addr, 1); + return set_memory_p(&addr, 1); } EXPORT_SYMBOL_GPL(clear_mce_nospec); #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index a932d7712d85..8525f2876fb4 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, return ret; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { @@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, return ret; } +#endif + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp) { diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 1f3675453a57..b36596bf0fc3 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -176,7 +176,8 @@ virt_to_phys_or_null_size(void *va, unsigned long size) int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - unsigned long pfn, text, pf, rodata; + extern const u8 __efi64_thunk_ret_tramp[]; + unsigned long pfn, text, pf, rodata, tramp; struct page *page; unsigned npages; pgd_t *pgd = efi_mm.pgd; @@ -238,11 +239,9 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); - pfn = text >> PAGE_SHIFT; - pf = _PAGE_ENC; - if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) { - pr_err("Failed to map kernel text 1:1\n"); + if (kernel_unmap_pages_in_pgd(pgd, text, npages)) { + pr_err("Failed to unmap kernel text 1:1 mapping\n"); return 1; } @@ -256,6 +255,15 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 1; } + tramp = __pa(__efi64_thunk_ret_tramp); + pfn = tramp >> PAGE_SHIFT; + + pf = _PAGE_ENC; + if (kernel_map_pages_in_pgd(pgd, pfn, tramp, 1, pf)) { + pr_err("Failed to map mixed mode return trampoline\n"); + return 1; + } + return 0; } diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 4e5257a4811b..c4b1144f99f6 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -23,7 +23,6 @@ #include <linux/objtool.h> #include <asm/page_types.h> #include <asm/segment.h> -#include <asm/nospec-branch.h> .text .code64 @@ -73,10 +72,18 @@ STACK_FRAME_NON_STANDARD __efi64_thunk pushq %rdi /* EFI runtime service address */ lretq + // This return instruction is not needed for correctness, as it will + // never be reached. It only exists to make objtool happy, which will + // otherwise complain about unreachable instructions in the callers. + RET +SYM_FUNC_END(__efi64_thunk) + + .section ".rodata", "a", @progbits + .balign 16 +SYM_DATA_START(__efi64_thunk_ret_tramp) 1: movq 0x20(%rsp), %rsp pop %rbx pop %rbp - ANNOTATE_UNRET_SAFE ret int3 @@ -84,7 +91,7 @@ STACK_FRAME_NON_STANDARD __efi64_thunk 2: pushl $__KERNEL_CS pushl %ebp lret -SYM_FUNC_END(__efi64_thunk) +SYM_DATA_END(__efi64_thunk_ret_tramp) .bss .balign 8 diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 83f1b6a56449..f614009d3e4e 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -10,6 +10,7 @@ # Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n KCSAN_SANITIZE := n +KMSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 85246dd9faa1..9b1ec5d8c99c 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -92,3 +92,12 @@ config XEN_DOM0 select X86_X2APIC if XEN_PVH && X86_64 help Support running as a Xen Dom0 guest. + +config XEN_PV_MSR_SAFE + bool "Always use safe MSR accesses in PV guests" + default y + depends on XEN_PV + help + Use safe (not faulting) MSR access functions even if the MSR access + should not fault anyway. + The default can be changed by using the "xen_msr_safe" boot parameter. diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 1c1ac418484b..c1cd28e915a3 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -212,7 +212,7 @@ static void __init xen_hvm_guest_init(void) return; if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) - virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc); + virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc); init_hvm_pv_info(); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 9b1a58dda935..f82857e48815 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -108,11 +108,21 @@ struct tls_descs { */ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); +static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE); + +static int __init parse_xen_msr_safe(char *str) +{ + if (str) + return strtobool(str, &xen_msr_safe); + return -EINVAL; +} +early_param("xen_msr_safe", parse_xen_msr_safe); + static void __init xen_pv_init_platform(void) { /* PV guests can't operate virtio devices without grants. */ if (IS_ENABLED(CONFIG_XEN_VIRTIO)) - virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc); + virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc); populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP)); @@ -917,14 +927,18 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } -static u64 xen_read_msr_safe(unsigned int msr, int *err) +static u64 xen_do_read_msr(unsigned int msr, int *err) { - u64 val; + u64 val = 0; /* Avoid uninitialized value for safe variant. */ if (pmu_msr_read(msr, &val, err)) return val; - val = native_read_msr_safe(msr, err); + if (err) + val = native_read_msr_safe(msr, err); + else + val = native_read_msr(msr); + switch (msr) { case MSR_IA32_APICBASE: val &= ~X2APIC_ENABLE; @@ -933,23 +947,39 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) return val; } -static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) +static void set_seg(unsigned int which, unsigned int low, unsigned int high, + int *err) { - int ret; - unsigned int which; - u64 base; + u64 base = ((u64)high << 32) | low; - ret = 0; + if (HYPERVISOR_set_segment_base(which, base) == 0) + return; + if (err) + *err = -EIO; + else + WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base); +} + +/* + * Support write_msr_safe() and write_msr() semantics. + * With err == NULL write_msr() semantics are selected. + * Supplying an err pointer requires err to be pre-initialized with 0. + */ +static void xen_do_write_msr(unsigned int msr, unsigned int low, + unsigned int high, int *err) +{ switch (msr) { - case MSR_FS_BASE: which = SEGBASE_FS; goto set; - case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; - case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; - - set: - base = ((u64)high << 32) | low; - if (HYPERVISOR_set_segment_base(which, base) != 0) - ret = -EIO; + case MSR_FS_BASE: + set_seg(SEGBASE_FS, low, high, err); + break; + + case MSR_KERNEL_GS_BASE: + set_seg(SEGBASE_GS_USER, low, high, err); + break; + + case MSR_GS_BASE: + set_seg(SEGBASE_GS_KERNEL, low, high, err); break; case MSR_STAR: @@ -965,31 +995,42 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) break; default: - if (!pmu_msr_write(msr, low, high, &ret)) - ret = native_write_msr_safe(msr, low, high); + if (!pmu_msr_write(msr, low, high, err)) { + if (err) + *err = native_write_msr_safe(msr, low, high); + else + native_write_msr(msr, low, high); + } } +} - return ret; +static u64 xen_read_msr_safe(unsigned int msr, int *err) +{ + return xen_do_read_msr(msr, err); +} + +static int xen_write_msr_safe(unsigned int msr, unsigned int low, + unsigned int high) +{ + int err = 0; + + xen_do_write_msr(msr, low, high, &err); + + return err; } static u64 xen_read_msr(unsigned int msr) { - /* - * This will silently swallow a #GP from RDMSR. It may be worth - * changing that. - */ int err; - return xen_read_msr_safe(msr, &err); + return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL); } static void xen_write_msr(unsigned int msr, unsigned low, unsigned high) { - /* - * This will silently swallow a #GP from WRMSR. It may be worth - * changing that. - */ - xen_write_msr_safe(msr, low, high); + int err; + + xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL); } /* This is called once we have the cpu_possible_mask */ diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 21ecbe754cb2..68aff1382872 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -131,6 +131,10 @@ static inline uint32_t get_fam15h_addr(u32 addr) static inline bool is_amd_pmu_msr(unsigned int msr) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return false; + if ((msr >= MSR_F15H_PERF_CTL && msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) || (msr >= MSR_K7_EVNTSEL0 && @@ -140,10 +144,15 @@ static inline bool is_amd_pmu_msr(unsigned int msr) return false; } -static int is_intel_pmu_msr(u32 msr_index, int *type, int *index) +static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index) { u32 msr_index_pmc; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && + boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR && + boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) + return false; + switch (msr_index) { case MSR_CORE_PERF_FIXED_CTR_CTRL: case MSR_IA32_DS_AREA: @@ -290,48 +299,52 @@ static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read) return false; } +static bool pmu_msr_chk_emulated(unsigned int msr, uint64_t *val, bool is_read, + bool *emul) +{ + int type, index; + + if (is_amd_pmu_msr(msr)) + *emul = xen_amd_pmu_emulate(msr, val, is_read); + else if (is_intel_pmu_msr(msr, &type, &index)) + *emul = xen_intel_pmu_emulate(msr, val, type, index, is_read); + else + return false; + + return true; +} + bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { - if (is_amd_pmu_msr(msr)) { - if (!xen_amd_pmu_emulate(msr, val, 1)) - *val = native_read_msr_safe(msr, err); - return true; - } - } else { - int type, index; + bool emulated; - if (is_intel_pmu_msr(msr, &type, &index)) { - if (!xen_intel_pmu_emulate(msr, val, type, index, 1)) - *val = native_read_msr_safe(msr, err); - return true; - } + if (!pmu_msr_chk_emulated(msr, val, true, &emulated)) + return false; + + if (!emulated) { + *val = err ? native_read_msr_safe(msr, err) + : native_read_msr(msr); } - return false; + return true; } bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err) { uint64_t val = ((uint64_t)high << 32) | low; + bool emulated; - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { - if (is_amd_pmu_msr(msr)) { - if (!xen_amd_pmu_emulate(msr, &val, 0)) - *err = native_write_msr_safe(msr, low, high); - return true; - } - } else { - int type, index; + if (!pmu_msr_chk_emulated(msr, &val, false, &emulated)) + return false; - if (is_intel_pmu_msr(msr, &type, &index)) { - if (!xen_intel_pmu_emulate(msr, &val, type, index, 0)) - *err = native_write_msr_safe(msr, low, high); - return true; - } + if (!emulated) { + if (err) + *err = native_write_msr_safe(msr, low, high); + else + native_write_msr(msr, low, high); } - return false; + return true; } static unsigned long long xen_amd_read_pmc(int counter) diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index ba7af2eca755..480be82e9b7b 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -179,7 +179,7 @@ static void __init _get_smp_config(unsigned int early) * hypercall to expand the max number of VCPUs an already * running guest has. So cap it up to X. */ if (subtract) - nr_cpu_ids = nr_cpu_ids - subtract; + set_nr_cpu_ids(nr_cpu_ids - subtract); #endif } diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 12ac277282ba..bcb0c5d2abc2 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -771,7 +771,7 @@ config HIGHMEM If unsure, say Y. -config FORCE_MAX_ZONEORDER +config ARCH_FORCE_MAX_ORDER int "Maximum zone order" default "11" help diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 5097caa7bf0c..bfd8e433ed62 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -55,8 +55,6 @@ KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(vardirs) $(plfdirs)) KBUILD_DEFCONFIG := iss_defconfig -head-y := arch/xtensa/kernel/head.o - libs-y += arch/xtensa/lib/ boot := arch/xtensa/boot diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig index 3be62da8089b..ef0ebcfbccf9 100644 --- a/arch/xtensa/configs/audio_kc705_defconfig +++ b/arch/xtensa/configs/audio_kc705_defconfig @@ -120,7 +120,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOCKUP_DETECTOR=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig index fc240737b14d..2665962d247a 100644 --- a/arch/xtensa/configs/cadence_csp_defconfig +++ b/arch/xtensa/configs/cadence_csp_defconfig @@ -100,7 +100,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOCKUP_DETECTOR=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/xtensa/configs/generic_kc705_defconfig b/arch/xtensa/configs/generic_kc705_defconfig index e9d6b6f6eca1..236c7f23cc10 100644 --- a/arch/xtensa/configs/generic_kc705_defconfig +++ b/arch/xtensa/configs/generic_kc705_defconfig @@ -107,7 +107,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOCKUP_DETECTOR=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig index fcb620ef3799..8263da9e078d 100644 --- a/arch/xtensa/configs/nommu_kc705_defconfig +++ b/arch/xtensa/configs/nommu_kc705_defconfig @@ -105,7 +105,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y # CONFIG_FRAME_POINTER is not set CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_VM=y diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index a47c85638ec1..7bdffa3a69c6 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -111,7 +111,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_VM=y CONFIG_LOCKUP_DETECTOR=y diff --git a/arch/xtensa/configs/virt_defconfig b/arch/xtensa/configs/virt_defconfig index 6d1387dfa96f..98acb7191cb7 100644 --- a/arch/xtensa/configs/virt_defconfig +++ b/arch/xtensa/configs/virt_defconfig @@ -97,7 +97,7 @@ CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_FONTS=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y diff --git a/arch/xtensa/configs/xip_kc705_defconfig b/arch/xtensa/configs/xip_kc705_defconfig index 062148e17135..1c3cebaaa71b 100644 --- a/arch/xtensa/configs/xip_kc705_defconfig +++ b/arch/xtensa/configs/xip_kc705_defconfig @@ -102,7 +102,7 @@ CONFIG_CRYPTO_LZO=y CONFIG_CRYPTO_ANSI_CPRNG=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/xtensa/include/asm/elf.h b/arch/xtensa/include/asm/elf.h index 909a6ab4f22b..ffcf1ada19c6 100644 --- a/arch/xtensa/include/asm/elf.h +++ b/arch/xtensa/include/asm/elf.h @@ -93,6 +93,10 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; #define elf_check_arch(x) ( ( (x)->e_machine == EM_XTENSA ) || \ ( (x)->e_machine == EM_XTENSA_OLD ) ) +#define ELFOSABI_XTENSA_FDPIC 65 +#define elf_check_fdpic(x) ((x)->e_ident[EI_OSABI] == ELFOSABI_XTENSA_FDPIC) +#define ELF_FDPIC_CORE_EFLAGS 0 + /* * These are used to set parameters in the core dumps. */ @@ -153,10 +157,22 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; */ #define ELF_PLAT_INIT(_r, load_addr) \ - do { _r->areg[0]=0; /*_r->areg[1]=0;*/ _r->areg[2]=0; _r->areg[3]=0; \ - _r->areg[4]=0; _r->areg[5]=0; _r->areg[6]=0; _r->areg[7]=0; \ - _r->areg[8]=0; _r->areg[9]=0; _r->areg[10]=0; _r->areg[11]=0; \ - _r->areg[12]=0; _r->areg[13]=0; _r->areg[14]=0; _r->areg[15]=0; \ + do { \ + (_r)->areg[0] = 0; /*(_r)->areg[1] = 0;*/ \ + (_r)->areg[2] = 0; (_r)->areg[3] = 0; \ + (_r)->areg[4] = 0; (_r)->areg[5] = 0; \ + (_r)->areg[6] = 0; (_r)->areg[7] = 0; \ + (_r)->areg[8] = 0; (_r)->areg[9] = 0; \ + (_r)->areg[10] = 0; (_r)->areg[11] = 0; \ + (_r)->areg[12] = 0; (_r)->areg[13] = 0; \ + (_r)->areg[14] = 0; (_r)->areg[15] = 0; \ + } while (0) + +#define ELF_FDPIC_PLAT_INIT(_r, _exec_map_addr, _interp_map_addr, dynamic_addr) \ + do { \ + (_r)->areg[4] = _exec_map_addr; \ + (_r)->areg[5] = _interp_map_addr; \ + (_r)->areg[6] = dynamic_addr; \ } while (0) typedef struct { diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 76bc63127c66..228e4dff5fb2 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -205,9 +205,12 @@ struct thread_struct { #define start_thread(regs, new_pc, new_sp) \ do { \ unsigned long syscall = (regs)->syscall; \ + unsigned long current_aregs[16]; \ + memcpy(current_aregs, (regs)->areg, sizeof(current_aregs)); \ memset((regs), 0, sizeof(*(regs))); \ (regs)->pc = (new_pc); \ (regs)->ps = USER_PS_VALUE; \ + memcpy((regs)->areg, current_aregs, sizeof(current_aregs)); \ (regs)->areg[1] = (new_sp); \ (regs)->areg[0] = 0; \ (regs)->wmask = 1; \ @@ -221,9 +224,6 @@ struct thread_struct { struct task_struct; struct mm_struct; -/* Free all resources held by a thread. */ -#define release_thread(thread) do { } while(0) - extern unsigned long __get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc) diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index 7966a58af472..1ff0c858544f 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -111,6 +111,8 @@ #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ +#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/xtensa/include/uapi/asm/ptrace.h b/arch/xtensa/include/uapi/asm/ptrace.h index 50db3e0a6341..9115e86ebc75 100644 --- a/arch/xtensa/include/uapi/asm/ptrace.h +++ b/arch/xtensa/include/uapi/asm/ptrace.h @@ -37,6 +37,10 @@ #define PTRACE_SETXTREGS 19 #define PTRACE_GETHBPREGS 20 #define PTRACE_SETHBPREGS 21 +#define PTRACE_GETFDPIC 22 + +#define PTRACE_GETFDPIC_EXEC 0 +#define PTRACE_GETFDPIC_INTERP 1 #ifndef __ASSEMBLY__ diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile index 897c1c741058..f28b8e3d717e 100644 --- a/arch/xtensa/kernel/Makefile +++ b/arch/xtensa/kernel/Makefile @@ -3,9 +3,9 @@ # Makefile for the Linux/Xtensa kernel. # -extra-y := head.o vmlinux.lds +extra-y := vmlinux.lds -obj-y := align.o coprocessor.o entry.o irq.o platform.o process.o \ +obj-y := head.o align.o coprocessor.o entry.o irq.o platform.o process.o \ ptrace.o setup.o signal.o stacktrace.o syscall.o time.o traps.o \ vectors.o diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index 201356faa7e6..b3c2450d6f23 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -58,6 +58,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct vm_area_struct *vmm; + struct vma_iterator vmi; if (flags & MAP_FIXED) { /* We do not accept a shared mapping if it would violate @@ -79,15 +80,20 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, else addr = PAGE_ALIGN(addr); - for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) { - /* At this point: (!vmm || addr < vmm->vm_end). */ - if (TASK_SIZE - len < addr) - return -ENOMEM; - if (!vmm || addr + len <= vm_start_gap(vmm)) - return addr; + vma_iter_init(&vmi, current->mm, addr); + for_each_vma(vmi, vmm) { + /* At this point: (addr < vmm->vm_end). */ + if (addr + len <= vm_start_gap(vmm)) + break; + addr = vmm->vm_end; if (flags & MAP_SHARED) addr = COLOUR_ALIGN(addr, pgoff); } + + if (TASK_SIZE - len < addr) + return -ENOMEM; + + return addr; } #endif |