path: root/arch
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/Kconfig | 32
-rw-r--r--  arch/arm/Kconfig.debug | 4
-rw-r--r--  arch/arm/Makefile | 1
-rw-r--r--  arch/arm/boot/dts/aspeed-g6.dtsi | 13
-rw-r--r--  arch/arm/crypto/Kconfig | 2
-rw-r--r--  arch/arm/crypto/ghash-ce-core.S | 382
-rw-r--r--  arch/arm/crypto/ghash-ce-glue.c | 423
-rw-r--r--  arch/arm/crypto/sha1_glue.c | 14
-rw-r--r--  arch/arm/include/asm/arch_gicv3.h | 5
-rw-r--r--  arch/arm/include/asm/assembler.h | 19
-rw-r--r--  arch/arm/include/asm/checksum.h | 1
-rw-r--r--  arch/arm/include/asm/simd.h | 8
-rw-r--r--  arch/arm/kernel/asm-offsets.c | 1
-rw-r--r--  arch/arm/mach-rda/Makefile | 2
-rw-r--r--  arch/arm/mm/Kconfig | 2
-rw-r--r--  arch/arm/vfp/entry.S | 4
-rw-r--r--  arch/arm/vfp/vfphw.S | 4
-rw-r--r--  arch/arm/vfp/vfpmodule.c | 19
-rw-r--r--  arch/arm64/Kconfig | 25
-rw-r--r--  arch/arm64/Kconfig.platforms | 2
-rw-r--r--  arch/arm64/Makefile | 70
-rw-r--r--  arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts | 78
-rw-r--r--  arch/arm64/boot/dts/freescale/imx93.dtsi | 48
-rw-r--r--  arch/arm64/crypto/aes-ce-ccm-glue.c | 57
-rw-r--r--  arch/arm64/crypto/ghash-ce-glue.c | 145
-rw-r--r--  arch/arm64/crypto/sm4-ce-ccm-glue.c | 44
-rw-r--r--  arch/arm64/crypto/sm4-ce-gcm-glue.c | 51
-rw-r--r--  arch/arm64/include/asm/arch_gicv3.h | 5
-rw-r--r--  arch/arm64/include/asm/barrier.h | 11
-rw-r--r--  arch/arm64/include/asm/cpufeature.h | 14
-rw-r--r--  arch/arm64/include/asm/efi.h | 2
-rw-r--r--  arch/arm64/include/asm/el2_setup.h | 8
-rw-r--r--  arch/arm64/include/asm/esr.h | 1
-rw-r--r--  arch/arm64/include/asm/fpsimd.h | 30
-rw-r--r--  arch/arm64/include/asm/fpsimdmacros.h | 22
-rw-r--r--  arch/arm64/include/asm/ftrace.h | 15
-rw-r--r--  arch/arm64/include/asm/hwcap.h | 14
-rw-r--r--  arch/arm64/include/asm/insn.h | 1
-rw-r--r--  arch/arm64/include/asm/irqflags.h | 191
-rw-r--r--  arch/arm64/include/asm/linkage.h | 4
-rw-r--r--  arch/arm64/include/asm/patching.h | 2
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 8
-rw-r--r--  arch/arm64/include/asm/processor.h | 2
-rw-r--r--  arch/arm64/include/asm/ptrace.h | 2
-rw-r--r--  arch/arm64/include/asm/scs.h | 7
-rw-r--r--  arch/arm64/include/asm/sysreg.h | 106
-rw-r--r--  arch/arm64/include/uapi/asm/hwcap.h | 6
-rw-r--r--  arch/arm64/include/uapi/asm/sigcontext.h | 27
-rw-r--r--  arch/arm64/kernel/asm-offsets.c | 4
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 293
-rw-r--r--  arch/arm64/kernel/cpuinfo.c | 14
-rw-r--r--  arch/arm64/kernel/entry-fpsimd.S | 30
-rw-r--r--  arch/arm64/kernel/entry-ftrace.S | 32
-rw-r--r--  arch/arm64/kernel/entry.S | 41
-rw-r--r--  arch/arm64/kernel/fpsimd.c | 52
-rw-r--r--  arch/arm64/kernel/ftrace.c | 158
-rw-r--r--  arch/arm64/kernel/head.S | 116
-rw-r--r--  arch/arm64/kernel/hyp-stub.S | 7
-rw-r--r--  arch/arm64/kernel/idreg-override.c | 1
-rw-r--r--  arch/arm64/kernel/image-vars.h | 7
-rw-r--r--  arch/arm64/kernel/patch-scs.c | 11
-rw-r--r--  arch/arm64/kernel/patching.c | 17
-rw-r--r--  arch/arm64/kernel/probes/kprobes.c | 4
-rw-r--r--  arch/arm64/kernel/process.c | 21
-rw-r--r--  arch/arm64/kernel/ptrace.c | 64
-rw-r--r--  arch/arm64/kernel/setup.c | 17
-rw-r--r--  arch/arm64/kernel/signal.c | 259
-rw-r--r--  arch/arm64/kernel/sleep.S | 6
-rw-r--r--  arch/arm64/kernel/syscall.c | 8
-rw-r--r--  arch/arm64/kernel/traps.c | 6
-rw-r--r--  arch/arm64/kernel/vmlinux.lds.S | 8
-rw-r--r--  arch/arm64/kvm/debug.c | 2
-rw-r--r--  arch/arm64/kvm/fpsimd.c | 2
-rw-r--r--  arch/arm64/kvm/hyp/entry.S | 2
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/debug-sr.c | 2
-rw-r--r--  arch/arm64/mm/cache.S | 1
-rw-r--r--  arch/arm64/mm/mmu.c | 8
-rw-r--r--  arch/arm64/mm/proc.S | 2
-rw-r--r--  arch/arm64/tools/cpucaps | 6
-rwxr-xr-x  arch/arm64/tools/gen-sysreg.awk | 29
-rw-r--r--  arch/arm64/tools/sysreg | 560
-rw-r--r--  arch/loongarch/net/bpf_jit.c | 2
-rw-r--r--  arch/loongarch/net/bpf_jit.h | 21
-rw-r--r--  arch/m68k/68000/entry.S | 2
-rw-r--r--  arch/m68k/Kconfig | 2
-rw-r--r--  arch/m68k/Kconfig.devices | 1
-rw-r--r--  arch/m68k/coldfire/entry.S | 2
-rw-r--r--  arch/m68k/configs/amiga_defconfig | 3
-rw-r--r--  arch/m68k/configs/apollo_defconfig | 3
-rw-r--r--  arch/m68k/configs/atari_defconfig | 3
-rw-r--r--  arch/m68k/configs/bvme6000_defconfig | 3
-rw-r--r--  arch/m68k/configs/hp300_defconfig | 3
-rw-r--r--  arch/m68k/configs/mac_defconfig | 3
-rw-r--r--  arch/m68k/configs/multi_defconfig | 4
-rw-r--r--  arch/m68k/configs/mvme147_defconfig | 3
-rw-r--r--  arch/m68k/configs/mvme16x_defconfig | 3
-rw-r--r--  arch/m68k/configs/q40_defconfig | 3
-rw-r--r--  arch/m68k/configs/sun3_defconfig | 3
-rw-r--r--  arch/m68k/configs/sun3x_defconfig | 3
-rw-r--r--  arch/m68k/include/asm/gpio.h | 7
-rw-r--r--  arch/m68k/include/asm/seccomp.h | 11
-rw-r--r--  arch/m68k/include/asm/syscall.h | 57
-rw-r--r--  arch/m68k/include/asm/thread_info.h | 2
-rw-r--r--  arch/m68k/kernel/entry.S | 6
-rw-r--r--  arch/m68k/kernel/ptrace.c | 6
-rw-r--r--  arch/m68k/q40/q40ints.c | 4
-rw-r--r--  arch/riscv/include/asm/patch.h | 2
-rw-r--r--  arch/riscv/kernel/patch.c | 19
-rw-r--r--  arch/riscv/kernel/probes/kprobes.c | 15
-rw-r--r--  arch/riscv/net/bpf_jit.h | 5
-rw-r--r--  arch/riscv/net/bpf_jit_comp64.c | 435
-rw-r--r--  arch/s390/Kconfig | 1
-rw-r--r--  arch/s390/boot/Makefile | 2
-rw-r--r--  arch/s390/boot/boot.h | 40
-rw-r--r--  arch/s390/boot/decompressor.c | 1
-rw-r--r--  arch/s390/boot/decompressor.h | 26
-rw-r--r--  arch/s390/boot/kaslr.c | 20
-rw-r--r--  arch/s390/boot/mem_detect.c | 72
-rw-r--r--  arch/s390/boot/startup.c | 86
-rw-r--r--  arch/s390/boot/vmem.c | 278
-rw-r--r--  arch/s390/crypto/aes_s390.c | 4
-rw-r--r--  arch/s390/crypto/arch_random.c | 1
-rw-r--r--  arch/s390/crypto/paes_s390.c | 2
-rw-r--r--  arch/s390/include/asm/abs_lowcore.h | 16
-rw-r--r--  arch/s390/include/asm/ap.h | 12
-rw-r--r--  arch/s390/include/asm/asm-extable.h | 4
-rw-r--r--  arch/s390/include/asm/ccwdev.h | 2
-rw-r--r--  arch/s390/include/asm/cmpxchg.h | 109
-rw-r--r--  arch/s390/include/asm/cpu_mcf.h | 112
-rw-r--r--  arch/s390/include/asm/cpu_mf.h | 53
-rw-r--r--  arch/s390/include/asm/cputime.h | 19
-rw-r--r--  arch/s390/include/asm/diag.h | 16
-rw-r--r--  arch/s390/include/asm/fpu/internal.h | 4
-rw-r--r--  arch/s390/include/asm/idals.h | 12
-rw-r--r--  arch/s390/include/asm/idle.h | 5
-rw-r--r--  arch/s390/include/asm/kasan.h | 12
-rw-r--r--  arch/s390/include/asm/kprobes.h | 2
-rw-r--r--  arch/s390/include/asm/maccess.h | 2
-rw-r--r--  arch/s390/include/asm/mem_detect.h | 39
-rw-r--r--  arch/s390/include/asm/pgtable.h | 69
-rw-r--r--  arch/s390/include/asm/processor.h | 29
-rw-r--r--  arch/s390/include/asm/ptrace.h | 2
-rw-r--r--  arch/s390/include/asm/setup.h | 6
-rw-r--r--  arch/s390/include/asm/syscall_wrapper.h | 144
-rw-r--r--  arch/s390/include/asm/uaccess.h | 208
-rw-r--r--  arch/s390/include/asm/unwind.h | 10
-rw-r--r--  arch/s390/include/uapi/asm/fs3270.h | 25
-rw-r--r--  arch/s390/include/uapi/asm/raw3270.h | 75
-rw-r--r--  arch/s390/include/uapi/asm/types.h | 15
-rw-r--r--  arch/s390/include/uapi/asm/zcrypt.h | 3
-rw-r--r--  arch/s390/kernel/Makefile | 3
-rw-r--r--  arch/s390/kernel/abs_lowcore.c | 49
-rw-r--r--  arch/s390/kernel/cache.c | 2
-rw-r--r--  arch/s390/kernel/compat_signal.c | 4
-rw-r--r--  arch/s390/kernel/crash_dump.c | 2
-rw-r--r--  arch/s390/kernel/diag.c | 26
-rw-r--r--  arch/s390/kernel/early.c | 8
-rw-r--r--  arch/s390/kernel/entry.S | 6
-rw-r--r--  arch/s390/kernel/entry.h | 1
-rw-r--r--  arch/s390/kernel/head64.S | 1
-rw-r--r--  arch/s390/kernel/idle.c | 91
-rw-r--r--  arch/s390/kernel/ipl.c | 101
-rw-r--r--  arch/s390/kernel/irq.c | 8
-rw-r--r--  arch/s390/kernel/kprobes.c | 30
-rw-r--r--  arch/s390/kernel/machine_kexec.c | 5
-rw-r--r--  arch/s390/kernel/mcount.S | 12
-rw-r--r--  arch/s390/kernel/os_info.c | 5
-rw-r--r--  arch/s390/kernel/perf_cpum_cf.c | 308
-rw-r--r--  arch/s390/kernel/perf_cpum_cf_common.c | 233
-rw-r--r--  arch/s390/kernel/perf_cpum_sf.c | 148
-rw-r--r--  arch/s390/kernel/perf_pai_ext.c | 2
-rw-r--r--  arch/s390/kernel/process.c | 4
-rw-r--r--  arch/s390/kernel/ptrace.c | 6
-rw-r--r--  arch/s390/kernel/rethook.c | 34
-rw-r--r--  arch/s390/kernel/rethook.h | 7
-rw-r--r--  arch/s390/kernel/setup.c | 96
-rw-r--r--  arch/s390/kernel/signal.c | 4
-rw-r--r--  arch/s390/kernel/smp.c | 14
-rw-r--r--  arch/s390/kernel/stacktrace.c | 6
-rw-r--r--  arch/s390/kernel/text_amode31.S | 13
-rw-r--r--  arch/s390/kernel/vmlinux.lds.S | 4
-rw-r--r--  arch/s390/lib/test_unwind.c | 12
-rw-r--r--  arch/s390/mm/dump_pagetables.c | 16
-rw-r--r--  arch/s390/mm/extable.c | 9
-rw-r--r--  arch/s390/mm/fault.c | 63
-rw-r--r--  arch/s390/mm/init.c | 35
-rw-r--r--  arch/s390/mm/kasan_init.c | 246
-rw-r--r--  arch/s390/mm/maccess.c | 28
-rw-r--r--  arch/s390/mm/pgtable.c | 25
-rw-r--r--  arch/s390/mm/vmem.c | 103
-rw-r--r--  arch/s390/net/bpf_jit_comp.c | 715
-rw-r--r--  arch/sh/include/asm/checksum_32.h | 1
-rw-r--r--  arch/sh/include/asm/gpio.h | 5
-rw-r--r--  arch/x86/Kconfig.assembler | 5
-rw-r--r--  arch/x86/crypto/Kconfig | 38
-rw-r--r--  arch/x86/crypto/Makefile | 6
-rw-r--r--  arch/x86/crypto/aria-aesni-avx-asm_64.S | 172
-rw-r--r--  arch/x86/crypto/aria-aesni-avx2-asm_64.S | 1441
-rw-r--r--  arch/x86/crypto/aria-avx.h | 48
-rw-r--r--  arch/x86/crypto/aria-gfni-avx512-asm_64.S | 971
-rw-r--r--  arch/x86/crypto/aria_aesni_avx2_glue.c | 254
-rw-r--r--  arch/x86/crypto/aria_aesni_avx_glue.c | 49
-rw-r--r--  arch/x86/crypto/aria_gfni_avx512_glue.c | 250
-rw-r--r--  arch/x86/crypto/blowfish-x86_64-asm_64.S | 71
-rw-r--r--  arch/x86/crypto/blowfish_glue.c | 200
-rw-r--r--  arch/x86/crypto/ecb_cbc_helpers.h | 19
-rw-r--r--  arch/x86/crypto/ghash-clmulni-intel_asm.S | 6
-rw-r--r--  arch/x86/crypto/ghash-clmulni-intel_glue.c | 45
-rw-r--r--  arch/x86/entry/entry_64.S | 2
-rw-r--r--  arch/x86/include/asm/asm-prototypes.h | 1
-rw-r--r--  arch/x86/include/asm/checksum_64.h | 1
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 7
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 12
-rw-r--r--  arch/x86/include/asm/debugreg.h | 9
-rw-r--r--  arch/x86/include/asm/disabled-features.h | 3
-rw-r--r--  arch/x86/include/asm/gsseg.h | 66
-rw-r--r--  arch/x86/include/asm/hyperv-tlfs.h | 17
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 1
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 78
-rw-r--r--  arch/x86/include/asm/msr-index.h | 6
-rw-r--r--  arch/x86/include/asm/required-features.h | 3
-rw-r--r--  arch/x86/include/asm/special_insns.h | 21
-rw-r--r--  arch/x86/kernel/asm-offsets.c | 8
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 49
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 31
-rw-r--r--  arch/x86/kernel/cpu/common.c | 27
-rw-r--r--  arch/x86/kernel/cpu/cpu.h | 2
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c | 72
-rw-r--r--  arch/x86/kernel/cpu/tsx.c | 1
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c | 4
-rw-r--r--  arch/x86/kernel/kprobes/opt.c | 6
-rw-r--r--  arch/x86/kernel/module.c | 97
-rw-r--r--  arch/x86/kernel/nmi.c | 108
-rw-r--r--  arch/x86/kernel/paravirt.c | 1
-rw-r--r--  arch/x86/kernel/signal_32.c | 1
-rw-r--r--  arch/x86/kernel/tls.c | 1
-rw-r--r--  arch/x86/kvm/cpuid.c | 41
-rw-r--r--  arch/x86/kvm/reverse_cpuid.h | 1
-rw-r--r--  arch/x86/kvm/svm/svm.c | 3
-rw-r--r--  arch/x86/kvm/x86.c | 3
-rw-r--r--  arch/x86/lib/x86-opcode-map.txt | 1
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 171
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 1
-rw-r--r--  arch/x86/xen/setup.c | 4
-rw-r--r--  arch/x86/xen/smp.h | 2
-rw-r--r--  arch/x86/xen/smp_pv.c | 17
-rw-r--r--  arch/x86/xen/time.c | 38
-rw-r--r--  arch/x86/xen/xen-head.S | 7
248 files changed, 9937 insertions, 2951 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9f85cc34be7d..e24a9820e12f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -344,14 +344,16 @@ comment "CPU Core family selection"
config ARCH_MULTI_V4
bool "ARMv4 based platforms (FA526, StrongARM)"
depends on !ARCH_MULTI_V6_V7
- depends on !LD_IS_LLD
+ # https://github.com/llvm/llvm-project/issues/50764
+ depends on !LD_IS_LLD || LLD_VERSION >= 160000
select ARCH_MULTI_V4_V5
select CPU_FA526 if !(CPU_SA110 || CPU_SA1100)
config ARCH_MULTI_V4T
bool "ARMv4T based platforms (ARM720T, ARM920T, ...)"
depends on !ARCH_MULTI_V6_V7
- depends on !LD_IS_LLD
+ # https://github.com/llvm/llvm-project/issues/50764
+ depends on !LD_IS_LLD || LLD_VERSION >= 160000
select ARCH_MULTI_V4_V5
select CPU_ARM920T if !(CPU_ARM7TDMI || CPU_ARM720T || \
CPU_ARM740T || CPU_ARM9TDMI || CPU_ARM922T || \
@@ -656,7 +658,9 @@ config ARM_ERRATA_458693
hazard might then cause a processor deadlock. The workaround enables
the L1 caching of the NEON accesses and disables the PLD instruction
in the ACTLR register. Note that setting specific bits in the ACTLR
- register may not be available in non-secure mode.
+ register may not be available in non-secure mode and thus is not
+ available on a multiplatform kernel. This should be applied by the
+ bootloader instead.
config ARM_ERRATA_460075
bool "ARM errata: Data written to the L2 cache can be overwritten with stale data"
@@ -669,7 +673,9 @@ config ARM_ERRATA_460075
and overwritten with stale memory contents from external memory. The
workaround disables the write-allocate mode for the L2 cache via the
ACTLR register. Note that setting specific bits in the ACTLR register
- may not be available in non-secure mode.
+ may not be available in non-secure mode and thus is not available on
+ a multiplatform kernel. This should be applied by the bootloader
+ instead.
config ARM_ERRATA_742230
bool "ARM errata: DMB operation may be faulty"
@@ -682,7 +688,10 @@ config ARM_ERRATA_742230
ordering of the two writes. This workaround sets a specific bit in
the diagnostic register of the Cortex-A9 which causes the DMB
instruction to behave as a DSB, ensuring the correct behaviour of
- the two writes.
+ the two writes. Note that setting specific bits in the diagnostics
+ register may not be available in non-secure mode and thus is not
+ available on a multiplatform kernel. This should be applied by the
+ bootloader instead.
config ARM_ERRATA_742231
bool "ARM errata: Incorrect hazard handling in the SCU may lead to data corruption"
@@ -697,7 +706,10 @@ config ARM_ERRATA_742231
replaced from one of the CPUs at the same time as another CPU is
accessing it. This workaround sets specific bits in the diagnostic
register of the Cortex-A9 which reduces the linefill issuing
- capabilities of the processor.
+ capabilities of the processor. Note that setting specific bits in the
+ diagnostics register may not be available in non-secure mode and thus
+ is not available on a multiplatform kernel. This should be applied by
+ the bootloader instead.
config ARM_ERRATA_643719
bool "ARM errata: LoUIS bit field in CLIDR register is incorrect"
@@ -734,7 +746,9 @@ config ARM_ERRATA_743622
register of the Cortex-A9 which disables the Store Buffer
optimisation, preventing the defect from occurring. This has no
visible impact on the overall performance or power consumption of the
- processor.
+ processor. Note that setting specific bits in the diagnostics register
+ may not be available in non-secure mode and thus is not available on a
+ multiplatform kernel. This should be applied by the bootloader instead.
config ARM_ERRATA_751472
bool "ARM errata: Interrupted ICIALLUIS may prevent completion of broadcasted operation"
@@ -746,6 +760,10 @@ config ARM_ERRATA_751472
completion of a following broadcasted operation if the second
operation is received by a CPU before the ICIALLUIS has completed,
potentially leading to corrupted entries in the cache or TLB.
+ Note that setting specific bits in the diagnostics register may
+ not be available in non-secure mode and thus is not available on
+ a multiplatform kernel. This should be applied by the bootloader
+ instead.
config ARM_ERRATA_754322
bool "ARM errata: possible faulty MMU translations following an ASID switch"
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 86f423143185..b407b7b9b715 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -1206,8 +1206,8 @@ choice
depends on MACH_STM32MP157
select DEBUG_STM32_UART
help
- Say Y here if you want kernel low-level debugging support
- on STM32MP1 based platforms, wich default UART is wired on
+ Say Y here if you want kernel low-level debugging support on
+ STM32MP1-based platforms, where the default UART is wired to
UART4, but another UART instance can be selected by modifying
CONFIG_DEBUG_UART_PHYS and CONFIG_DEBUG_UART_VIRT.
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 265c2c89e73a..485a439e22ca 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -209,7 +209,6 @@ machine-$(CONFIG_ARCH_OMAP2PLUS) += omap2
machine-$(CONFIG_ARCH_ORION5X) += orion5x
machine-$(CONFIG_ARCH_PXA) += pxa
machine-$(CONFIG_ARCH_QCOM) += qcom
-machine-$(CONFIG_ARCH_RDA) += rda
machine-$(CONFIG_ARCH_REALTEK) += realtek
machine-$(CONFIG_ARCH_ROCKCHIP) += rockchip
machine-$(CONFIG_ARCH_RPC) += rpc
diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi
index cc2f8b785917..8246a60de0d0 100644
--- a/arch/arm/boot/dts/aspeed-g6.dtsi
+++ b/arch/arm/boot/dts/aspeed-g6.dtsi
@@ -98,6 +98,11 @@
<0x40466000 0x2000>;
};
+ ahbc: bus@1e600000 {
+ compatible = "aspeed,ast2600-ahbc", "syscon";
+ reg = <0x1e600000 0x100>;
+ };
+
fmc: spi@1e620000 {
reg = <0x1e620000 0xc4>, <0x20000000 0x10000000>;
#address-cells = <1>;
@@ -431,6 +436,14 @@
reg = <0x1e6f2000 0x1000>;
};
+ acry: crypto@1e6fa000 {
+ compatible = "aspeed,ast2600-acry";
+ reg = <0x1e6fa000 0x400>, <0x1e710000 0x1800>;
+ interrupts = <GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&syscon ASPEED_CLK_GATE_RSACLK>;
+ aspeed,ahbc = <&ahbc>;
+ };
+
video: video@1e700000 {
compatible = "aspeed,ast2600-video-engine";
reg = <0x1e700000 0x1000>;
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 7b2b7d043d9b..847b7a003356 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -16,8 +16,10 @@ config CRYPTO_CURVE25519_NEON
config CRYPTO_GHASH_ARM_CE
tristate "Hash functions: GHASH (PMULL/NEON/ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
+ select CRYPTO_AEAD
select CRYPTO_HASH
select CRYPTO_CRYPTD
+ select CRYPTO_LIB_AES
select CRYPTO_LIB_GF128MUL
help
GCM GHASH function (NIST SP800-38D)
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
index 9f51e3fa4526..858c0d66798b 100644
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -2,7 +2,8 @@
/*
* Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
*
- * Copyright (C) 2015 - 2017 Linaro Ltd. <[email protected]>
+ * Copyright (C) 2015 - 2017 Linaro Ltd.
+ * Copyright (C) 2023 Google LLC. <[email protected]>
*/
#include <linux/linkage.h>
@@ -44,7 +45,7 @@
t2q .req q7
t3q .req q8
t4q .req q9
- T2 .req q9
+ XH2 .req q9
s1l .req d20
s1h .req d21
@@ -80,7 +81,7 @@
XL2 .req q5
XM2 .req q6
- XH2 .req q7
+ T2 .req q7
T3 .req q8
XL2_L .req d10
@@ -192,9 +193,10 @@
vshr.u64 XL, XL, #1
.endm
- .macro ghash_update, pn
+ .macro ghash_update, pn, enc, aggregate=1, head=1
vld1.64 {XL}, [r1]
+ .if \head
/* do the head block first, if supplied */
ldr ip, [sp]
teq ip, #0
@@ -202,13 +204,32 @@
vld1.64 {T1}, [ip]
teq r0, #0
b 3f
+ .endif
0: .ifc \pn, p64
+ .if \aggregate
tst r0, #3 // skip until #blocks is a
bne 2f // round multiple of 4
vld1.8 {XL2-XM2}, [r2]!
-1: vld1.8 {T3-T2}, [r2]!
+1: vld1.8 {T2-T3}, [r2]!
+
+ .ifnb \enc
+ \enc\()_4x XL2, XM2, T2, T3
+
+ add ip, r3, #16
+ vld1.64 {HH}, [ip, :128]!
+ vld1.64 {HH3-HH4}, [ip, :128]
+
+ veor SHASH2_p64, SHASH_L, SHASH_H
+ veor SHASH2_H, HH_L, HH_H
+ veor HH34_L, HH3_L, HH3_H
+ veor HH34_H, HH4_L, HH4_H
+
+ vmov.i8 MASK, #0xe1
+ vshl.u64 MASK, MASK, #57
+ .endif
+
vrev64.8 XL2, XL2
vrev64.8 XM2, XM2
@@ -218,8 +239,8 @@
veor XL2_H, XL2_H, XL_L
veor XL, XL, T1
- vrev64.8 T3, T3
- vrev64.8 T1, T2
+ vrev64.8 T1, T3
+ vrev64.8 T3, T2
vmull.p64 XH, HH4_H, XL_H // a1 * b1
veor XL2_H, XL2_H, XL_H
@@ -267,14 +288,22 @@
b 1b
.endif
+ .endif
+
+2: vld1.8 {T1}, [r2]!
+
+ .ifnb \enc
+ \enc\()_1x T1
+ veor SHASH2_p64, SHASH_L, SHASH_H
+ vmov.i8 MASK, #0xe1
+ vshl.u64 MASK, MASK, #57
+ .endif
-2: vld1.64 {T1}, [r2]!
subs r0, r0, #1
3: /* multiply XL by SHASH in GF(2^128) */
-#ifndef CONFIG_CPU_BIG_ENDIAN
vrev64.8 T1, T1
-#endif
+
vext.8 IN1, T1, T1, #8
veor T1_L, T1_L, XL_H
veor XL, XL, IN1
@@ -293,9 +322,6 @@
veor XL, XL, T1
bne 0b
-
- vst1.64 {XL}, [r1]
- bx lr
.endm
/*
@@ -316,6 +342,9 @@ ENTRY(pmull_ghash_update_p64)
vshl.u64 MASK, MASK, #57
ghash_update p64
+ vst1.64 {XL}, [r1]
+
+ bx lr
ENDPROC(pmull_ghash_update_p64)
ENTRY(pmull_ghash_update_p8)
@@ -336,4 +365,331 @@ ENTRY(pmull_ghash_update_p8)
vmov.i64 k48, #0xffffffffffff
ghash_update p8
+ vst1.64 {XL}, [r1]
+
+ bx lr
ENDPROC(pmull_ghash_update_p8)
+
+ e0 .req q9
+ e1 .req q10
+ e2 .req q11
+ e3 .req q12
+ e0l .req d18
+ e0h .req d19
+ e2l .req d22
+ e2h .req d23
+ e3l .req d24
+ e3h .req d25
+ ctr .req q13
+ ctr0 .req d26
+ ctr1 .req d27
+
+ ek0 .req q14
+ ek1 .req q15
+
+ .macro round, rk:req, regs:vararg
+ .irp r, \regs
+ aese.8 \r, \rk
+ aesmc.8 \r, \r
+ .endr
+ .endm
+
+ .macro aes_encrypt, rkp, rounds, regs:vararg
+ vld1.8 {ek0-ek1}, [\rkp, :128]!
+ cmp \rounds, #12
+ blt .L\@ // AES-128
+
+ round ek0, \regs
+ vld1.8 {ek0}, [\rkp, :128]!
+ round ek1, \regs
+ vld1.8 {ek1}, [\rkp, :128]!
+
+ beq .L\@ // AES-192
+
+ round ek0, \regs
+ vld1.8 {ek0}, [\rkp, :128]!
+ round ek1, \regs
+ vld1.8 {ek1}, [\rkp, :128]!
+
+.L\@: .rept 4
+ round ek0, \regs
+ vld1.8 {ek0}, [\rkp, :128]!
+ round ek1, \regs
+ vld1.8 {ek1}, [\rkp, :128]!
+ .endr
+
+ round ek0, \regs
+ vld1.8 {ek0}, [\rkp, :128]
+
+ .irp r, \regs
+ aese.8 \r, ek1
+ .endr
+ .irp r, \regs
+ veor \r, \r, ek0
+ .endr
+ .endm
+
+pmull_aes_encrypt:
+ add ip, r5, #4
+ vld1.8 {ctr0}, [r5] // load 12 byte IV
+ vld1.8 {ctr1}, [ip]
+ rev r8, r7
+ vext.8 ctr1, ctr1, ctr1, #4
+ add r7, r7, #1
+ vmov.32 ctr1[1], r8
+ vmov e0, ctr
+
+ add ip, r3, #64
+ aes_encrypt ip, r6, e0
+ bx lr
+ENDPROC(pmull_aes_encrypt)
+
+pmull_aes_encrypt_4x:
+ add ip, r5, #4
+ vld1.8 {ctr0}, [r5]
+ vld1.8 {ctr1}, [ip]
+ rev r8, r7
+ vext.8 ctr1, ctr1, ctr1, #4
+ add r7, r7, #1
+ vmov.32 ctr1[1], r8
+ rev ip, r7
+ vmov e0, ctr
+ add r7, r7, #1
+ vmov.32 ctr1[1], ip
+ rev r8, r7
+ vmov e1, ctr
+ add r7, r7, #1
+ vmov.32 ctr1[1], r8
+ rev ip, r7
+ vmov e2, ctr
+ add r7, r7, #1
+ vmov.32 ctr1[1], ip
+ vmov e3, ctr
+
+ add ip, r3, #64
+ aes_encrypt ip, r6, e0, e1, e2, e3
+ bx lr
+ENDPROC(pmull_aes_encrypt_4x)
+
+pmull_aes_encrypt_final:
+ add ip, r5, #4
+ vld1.8 {ctr0}, [r5]
+ vld1.8 {ctr1}, [ip]
+ rev r8, r7
+ vext.8 ctr1, ctr1, ctr1, #4
+ mov r7, #1 << 24 // BE #1 for the tag
+ vmov.32 ctr1[1], r8
+ vmov e0, ctr
+ vmov.32 ctr1[1], r7
+ vmov e1, ctr
+
+ add ip, r3, #64
+ aes_encrypt ip, r6, e0, e1
+ bx lr
+ENDPROC(pmull_aes_encrypt_final)
+
+ .macro enc_1x, in0
+ bl pmull_aes_encrypt
+ veor \in0, \in0, e0
+ vst1.8 {\in0}, [r4]!
+ .endm
+
+ .macro dec_1x, in0
+ bl pmull_aes_encrypt
+ veor e0, e0, \in0
+ vst1.8 {e0}, [r4]!
+ .endm
+
+ .macro enc_4x, in0, in1, in2, in3
+ bl pmull_aes_encrypt_4x
+
+ veor \in0, \in0, e0
+ veor \in1, \in1, e1
+ veor \in2, \in2, e2
+ veor \in3, \in3, e3
+
+ vst1.8 {\in0-\in1}, [r4]!
+ vst1.8 {\in2-\in3}, [r4]!
+ .endm
+
+ .macro dec_4x, in0, in1, in2, in3
+ bl pmull_aes_encrypt_4x
+
+ veor e0, e0, \in0
+ veor e1, e1, \in1
+ veor e2, e2, \in2
+ veor e3, e3, \in3
+
+ vst1.8 {e0-e1}, [r4]!
+ vst1.8 {e2-e3}, [r4]!
+ .endm
+
+ /*
+ * void pmull_gcm_encrypt(int blocks, u64 dg[], const char *src,
+ * struct gcm_key const *k, char *dst,
+ * char *iv, int rounds, u32 counter)
+ */
+ENTRY(pmull_gcm_encrypt)
+ push {r4-r8, lr}
+ ldrd r4, r5, [sp, #24]
+ ldrd r6, r7, [sp, #32]
+
+ vld1.64 {SHASH}, [r3]
+
+ ghash_update p64, enc, head=0
+ vst1.64 {XL}, [r1]
+
+ pop {r4-r8, pc}
+ENDPROC(pmull_gcm_encrypt)
+
+ /*
+ * void pmull_gcm_decrypt(int blocks, u64 dg[], const char *src,
+ * struct gcm_key const *k, char *dst,
+ * char *iv, int rounds, u32 counter)
+ */
+ENTRY(pmull_gcm_decrypt)
+ push {r4-r8, lr}
+ ldrd r4, r5, [sp, #24]
+ ldrd r6, r7, [sp, #32]
+
+ vld1.64 {SHASH}, [r3]
+
+ ghash_update p64, dec, head=0
+ vst1.64 {XL}, [r1]
+
+ pop {r4-r8, pc}
+ENDPROC(pmull_gcm_decrypt)
+
+ /*
+ * void pmull_gcm_enc_final(int bytes, u64 dg[], char *tag,
+ * struct gcm_key const *k, char *head,
+ * char *iv, int rounds, u32 counter)
+ */
+ENTRY(pmull_gcm_enc_final)
+ push {r4-r8, lr}
+ ldrd r4, r5, [sp, #24]
+ ldrd r6, r7, [sp, #32]
+
+ bl pmull_aes_encrypt_final
+
+ cmp r0, #0
+ beq .Lenc_final
+
+ mov_l ip, .Lpermute
+ sub r4, r4, #16
+ add r8, ip, r0
+ add ip, ip, #32
+ add r4, r4, r0
+ sub ip, ip, r0
+
+ vld1.8 {e3}, [r8] // permute vector for key stream
+ vld1.8 {e2}, [ip] // permute vector for ghash input
+
+ vtbl.8 e3l, {e0}, e3l
+ vtbl.8 e3h, {e0}, e3h
+
+ vld1.8 {e0}, [r4] // encrypt tail block
+ veor e0, e0, e3
+ vst1.8 {e0}, [r4]
+
+ vtbl.8 T1_L, {e0}, e2l
+ vtbl.8 T1_H, {e0}, e2h
+
+ vld1.64 {XL}, [r1]
+.Lenc_final:
+ vld1.64 {SHASH}, [r3, :128]
+ vmov.i8 MASK, #0xe1
+ veor SHASH2_p64, SHASH_L, SHASH_H
+ vshl.u64 MASK, MASK, #57
+ mov r0, #1
+ bne 3f // process head block first
+ ghash_update p64, aggregate=0, head=0
+
+ vrev64.8 XL, XL
+ vext.8 XL, XL, XL, #8
+ veor XL, XL, e1
+
+ sub r2, r2, #16 // rewind src pointer
+ vst1.8 {XL}, [r2] // store tag
+
+ pop {r4-r8, pc}
+ENDPROC(pmull_gcm_enc_final)
+
+ /*
+ * int pmull_gcm_dec_final(int bytes, u64 dg[], char *tag,
+ * struct gcm_key const *k, char *head,
+ * char *iv, int rounds, u32 counter,
+ * const char *otag, int authsize)
+ */
+ENTRY(pmull_gcm_dec_final)
+ push {r4-r8, lr}
+ ldrd r4, r5, [sp, #24]
+ ldrd r6, r7, [sp, #32]
+
+ bl pmull_aes_encrypt_final
+
+ cmp r0, #0
+ beq .Ldec_final
+
+ mov_l ip, .Lpermute
+ sub r4, r4, #16
+ add r8, ip, r0
+ add ip, ip, #32
+ add r4, r4, r0
+ sub ip, ip, r0
+
+ vld1.8 {e3}, [r8] // permute vector for key stream
+ vld1.8 {e2}, [ip] // permute vector for ghash input
+
+ vtbl.8 e3l, {e0}, e3l
+ vtbl.8 e3h, {e0}, e3h
+
+ vld1.8 {e0}, [r4]
+
+ vtbl.8 T1_L, {e0}, e2l
+ vtbl.8 T1_H, {e0}, e2h
+
+ veor e0, e0, e3
+ vst1.8 {e0}, [r4]
+
+ vld1.64 {XL}, [r1]
+.Ldec_final:
+ vld1.64 {SHASH}, [r3]
+ vmov.i8 MASK, #0xe1
+ veor SHASH2_p64, SHASH_L, SHASH_H
+ vshl.u64 MASK, MASK, #57
+ mov r0, #1
+ bne 3f // process head block first
+ ghash_update p64, aggregate=0, head=0
+
+ vrev64.8 XL, XL
+ vext.8 XL, XL, XL, #8
+ veor XL, XL, e1
+
+ mov_l ip, .Lpermute
+ ldrd r2, r3, [sp, #40] // otag and authsize
+ vld1.8 {T1}, [r2]
+ add ip, ip, r3
+ vceq.i8 T1, T1, XL // compare tags
+ vmvn T1, T1 // 0 for eq, -1 for ne
+
+ vld1.8 {e0}, [ip]
+ vtbl.8 XL_L, {T1}, e0l // keep authsize bytes only
+ vtbl.8 XL_H, {T1}, e0h
+
+ vpmin.s8 XL_L, XL_L, XL_H // take the minimum s8 across the vector
+ vpmin.s8 XL_L, XL_L, XL_L
+ vmov.32 r0, XL_L[0] // fail if != 0x0
+
+ pop {r4-r8, pc}
+ENDPROC(pmull_gcm_dec_final)
+
+ .section ".rodata", "a", %progbits
+ .align 5
+.Lpermute:
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
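
The four-block path above relies on the precomputed powers of the hash key that the glue code stores alongside H (SHASH holds H, HH holds H^2, HH3/HH4 hold H^3/H^4). The identity it uses is the standard GHASH aggregation: folding four blocks C1..C4 into the running digest Y one at a time,

    Y' = ((((Y xor C1)*H xor C2)*H xor C3)*H xor C4)*H

is the same as

    Y' = (Y xor C1)*H^4 xor C2*H^3 xor C3*H^2 xor C4*H

(all arithmetic in GF(2^128)), so the four carry-less multiplications can be issued independently and reduced once, instead of being serialised through the digest.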
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index f13401f3e669..3ddf05b4234d 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -2,36 +2,53 @@
/*
* Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
*
- * Copyright (C) 2015 - 2018 Linaro Ltd. <[email protected]>
+ * Copyright (C) 2015 - 2018 Linaro Ltd.
+ * Copyright (C) 2023 Google LLC.
*/
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/gcm.h>
#include <crypto/b128ops.h>
#include <crypto/cryptd.h>
+#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
#include <crypto/gf128mul.h>
+#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/module.h>
MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
-MODULE_AUTHOR("Ard Biesheuvel <[email protected]>");
-MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Ard Biesheuvel <[email protected]>");
+MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("ghash");
+MODULE_ALIAS_CRYPTO("gcm(aes)");
+MODULE_ALIAS_CRYPTO("rfc4106(gcm(aes))");
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
+#define RFC4106_NONCE_SIZE 4
+
struct ghash_key {
be128 k;
u64 h[][2];
};
+struct gcm_key {
+ u64 h[4][2];
+ u32 rk[AES_MAX_KEYLENGTH_U32];
+ int rounds;
+ u8 nonce[]; // for RFC4106 nonce
+};
+
struct ghash_desc_ctx {
u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
u8 buf[GHASH_BLOCK_SIZE];
@@ -344,6 +361,393 @@ static struct ahash_alg ghash_async_alg = {
},
};
+
+void pmull_gcm_encrypt(int blocks, u64 dg[], const char *src,
+ struct gcm_key const *k, char *dst,
+ const char *iv, int rounds, u32 counter);
+
+void pmull_gcm_enc_final(int blocks, u64 dg[], char *tag,
+ struct gcm_key const *k, char *head,
+ const char *iv, int rounds, u32 counter);
+
+void pmull_gcm_decrypt(int bytes, u64 dg[], const char *src,
+ struct gcm_key const *k, char *dst,
+ const char *iv, int rounds, u32 counter);
+
+int pmull_gcm_dec_final(int bytes, u64 dg[], char *tag,
+ struct gcm_key const *k, char *head,
+ const char *iv, int rounds, u32 counter,
+ const char *otag, int authsize);
+
+static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey,
+ unsigned int keylen)
+{
+ struct gcm_key *ctx = crypto_aead_ctx(tfm);
+ struct crypto_aes_ctx aes_ctx;
+ be128 h, k;
+ int ret;
+
+ ret = aes_expandkey(&aes_ctx, inkey, keylen);
+ if (ret)
+ return -EINVAL;
+
+ aes_encrypt(&aes_ctx, (u8 *)&k, (u8[AES_BLOCK_SIZE]){});
+
+ memcpy(ctx->rk, aes_ctx.key_enc, sizeof(ctx->rk));
+ ctx->rounds = 6 + keylen / 4;
+
+ memzero_explicit(&aes_ctx, sizeof(aes_ctx));
+
+ ghash_reflect(ctx->h[0], &k);
+
+ h = k;
+ gf128mul_lle(&h, &k);
+ ghash_reflect(ctx->h[1], &h);
+
+ gf128mul_lle(&h, &k);
+ ghash_reflect(ctx->h[2], &h);
+
+ gf128mul_lle(&h, &k);
+ ghash_reflect(ctx->h[3], &h);
+
+ return 0;
+}
+
+static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ return crypto_gcm_check_authsize(authsize);
+}
+
+static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
+ int *buf_count, struct gcm_key *ctx)
+{
+ if (*buf_count > 0) {
+ int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);
+
+ memcpy(&buf[*buf_count], src, buf_added);
+
+ *buf_count += buf_added;
+ src += buf_added;
+ count -= buf_added;
+ }
+
+ if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
+ int blocks = count / GHASH_BLOCK_SIZE;
+
+ pmull_ghash_update_p64(blocks, dg, src, ctx->h,
+ *buf_count ? buf : NULL);
+
+ src += blocks * GHASH_BLOCK_SIZE;
+ count %= GHASH_BLOCK_SIZE;
+ *buf_count = 0;
+ }
+
+ if (count > 0) {
+ memcpy(buf, src, count);
+ *buf_count = count;
+ }
+}
+
+static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[], u32 len)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_key *ctx = crypto_aead_ctx(aead);
+ u8 buf[GHASH_BLOCK_SIZE];
+ struct scatter_walk walk;
+ int buf_count = 0;
+
+ scatterwalk_start(&walk, req->src);
+
+ do {
+ u32 n = scatterwalk_clamp(&walk, len);
+ u8 *p;
+
+ if (!n) {
+ scatterwalk_start(&walk, sg_next(walk.sg));
+ n = scatterwalk_clamp(&walk, len);
+ }
+
+ p = scatterwalk_map(&walk);
+ gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
+ scatterwalk_unmap(p);
+
+ if (unlikely(len / SZ_4K > (len - n) / SZ_4K)) {
+ kernel_neon_end();
+ kernel_neon_begin();
+ }
+
+ len -= n;
+ scatterwalk_advance(&walk, n);
+ scatterwalk_done(&walk, 0, len);
+ } while (len);
+
+ if (buf_count) {
+ memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
+ pmull_ghash_update_p64(1, dg, buf, ctx->h, NULL);
+ }
+}
+
+static int gcm_encrypt(struct aead_request *req, const u8 *iv, u32 assoclen)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_key *ctx = crypto_aead_ctx(aead);
+ struct skcipher_walk walk;
+ u8 buf[AES_BLOCK_SIZE];
+ u32 counter = 2;
+ u64 dg[2] = {};
+ be128 lengths;
+ const u8 *src;
+ u8 *tag, *dst;
+ int tail, err;
+
+ if (WARN_ON_ONCE(!may_use_simd()))
+ return -EBUSY;
+
+ err = skcipher_walk_aead_encrypt(&walk, req, false);
+
+ kernel_neon_begin();
+
+ if (assoclen)
+ gcm_calculate_auth_mac(req, dg, assoclen);
+
+ src = walk.src.virt.addr;
+ dst = walk.dst.virt.addr;
+
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ int nblocks = walk.nbytes / AES_BLOCK_SIZE;
+
+ pmull_gcm_encrypt(nblocks, dg, src, ctx, dst, iv,
+ ctx->rounds, counter);
+ counter += nblocks;
+
+ if (walk.nbytes == walk.total) {
+ src += nblocks * AES_BLOCK_SIZE;
+ dst += nblocks * AES_BLOCK_SIZE;
+ break;
+ }
+
+ kernel_neon_end();
+
+ err = skcipher_walk_done(&walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ if (err)
+ return err;
+
+ src = walk.src.virt.addr;
+ dst = walk.dst.virt.addr;
+
+ kernel_neon_begin();
+ }
+
+
+ lengths.a = cpu_to_be64(assoclen * 8);
+ lengths.b = cpu_to_be64(req->cryptlen * 8);
+
+ tag = (u8 *)&lengths;
+ tail = walk.nbytes % AES_BLOCK_SIZE;
+
+ /*
+ * Bounce via a buffer unless we are encrypting in place and src/dst
+ * are not pointing to the start of the walk buffer. In that case, we
+ * can do a NEON load/xor/store sequence in place as long as we move
+ * the plain/ciphertext and keystream to the start of the register. If
+ * not, do a memcpy() to the end of the buffer so we can reuse the same
+ * logic.
+ */
+ if (unlikely(tail && (tail == walk.nbytes || src != dst)))
+ src = memcpy(buf + sizeof(buf) - tail, src, tail);
+
+ pmull_gcm_enc_final(tail, dg, tag, ctx, (u8 *)src, iv,
+ ctx->rounds, counter);
+ kernel_neon_end();
+
+ if (unlikely(tail && src != dst))
+ memcpy(dst, src, tail);
+
+ if (walk.nbytes) {
+ err = skcipher_walk_done(&walk, 0);
+ if (err)
+ return err;
+ }
+
+ /* copy authtag to end of dst */
+ scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
+ crypto_aead_authsize(aead), 1);
+
+ return 0;
+}
+
+static int gcm_decrypt(struct aead_request *req, const u8 *iv, u32 assoclen)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_key *ctx = crypto_aead_ctx(aead);
+ int authsize = crypto_aead_authsize(aead);
+ struct skcipher_walk walk;
+ u8 otag[AES_BLOCK_SIZE];
+ u8 buf[AES_BLOCK_SIZE];
+ u32 counter = 2;
+ u64 dg[2] = {};
+ be128 lengths;
+ const u8 *src;
+ u8 *tag, *dst;
+ int tail, err, ret;
+
+ if (WARN_ON_ONCE(!may_use_simd()))
+ return -EBUSY;
+
+ scatterwalk_map_and_copy(otag, req->src,
+ req->assoclen + req->cryptlen - authsize,
+ authsize, 0);
+
+ err = skcipher_walk_aead_decrypt(&walk, req, false);
+
+ kernel_neon_begin();
+
+ if (assoclen)
+ gcm_calculate_auth_mac(req, dg, assoclen);
+
+ src = walk.src.virt.addr;
+ dst = walk.dst.virt.addr;
+
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ int nblocks = walk.nbytes / AES_BLOCK_SIZE;
+
+ pmull_gcm_decrypt(nblocks, dg, src, ctx, dst, iv,
+ ctx->rounds, counter);
+ counter += nblocks;
+
+ if (walk.nbytes == walk.total) {
+ src += nblocks * AES_BLOCK_SIZE;
+ dst += nblocks * AES_BLOCK_SIZE;
+ break;
+ }
+
+ kernel_neon_end();
+
+ err = skcipher_walk_done(&walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ if (err)
+ return err;
+
+ src = walk.src.virt.addr;
+ dst = walk.dst.virt.addr;
+
+ kernel_neon_begin();
+ }
+
+ lengths.a = cpu_to_be64(assoclen * 8);
+ lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);
+
+ tag = (u8 *)&lengths;
+ tail = walk.nbytes % AES_BLOCK_SIZE;
+
+ if (unlikely(tail && (tail == walk.nbytes || src != dst)))
+ src = memcpy(buf + sizeof(buf) - tail, src, tail);
+
+ ret = pmull_gcm_dec_final(tail, dg, tag, ctx, (u8 *)src, iv,
+ ctx->rounds, counter, otag, authsize);
+ kernel_neon_end();
+
+ if (unlikely(tail && src != dst))
+ memcpy(dst, src, tail);
+
+ if (walk.nbytes) {
+ err = skcipher_walk_done(&walk, 0);
+ if (err)
+ return err;
+ }
+
+ return ret ? -EBADMSG : 0;
+}
+
+static int gcm_aes_encrypt(struct aead_request *req)
+{
+ return gcm_encrypt(req, req->iv, req->assoclen);
+}
+
+static int gcm_aes_decrypt(struct aead_request *req)
+{
+ return gcm_decrypt(req, req->iv, req->assoclen);
+}
+
+static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey,
+ unsigned int keylen)
+{
+ struct gcm_key *ctx = crypto_aead_ctx(tfm);
+ int err;
+
+ keylen -= RFC4106_NONCE_SIZE;
+ err = gcm_aes_setkey(tfm, inkey, keylen);
+ if (err)
+ return err;
+
+ memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE);
+ return 0;
+}
+
+static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ return crypto_rfc4106_check_authsize(authsize);
+}
+
+static int rfc4106_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_key *ctx = crypto_aead_ctx(aead);
+ u8 iv[GCM_AES_IV_SIZE];
+
+ memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
+ memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);
+
+ return crypto_ipsec_check_assoclen(req->assoclen) ?:
+ gcm_encrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
+}
+
+static int rfc4106_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_key *ctx = crypto_aead_ctx(aead);
+ u8 iv[GCM_AES_IV_SIZE];
+
+ memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
+ memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);
+
+ return crypto_ipsec_check_assoclen(req->assoclen) ?:
+ gcm_decrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
+}
+
+static struct aead_alg gcm_aes_algs[] = {{
+ .ivsize = GCM_AES_IV_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .maxauthsize = AES_BLOCK_SIZE,
+ .setkey = gcm_aes_setkey,
+ .setauthsize = gcm_aes_setauthsize,
+ .encrypt = gcm_aes_encrypt,
+ .decrypt = gcm_aes_decrypt,
+
+ .base.cra_name = "gcm(aes)",
+ .base.cra_driver_name = "gcm-aes-ce",
+ .base.cra_priority = 400,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct gcm_key),
+ .base.cra_module = THIS_MODULE,
+}, {
+ .ivsize = GCM_RFC4106_IV_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .maxauthsize = AES_BLOCK_SIZE,
+ .setkey = rfc4106_setkey,
+ .setauthsize = rfc4106_setauthsize,
+ .encrypt = rfc4106_encrypt,
+ .decrypt = rfc4106_decrypt,
+
+ .base.cra_name = "rfc4106(gcm(aes))",
+ .base.cra_driver_name = "rfc4106-gcm-aes-ce",
+ .base.cra_priority = 400,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct gcm_key) + RFC4106_NONCE_SIZE,
+ .base.cra_module = THIS_MODULE,
+}};
+
static int __init ghash_ce_mod_init(void)
{
int err;
@@ -352,13 +756,17 @@ static int __init ghash_ce_mod_init(void)
return -ENODEV;
if (elf_hwcap2 & HWCAP2_PMULL) {
+ err = crypto_register_aeads(gcm_aes_algs,
+ ARRAY_SIZE(gcm_aes_algs));
+ if (err)
+ return err;
ghash_alg.base.cra_ctxsize += 3 * sizeof(u64[2]);
static_branch_enable(&use_p64);
}
err = crypto_register_shash(&ghash_alg);
if (err)
- return err;
+ goto err_aead;
err = crypto_register_ahash(&ghash_async_alg);
if (err)
goto err_shash;
@@ -367,6 +775,10 @@ static int __init ghash_ce_mod_init(void)
err_shash:
crypto_unregister_shash(&ghash_alg);
+err_aead:
+ if (elf_hwcap2 & HWCAP2_PMULL)
+ crypto_unregister_aeads(gcm_aes_algs,
+ ARRAY_SIZE(gcm_aes_algs));
return err;
}
@@ -374,6 +786,9 @@ static void __exit ghash_ce_mod_exit(void)
{
crypto_unregister_ahash(&ghash_async_alg);
crypto_unregister_shash(&ghash_alg);
+ if (elf_hwcap2 & HWCAP2_PMULL)
+ crypto_unregister_aeads(gcm_aes_algs,
+ ARRAY_SIZE(gcm_aes_algs));
}
module_init(ghash_ce_mod_init);
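
With the registrations above in place, kernel users do not call the pmull_* routines directly; they reach "gcm-aes-ce" (or a software fallback) through the generic AEAD API. A rough sketch, assuming a caller that encrypts a flat buffer in place with no associated data and with minimal error handling:

#include <crypto/aead.h>
#include <crypto/aes.h>
#include <crypto/gcm.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>

static int gcm_aes_encrypt_buf(u8 *buf, unsigned int len,
			       const u8 key[AES_KEYSIZE_128],
			       u8 iv[GCM_AES_IV_SIZE])
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);	/* picks gcm-aes-ce when PMULL is present */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_aead_setkey(tfm, key, AES_KEYSIZE_128) ?:
	      crypto_aead_setauthsize(tfm, AES_BLOCK_SIZE);
	if (err)
		goto out_tfm;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_tfm;
	}

	sg_init_one(&sg, buf, len + AES_BLOCK_SIZE);	/* room for the 16-byte tag */
	aead_request_set_callback(req, 0, crypto_req_done, &wait);
	aead_request_set_ad(req, 0);			/* no associated data */
	aead_request_set_crypt(req, &sg, &sg, len, iv);

	err = crypto_wait_req(crypto_aead_encrypt(req), &wait);

	aead_request_free(req);
out_tfm:
	crypto_free_aead(tfm);
	return err;
}

The rfc4106 variant differs only in that the last four bytes of the key become the fixed nonce prefix, as rfc4106_setkey() above shows.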
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index 6c2b849e459d..95a727bcd664 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -21,31 +21,29 @@
#include "sha1.h"
-asmlinkage void sha1_block_data_order(u32 *digest,
- const unsigned char *data, unsigned int rounds);
+asmlinkage void sha1_block_data_order(struct sha1_state *digest,
+ const u8 *data, int rounds);
int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- /* make sure casting to sha1_block_fn() is safe */
+ /* make sure signature matches sha1_block_fn() */
BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
- return sha1_base_do_update(desc, data, len,
- (sha1_block_fn *)sha1_block_data_order);
+ return sha1_base_do_update(desc, data, len, sha1_block_data_order);
}
EXPORT_SYMBOL_GPL(sha1_update_arm);
static int sha1_final(struct shash_desc *desc, u8 *out)
{
- sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order);
+ sha1_base_do_finalize(desc, sha1_block_data_order);
return sha1_base_finish(desc, out);
}
int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- sha1_base_do_update(desc, data, len,
- (sha1_block_fn *)sha1_block_data_order);
+ sha1_base_do_update(desc, data, len, sha1_block_data_order);
return sha1_final(desc, out);
}
EXPORT_SYMBOL_GPL(sha1_finup_arm);
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index f82a819eb0db..311e83038bdb 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -252,5 +252,10 @@ static inline void gic_arch_enable_irqs(void)
WARN_ON_ONCE(true);
}
+static inline bool gic_has_relaxed_pmr_sync(void)
+{
+ return false;
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* !__ASM_ARCH_GICV3_H */
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 28e18f79c300..06b48ce23e1c 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -236,21 +236,26 @@ THUMB( fpreg .req r7 )
sub \tmp, \tmp, #1 @ decrement it
str \tmp, [\ti, #TI_PREEMPT]
.endm
-
- .macro dec_preempt_count_ti, ti, tmp
- get_thread_info \ti
- dec_preempt_count \ti, \tmp
- .endm
#else
.macro inc_preempt_count, ti, tmp
.endm
.macro dec_preempt_count, ti, tmp
.endm
+#endif
+
+ .macro local_bh_disable, ti, tmp
+ ldr \tmp, [\ti, #TI_PREEMPT]
+ add \tmp, \tmp, #SOFTIRQ_DISABLE_OFFSET
+ str \tmp, [\ti, #TI_PREEMPT]
+ .endm
- .macro dec_preempt_count_ti, ti, tmp
+ .macro local_bh_enable_ti, ti, tmp
+ get_thread_info \ti
+ ldr \tmp, [\ti, #TI_PREEMPT]
+ sub \tmp, \tmp, #SOFTIRQ_DISABLE_OFFSET
+ str \tmp, [\ti, #TI_PREEMPT]
.endm
-#endif
#define USERL(l, x...) \
9999: x; \
diff --git a/arch/arm/include/asm/checksum.h b/arch/arm/include/asm/checksum.h
index f0f54aef3724..d8a13959bff0 100644
--- a/arch/arm/include/asm/checksum.h
+++ b/arch/arm/include/asm/checksum.h
@@ -11,6 +11,7 @@
#define __ASM_ARM_CHECKSUM_H
#include <linux/in6.h>
+#include <linux/uaccess.h>
/*
* computes the checksum of a memory block at buff, length len,
diff --git a/arch/arm/include/asm/simd.h b/arch/arm/include/asm/simd.h
new file mode 100644
index 000000000000..82191dbd7e78
--- /dev/null
+++ b/arch/arm/include/asm/simd.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/hardirq.h>
+
+static __must_check inline bool may_use_simd(void)
+{
+ return IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !in_hardirq();
+}
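
This gives 32-bit ARM the same may_use_simd() helper that arm64 already has, matching the softirq-aware kernel_neon_begin()/kernel_neon_end() changes elsewhere in this series. The expected calling pattern, as a sketch with placeholder worker functions:

#include <asm/neon.h>
#include <asm/simd.h>

static void process_block(void *dst, const void *src, int len)
{
	if (may_use_simd()) {
		kernel_neon_begin();		/* also disables softirqs now */
		do_thing_neon(dst, src, len);	/* placeholder NEON routine */
		kernel_neon_end();
	} else {
		do_thing_scalar(dst, src, len);	/* placeholder C fallback */
	}
}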
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 2c8d76fd7c66..38121c59cbc2 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -56,6 +56,7 @@ int main(void)
DEFINE(VFP_CPU, offsetof(union vfp_state, hard.cpu));
#endif
#endif
+ DEFINE(SOFTIRQ_DISABLE_OFFSET,SOFTIRQ_DISABLE_OFFSET);
#ifdef CONFIG_ARM_THUMBEE
DEFINE(TI_THUMBEE_STATE, offsetof(struct thread_info, thumbee_state));
#endif
diff --git a/arch/arm/mach-rda/Makefile b/arch/arm/mach-rda/Makefile
deleted file mode 100644
index f126d00ecd53..000000000000
--- a/arch/arm/mach-rda/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj- += dummy.o
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index fc439c2c16f8..c5bbae86f725 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -743,7 +743,7 @@ config SWP_EMULATE
If unsure, say Y.
choice
- prompt "CPU Endianess"
+ prompt "CPU Endianness"
default CPU_LITTLE_ENDIAN
config CPU_LITTLE_ENDIAN
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index 27b0a1f27fbd..9a89264cdcc0 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -22,7 +22,7 @@
@ IRQs enabled.
@
ENTRY(do_vfp)
- inc_preempt_count r10, r4
+ local_bh_disable r10, r4
ldr r4, .LCvfp
ldr r11, [r10, #TI_CPU] @ CPU number
add r10, r10, #TI_VFPSTATE @ r10 = workspace
@@ -30,7 +30,7 @@ ENTRY(do_vfp)
ENDPROC(do_vfp)
ENTRY(vfp_null_entry)
- dec_preempt_count_ti r10, r4
+ local_bh_enable_ti r10, r4
ret lr
ENDPROC(vfp_null_entry)
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 6f7926c9c179..26c4f61ecfa3 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -175,7 +175,7 @@ vfp_hw_state_valid:
@ else it's one 32-bit instruction, so
@ always subtract 4 from the following
@ instruction address.
- dec_preempt_count_ti r10, r4
+ local_bh_enable_ti r10, r4
ret r9 @ we think we have handled things
@@ -200,7 +200,7 @@ skip:
@ not recognised by VFP
DBGSTR "not VFP"
- dec_preempt_count_ti r10, r4
+ local_bh_enable_ti r10, r4
ret lr
process_exception:
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 281110423871..01bc48d73847 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -416,7 +416,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
if (exceptions)
vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
exit:
- preempt_enable();
+ local_bh_enable();
}
static void vfp_enable(void *unused)
@@ -517,6 +517,8 @@ void vfp_sync_hwstate(struct thread_info *thread)
{
unsigned int cpu = get_cpu();
+ local_bh_disable();
+
if (vfp_state_in_hw(cpu, thread)) {
u32 fpexc = fmrx(FPEXC);
@@ -528,6 +530,7 @@ void vfp_sync_hwstate(struct thread_info *thread)
fmxr(FPEXC, fpexc);
}
+ local_bh_enable();
put_cpu();
}
@@ -717,13 +720,15 @@ void kernel_neon_begin(void)
unsigned int cpu;
u32 fpexc;
+ local_bh_disable();
+
/*
- * Kernel mode NEON is only allowed outside of interrupt context
- * with preemption disabled. This will make sure that the kernel
- * mode NEON register contents never need to be preserved.
+ * Kernel mode NEON is only allowed outside of hardirq context with
+ * preemption and softirq processing disabled. This will make sure that
+ * the kernel mode NEON register contents never need to be preserved.
*/
- BUG_ON(in_interrupt());
- cpu = get_cpu();
+ BUG_ON(in_hardirq());
+ cpu = __smp_processor_id();
fpexc = fmrx(FPEXC) | FPEXC_EN;
fmxr(FPEXC, fpexc);
@@ -746,7 +751,7 @@ void kernel_neon_end(void)
{
/* Disable the NEON/VFP unit. */
fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
- put_cpu();
+ local_bh_enable();
}
EXPORT_SYMBOL(kernel_neon_end);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c5ccca26a408..27b2592698b0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -123,6 +123,8 @@ config ARM64
select DMA_DIRECT_REMAP
select EDAC_SUPPORT
select FRAME_POINTER
+ select FUNCTION_ALIGNMENT_4B
+ select FUNCTION_ALIGNMENT_8B if DYNAMIC_FTRACE_WITH_CALL_OPS
select GENERIC_ALLOCATOR
select GENERIC_ARCH_TOPOLOGY
select GENERIC_CLOCKEVENTS_BROADCAST
@@ -184,6 +186,8 @@ config ARM64
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
+ select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \
+ if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG)
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
if DYNAMIC_FTRACE_WITH_ARGS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -1470,10 +1474,23 @@ config XEN
help
Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
+# include/linux/mmzone.h requires the following to be true:
+#
+# MAX_ORDER - 1 + PAGE_SHIFT <= SECTION_SIZE_BITS
+#
+# so the maximum value of MAX_ORDER is SECTION_SIZE_BITS + 1 - PAGE_SHIFT:
+#
+# | SECTION_SIZE_BITS | PAGE_SHIFT | max MAX_ORDER | default MAX_ORDER |
+# ----+-------------------+--------------+-----------------+--------------------+
+# 4K | 27 | 12 | 16 | 11 |
+# 16K | 27 | 14 | 14 | 12 |
+# 64K | 29 | 16 | 14 | 14 |
config ARCH_FORCE_MAX_ORDER
- int
+ int "Maximum zone order" if ARM64_4K_PAGES || ARM64_16K_PAGES
default "14" if ARM64_64K_PAGES
+ range 12 14 if ARM64_16K_PAGES
default "12" if ARM64_16K_PAGES
+ range 11 16 if ARM64_4K_PAGES
default "11"
help
The kernel memory allocator divides physically contiguous memory
@@ -1486,7 +1503,7 @@ config ARCH_FORCE_MAX_ORDER
This config option is actually maximum order plus one. For example,
a value of 11 means that the largest free memory block is 2^10 pages.
- We make sure that we can allocate upto a HugePage size for each configuration.
+ We make sure that we can allocate up to a HugePage size for each configuration.
Hence we have :
MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2
@@ -1832,7 +1849,7 @@ config ARM64_PTR_AUTH_KERNEL
bool "Use pointer authentication for kernel"
default y
depends on ARM64_PTR_AUTH
- depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC
+ depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_ARMV8_3
# Modern compilers insert a .note.gnu.property section note for PAC
# which is only understood by binutils starting with version 2.33.1.
depends on LD_IS_LLD || LD_VERSION >= 23301 || (CC_IS_GCC && GCC_VERSION < 90100)
@@ -1857,7 +1874,7 @@ config CC_HAS_SIGN_RETURN_ADDRESS
# GCC 7, 8
def_bool $(cc-option,-msign-return-address=all)
-config AS_HAS_PAC
+config AS_HAS_ARMV8_3
def_bool $(cc-option,-Wa$(comma)-march=armv8.3-a)
config AS_HAS_CFI_NEGATE_RA_STATE
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 165e544aa7f9..89a0b13b058d 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -187,7 +187,7 @@ config ARCH_MVEBU
select PINCTRL_ARMADA_CP110
select PINCTRL_AC5
help
- This enables support for Marvell EBU familly, including:
+ This enables support for Marvell EBU family, including:
- Armada 3700 SoC Family
- Armada 7K SoC Family
- Armada 8K SoC Family
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index d7dfe00df7d2..2d49aea0ff67 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -63,50 +63,37 @@ stack_protector_prepare: prepare0
include/generated/asm-offsets.h))
endif
-ifeq ($(CONFIG_AS_HAS_ARMV8_2), y)
-# make sure to pass the newest target architecture to -march.
-asm-arch := armv8.2-a
-endif
-
-# Ensure that if the compiler supports branch protection we default it
-# off, this will be overridden if we are using branch protection.
-branch-prot-flags-y += $(call cc-option,-mbranch-protection=none)
-
-ifeq ($(CONFIG_ARM64_PTR_AUTH_KERNEL),y)
-branch-prot-flags-$(CONFIG_CC_HAS_SIGN_RETURN_ADDRESS) := -msign-return-address=all
-# We enable additional protection for leaf functions as there is some
-# narrow potential for ROP protection benefits and no substantial
-# performance impact has been observed.
-PACRET-y := pac-ret+leaf
-
-# Using a shadow call stack in leaf functions is too costly, so avoid PAC there
-# as well when we may be patching PAC into SCS
-PACRET-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) := pac-ret
-
ifeq ($(CONFIG_ARM64_BTI_KERNEL),y)
-branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI) := -mbranch-protection=$(PACRET-y)+bti
+ KBUILD_CFLAGS += -mbranch-protection=pac-ret+bti
+else ifeq ($(CONFIG_ARM64_PTR_AUTH_KERNEL),y)
+ ifeq ($(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET),y)
+ KBUILD_CFLAGS += -mbranch-protection=pac-ret
+ else
+ KBUILD_CFLAGS += -msign-return-address=non-leaf
+ endif
else
-branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET) := -mbranch-protection=$(PACRET-y)
-endif
-# -march=armv8.3-a enables the non-nops instructions for PAC, to avoid the
-# compiler to generate them and consequently to break the single image contract
-# we pass it only to the assembler. This option is utilized only in case of non
-# integrated assemblers.
-ifeq ($(CONFIG_AS_HAS_PAC), y)
-asm-arch := armv8.3-a
-endif
-endif
-
-KBUILD_CFLAGS += $(branch-prot-flags-y)
-
-ifeq ($(CONFIG_AS_HAS_ARMV8_4), y)
-# make sure to pass the newest target architecture to -march.
-asm-arch := armv8.4-a
+ KBUILD_CFLAGS += $(call cc-option,-mbranch-protection=none)
endif
+# Tell the assembler to support instructions from the latest target
+# architecture.
+#
+# For non-integrated assemblers we'll pass this on the command line, and for
+# integrated assemblers we'll define ARM64_ASM_ARCH and ARM64_ASM_PREAMBLE for
+# inline usage.
+#
+# We cannot pass the same arch flag to the compiler as this would allow it to
+# freely generate instructions which are not supported by earlier architecture
+# versions, which would prevent a single kernel image from working on earlier
+# hardware.
ifeq ($(CONFIG_AS_HAS_ARMV8_5), y)
-# make sure to pass the newest target architecture to -march.
-asm-arch := armv8.5-a
+ asm-arch := armv8.5-a
+else ifeq ($(CONFIG_AS_HAS_ARMV8_4), y)
+ asm-arch := armv8.4-a
+else ifeq ($(CONFIG_AS_HAS_ARMV8_3), y)
+ asm-arch := armv8.3-a
+else ifeq ($(CONFIG_AS_HAS_ARMV8_2), y)
+ asm-arch := armv8.2-a
endif
ifdef asm-arch
@@ -139,7 +126,10 @@ endif
CHECKFLAGS += -D__aarch64__
-ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_ARGS),y)
+ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS),y)
+ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=4,2
+else ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_ARGS),y)
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
CC_FLAGS_FTRACE := -fpatchable-function-entry=2
endif
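
The 4,2 variant of the patchable-entry flag reserves two extra NOPs ahead of each function's entry point, which is also why FUNCTION_ALIGNMENT_8B is selected for DYNAMIC_FTRACE_WITH_CALL_OPS in the Kconfig change above. Expressed per function with the equivalent compiler attribute (a sketch of what the command-line option requests, not kernel code):

/*
 * -fpatchable-function-entry=4,2: four NOPs in total, two of them placed
 * before the function's entry label.  The 8-byte pre-entry gap is intended
 * to hold a pointer to the function's ftrace_ops, and the NOPs at the entry
 * are what get patched into the trace call when tracing is enabled.
 */
__attribute__((patchable_function_entry(4, 2)))
void traced_example(void)
{
}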
diff --git a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts
index c928b6824e41..fefb93487291 100644
--- a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts
@@ -47,6 +47,46 @@
status = "okay";
};
+&eqos {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_eqos>;
+ phy-mode = "rgmii-id";
+ phy-handle = <&ethphy1>;
+ status = "okay";
+
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clock-frequency = <5000000>;
+
+ ethphy1: ethernet-phy@1 {
+ reg = <1>;
+ eee-broken-1000t;
+ };
+ };
+};
+
+&fec {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_fec>;
+ phy-mode = "rgmii-id";
+ phy-handle = <&ethphy2>;
+ fsl,magic-packet;
+ status = "okay";
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clock-frequency = <5000000>;
+
+ ethphy2: ethernet-phy@2 {
+ reg = <2>;
+ eee-broken-1000t;
+ };
+ };
+};
+
&lpuart1 { /* console */
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_uart1>;
@@ -77,6 +117,44 @@
};
&iomuxc {
+ pinctrl_eqos: eqosgrp {
+ fsl,pins = <
+ MX93_PAD_ENET1_MDC__ENET_QOS_MDC 0x57e
+ MX93_PAD_ENET1_MDIO__ENET_QOS_MDIO 0x57e
+ MX93_PAD_ENET1_RD0__ENET_QOS_RGMII_RD0 0x57e
+ MX93_PAD_ENET1_RD1__ENET_QOS_RGMII_RD1 0x57e
+ MX93_PAD_ENET1_RD2__ENET_QOS_RGMII_RD2 0x57e
+ MX93_PAD_ENET1_RD3__ENET_QOS_RGMII_RD3 0x57e
+ MX93_PAD_ENET1_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x5fe
+ MX93_PAD_ENET1_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x57e
+ MX93_PAD_ENET1_TD0__ENET_QOS_RGMII_TD0 0x57e
+ MX93_PAD_ENET1_TD1__ENET_QOS_RGMII_TD1 0x57e
+ MX93_PAD_ENET1_TD2__ENET_QOS_RGMII_TD2 0x57e
+ MX93_PAD_ENET1_TD3__ENET_QOS_RGMII_TD3 0x57e
+ MX93_PAD_ENET1_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x5fe
+ MX93_PAD_ENET1_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x57e
+ >;
+ };
+
+ pinctrl_fec: fecgrp {
+ fsl,pins = <
+ MX93_PAD_ENET2_MDC__ENET1_MDC 0x57e
+ MX93_PAD_ENET2_MDIO__ENET1_MDIO 0x57e
+ MX93_PAD_ENET2_RD0__ENET1_RGMII_RD0 0x57e
+ MX93_PAD_ENET2_RD1__ENET1_RGMII_RD1 0x57e
+ MX93_PAD_ENET2_RD2__ENET1_RGMII_RD2 0x57e
+ MX93_PAD_ENET2_RD3__ENET1_RGMII_RD3 0x57e
+ MX93_PAD_ENET2_RXC__ENET1_RGMII_RXC 0x5fe
+ MX93_PAD_ENET2_RX_CTL__ENET1_RGMII_RX_CTL 0x57e
+ MX93_PAD_ENET2_TD0__ENET1_RGMII_TD0 0x57e
+ MX93_PAD_ENET2_TD1__ENET1_RGMII_TD1 0x57e
+ MX93_PAD_ENET2_TD2__ENET1_RGMII_TD2 0x57e
+ MX93_PAD_ENET2_TD3__ENET1_RGMII_TD3 0x57e
+ MX93_PAD_ENET2_TXC__ENET1_RGMII_TXC 0x5fe
+ MX93_PAD_ENET2_TX_CTL__ENET1_RGMII_TX_CTL 0x57e
+ >;
+ };
+
pinctrl_uart1: uart1grp {
fsl,pins = <
MX93_PAD_UART1_RXD__LPUART1_RX 0x31e
diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi
index abb3fbe4ba22..2076f9c9983a 100644
--- a/arch/arm64/boot/dts/freescale/imx93.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx93.dtsi
@@ -577,6 +577,54 @@
status = "disabled";
};
+ eqos: ethernet@428a0000 {
+ compatible = "nxp,imx93-dwmac-eqos", "snps,dwmac-5.10a";
+ reg = <0x428a0000 0x10000>;
+ interrupts = <GIC_SPI 183 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "eth_wake_irq", "macirq";
+ clocks = <&clk IMX93_CLK_ENET_QOS_GATE>,
+ <&clk IMX93_CLK_ENET_QOS_GATE>,
+ <&clk IMX93_CLK_ENET_TIMER2>,
+ <&clk IMX93_CLK_ENET>,
+ <&clk IMX93_CLK_ENET_QOS_GATE>;
+ clock-names = "stmmaceth", "pclk", "ptp_ref", "tx", "mem";
+ assigned-clocks = <&clk IMX93_CLK_ENET_TIMER2>,
+ <&clk IMX93_CLK_ENET>;
+ assigned-clock-parents = <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>,
+ <&clk IMX93_CLK_SYS_PLL_PFD0_DIV2>;
+ assigned-clock-rates = <100000000>, <250000000>;
+ intf_mode = <&wakeupmix_gpr 0x28>;
+ clk_csr = <0>;
+ status = "disabled";
+ };
+
+ fec: ethernet@42890000 {
+ compatible = "fsl,imx93-fec", "fsl,imx8mq-fec", "fsl,imx6sx-fec";
+ reg = <0x42890000 0x10000>;
+ interrupts = <GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 180 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 181 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 182 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk IMX93_CLK_ENET1_GATE>,
+ <&clk IMX93_CLK_ENET1_GATE>,
+ <&clk IMX93_CLK_ENET_TIMER1>,
+ <&clk IMX93_CLK_ENET_REF>,
+ <&clk IMX93_CLK_ENET_REF_PHY>;
+ clock-names = "ipg", "ahb", "ptp",
+ "enet_clk_ref", "enet_out";
+ assigned-clocks = <&clk IMX93_CLK_ENET_TIMER1>,
+ <&clk IMX93_CLK_ENET_REF>,
+ <&clk IMX93_CLK_ENET_REF_PHY>;
+ assigned-clock-parents = <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>,
+ <&clk IMX93_CLK_SYS_PLL_PFD0_DIV2>,
+ <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>;
+ assigned-clock-rates = <100000000>, <250000000>, <50000000>;
+ fsl,num-tx-queues = <3>;
+ fsl,num-rx-queues = <3>;
+ status = "disabled";
+ };
+
usdhc3: mmc@428b0000 {
compatible = "fsl,imx93-usdhc", "fsl,imx8mm-usdhc";
reg = <0x428b0000 0x10000>;
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index c4f14415f5f0..25cd3808ecbe 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -161,43 +161,39 @@ static int ccm_encrypt(struct aead_request *req)
memcpy(buf, req->iv, AES_BLOCK_SIZE);
err = skcipher_walk_aead_encrypt(&walk, req, false);
- if (unlikely(err))
- return err;
kernel_neon_begin();
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
- do {
+ while (walk.nbytes) {
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+ bool final = walk.nbytes == walk.total;
- if (walk.nbytes == walk.total)
+ if (final)
tail = 0;
ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes - tail, ctx->key_enc,
num_rounds(ctx), mac, walk.iv);
- if (walk.nbytes == walk.total)
- ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+ if (!final)
+ kernel_neon_end();
+ err = skcipher_walk_done(&walk, tail);
+ if (!final)
+ kernel_neon_begin();
+ }
- kernel_neon_end();
+ ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
- if (walk.nbytes) {
- err = skcipher_walk_done(&walk, tail);
- if (unlikely(err))
- return err;
- if (unlikely(walk.nbytes))
- kernel_neon_begin();
- }
- } while (walk.nbytes);
+ kernel_neon_end();
/* copy authtag to end of dst */
scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
crypto_aead_authsize(aead), 1);
- return 0;
+ return err;
}
static int ccm_decrypt(struct aead_request *req)
@@ -219,37 +215,36 @@ static int ccm_decrypt(struct aead_request *req)
memcpy(buf, req->iv, AES_BLOCK_SIZE);
err = skcipher_walk_aead_decrypt(&walk, req, false);
- if (unlikely(err))
- return err;
kernel_neon_begin();
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
- do {
+ while (walk.nbytes) {
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+ bool final = walk.nbytes == walk.total;
- if (walk.nbytes == walk.total)
+ if (final)
tail = 0;
ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes - tail, ctx->key_enc,
num_rounds(ctx), mac, walk.iv);
- if (walk.nbytes == walk.total)
- ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+ if (!final)
+ kernel_neon_end();
+ err = skcipher_walk_done(&walk, tail);
+ if (!final)
+ kernel_neon_begin();
+ }
- kernel_neon_end();
+ ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
- if (walk.nbytes) {
- err = skcipher_walk_done(&walk, tail);
- if (unlikely(err))
- return err;
- if (unlikely(walk.nbytes))
- kernel_neon_begin();
- }
- } while (walk.nbytes);
+ kernel_neon_end();
+
+ if (unlikely(err))
+ return err;
/* compare calculated auth tag with the stored one */
scatterwalk_map_and_copy(buf, req->src,
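
The reworked CCM loops above keep NEON enabled only while data is actually being processed: kernel_neon_end()/kernel_neon_begin() now bracket each skcipher_walk_done() call (which may sleep or fault), the final MAC computation happens exactly once after the loop, and the walk error is only propagated after the tag has been produced. Below is a minimal stand-alone sketch of that control flow; fake_walk, neon_begin(), crypt_chunk() and friends are illustrative stand-ins, not kernel APIs.

#include <stdbool.h>
#include <stddef.h>

struct fake_walk { size_t nbytes, total; };	/* stand-in for struct skcipher_walk */

static void neon_begin(void) {}			/* kernel_neon_begin() */
static void neon_end(void) {}			/* kernel_neon_end() */
static void crypt_chunk(size_t n) { (void)n; }	/* ce_aes_ccm_{en,de}crypt() */
static void crypt_final(void) {}		/* ce_aes_ccm_final() */

/* Hand back "tail" bytes and advance the walk; always succeeds in this sketch. */
static int walk_done(struct fake_walk *w, size_t tail)
{
	w->total -= w->nbytes - tail;
	w->nbytes = w->total < 4096 ? w->total : 4096;
	return 0;
}

static int ccm_walk_sketch(struct fake_walk *w)
{
	int err = 0;

	neon_begin();
	while (w->nbytes) {
		size_t tail = w->nbytes % 16;
		bool final = w->nbytes == w->total;

		if (final)
			tail = 0;
		crypt_chunk(w->nbytes - tail);
		if (!final)
			neon_end();		/* drop NEON across walk_done() */
		err = walk_done(w, tail);
		if (!final)
			neon_begin();
	}
	crypt_final();				/* compute the tag exactly once */
	neon_end();
	return err;
}

int main(void)
{
	struct fake_walk w = { .nbytes = 4096, .total = 10000 };

	return ccm_walk_sketch(&w);
}
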
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index e5e9adc1fcf4..97331b454ea8 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -9,6 +9,7 @@
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/aes.h>
+#include <crypto/gcm.h>
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
@@ -28,7 +29,8 @@ MODULE_ALIAS_CRYPTO("ghash");
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
-#define GCM_IV_SIZE 12
+
+#define RFC4106_NONCE_SIZE 4
struct ghash_key {
be128 k;
@@ -43,6 +45,7 @@ struct ghash_desc_ctx {
struct gcm_aes_ctx {
struct crypto_aes_ctx aes_key;
+ u8 nonce[RFC4106_NONCE_SIZE];
struct ghash_key ghash_key;
};
@@ -226,8 +229,8 @@ static int num_rounds(struct crypto_aes_ctx *ctx)
return 6 + ctx->key_length / 4;
}
-static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
- unsigned int keylen)
+static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey,
+ unsigned int keylen)
{
struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
u8 key[GHASH_BLOCK_SIZE];
@@ -258,17 +261,9 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
return 0;
}
-static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
- switch (authsize) {
- case 4:
- case 8:
- case 12 ... 16:
- break;
- default:
- return -EINVAL;
- }
- return 0;
+ return crypto_gcm_check_authsize(authsize);
}
static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
@@ -302,13 +297,12 @@ static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
}
}
-static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
+static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[], u32 len)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
u8 buf[GHASH_BLOCK_SIZE];
struct scatter_walk walk;
- u32 len = req->assoclen;
int buf_count = 0;
scatterwalk_start(&walk, req->src);
@@ -338,27 +332,25 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
}
}
-static int gcm_encrypt(struct aead_request *req)
+static int gcm_encrypt(struct aead_request *req, char *iv, int assoclen)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
int nrounds = num_rounds(&ctx->aes_key);
struct skcipher_walk walk;
u8 buf[AES_BLOCK_SIZE];
- u8 iv[AES_BLOCK_SIZE];
u64 dg[2] = {};
be128 lengths;
u8 *tag;
int err;
- lengths.a = cpu_to_be64(req->assoclen * 8);
+ lengths.a = cpu_to_be64(assoclen * 8);
lengths.b = cpu_to_be64(req->cryptlen * 8);
- if (req->assoclen)
- gcm_calculate_auth_mac(req, dg);
+ if (assoclen)
+ gcm_calculate_auth_mac(req, dg, assoclen);
- memcpy(iv, req->iv, GCM_IV_SIZE);
- put_unaligned_be32(2, iv + GCM_IV_SIZE);
+ put_unaligned_be32(2, iv + GCM_AES_IV_SIZE);
err = skcipher_walk_aead_encrypt(&walk, req, false);
@@ -403,7 +395,7 @@ static int gcm_encrypt(struct aead_request *req)
return 0;
}
-static int gcm_decrypt(struct aead_request *req)
+static int gcm_decrypt(struct aead_request *req, char *iv, int assoclen)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
@@ -412,21 +404,19 @@ static int gcm_decrypt(struct aead_request *req)
struct skcipher_walk walk;
u8 otag[AES_BLOCK_SIZE];
u8 buf[AES_BLOCK_SIZE];
- u8 iv[AES_BLOCK_SIZE];
u64 dg[2] = {};
be128 lengths;
u8 *tag;
int ret;
int err;
- lengths.a = cpu_to_be64(req->assoclen * 8);
+ lengths.a = cpu_to_be64(assoclen * 8);
lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);
- if (req->assoclen)
- gcm_calculate_auth_mac(req, dg);
+ if (assoclen)
+ gcm_calculate_auth_mac(req, dg, assoclen);
- memcpy(iv, req->iv, GCM_IV_SIZE);
- put_unaligned_be32(2, iv + GCM_IV_SIZE);
+ put_unaligned_be32(2, iv + GCM_AES_IV_SIZE);
scatterwalk_map_and_copy(otag, req->src,
req->assoclen + req->cryptlen - authsize,
@@ -471,14 +461,76 @@ static int gcm_decrypt(struct aead_request *req)
return ret ? -EBADMSG : 0;
}
-static struct aead_alg gcm_aes_alg = {
- .ivsize = GCM_IV_SIZE,
+static int gcm_aes_encrypt(struct aead_request *req)
+{
+ u8 iv[AES_BLOCK_SIZE];
+
+ memcpy(iv, req->iv, GCM_AES_IV_SIZE);
+ return gcm_encrypt(req, iv, req->assoclen);
+}
+
+static int gcm_aes_decrypt(struct aead_request *req)
+{
+ u8 iv[AES_BLOCK_SIZE];
+
+ memcpy(iv, req->iv, GCM_AES_IV_SIZE);
+ return gcm_decrypt(req, iv, req->assoclen);
+}
+
+static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey,
+ unsigned int keylen)
+{
+ struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
+ int err;
+
+ keylen -= RFC4106_NONCE_SIZE;
+ err = gcm_aes_setkey(tfm, inkey, keylen);
+ if (err)
+ return err;
+
+ memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE);
+ return 0;
+}
+
+static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ return crypto_rfc4106_check_authsize(authsize);
+}
+
+static int rfc4106_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+ u8 iv[AES_BLOCK_SIZE];
+
+ memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
+ memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);
+
+ return crypto_ipsec_check_assoclen(req->assoclen) ?:
+ gcm_encrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
+}
+
+static int rfc4106_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+ u8 iv[AES_BLOCK_SIZE];
+
+ memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
+ memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);
+
+ return crypto_ipsec_check_assoclen(req->assoclen) ?:
+ gcm_decrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
+}
+
+static struct aead_alg gcm_aes_algs[] = {{
+ .ivsize = GCM_AES_IV_SIZE,
.chunksize = AES_BLOCK_SIZE,
.maxauthsize = AES_BLOCK_SIZE,
- .setkey = gcm_setkey,
- .setauthsize = gcm_setauthsize,
- .encrypt = gcm_encrypt,
- .decrypt = gcm_decrypt,
+ .setkey = gcm_aes_setkey,
+ .setauthsize = gcm_aes_setauthsize,
+ .encrypt = gcm_aes_encrypt,
+ .decrypt = gcm_aes_decrypt,
.base.cra_name = "gcm(aes)",
.base.cra_driver_name = "gcm-aes-ce",
@@ -487,7 +539,23 @@ static struct aead_alg gcm_aes_alg = {
.base.cra_ctxsize = sizeof(struct gcm_aes_ctx) +
4 * sizeof(u64[2]),
.base.cra_module = THIS_MODULE,
-};
+}, {
+ .ivsize = GCM_RFC4106_IV_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .maxauthsize = AES_BLOCK_SIZE,
+ .setkey = rfc4106_setkey,
+ .setauthsize = rfc4106_setauthsize,
+ .encrypt = rfc4106_encrypt,
+ .decrypt = rfc4106_decrypt,
+
+ .base.cra_name = "rfc4106(gcm(aes))",
+ .base.cra_driver_name = "rfc4106-gcm-aes-ce",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct gcm_aes_ctx) +
+ 4 * sizeof(u64[2]),
+ .base.cra_module = THIS_MODULE,
+}};
static int __init ghash_ce_mod_init(void)
{
@@ -495,7 +563,8 @@ static int __init ghash_ce_mod_init(void)
return -ENODEV;
if (cpu_have_named_feature(PMULL))
- return crypto_register_aead(&gcm_aes_alg);
+ return crypto_register_aeads(gcm_aes_algs,
+ ARRAY_SIZE(gcm_aes_algs));
return crypto_register_shash(&ghash_alg);
}
@@ -503,7 +572,7 @@ static int __init ghash_ce_mod_init(void)
static void __exit ghash_ce_mod_exit(void)
{
if (cpu_have_named_feature(PMULL))
- crypto_unregister_aead(&gcm_aes_alg);
+ crypto_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs));
else
crypto_unregister_shash(&ghash_alg);
}
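
The new rfc4106 entry points above feed the 4-byte salt stored with the key plus the 8-byte per-request IV into the same gcm_encrypt()/gcm_decrypt() helpers used by plain gcm(aes), with the 32-bit block counter seeded to 2 (counter value 1 is reserved for the tag block). The following stand-alone sketch shows the resulting 16-byte counter-block layout; the constant names mirror the kernel headers, the function itself is illustrative only.

#include <stdint.h>
#include <string.h>

#define AES_BLOCK_SIZE		16
#define GCM_AES_IV_SIZE		12	/* salt + explicit IV */
#define GCM_RFC4106_IV_SIZE	8	/* per-request IV from req->iv */
#define RFC4106_NONCE_SIZE	4	/* salt kept at the end of the key */

static void rfc4106_build_ctr_block(uint8_t out[AES_BLOCK_SIZE],
				    const uint8_t nonce[RFC4106_NONCE_SIZE],
				    const uint8_t iv[GCM_RFC4106_IV_SIZE])
{
	memcpy(out, nonce, RFC4106_NONCE_SIZE);
	memcpy(out + RFC4106_NONCE_SIZE, iv, GCM_RFC4106_IV_SIZE);

	/* put_unaligned_be32(2, iv + GCM_AES_IV_SIZE) in the kernel code */
	out[GCM_AES_IV_SIZE + 0] = 0;
	out[GCM_AES_IV_SIZE + 1] = 0;
	out[GCM_AES_IV_SIZE + 2] = 0;
	out[GCM_AES_IV_SIZE + 3] = 2;
}

int main(void)
{
	uint8_t nonce[RFC4106_NONCE_SIZE] = { 0xca, 0xfe, 0xba, 0xbe };
	uint8_t iv[GCM_RFC4106_IV_SIZE]   = { 0 };
	uint8_t ctr[AES_BLOCK_SIZE];

	rfc4106_build_ctr_block(ctr, nonce, iv);
	return ctr[AES_BLOCK_SIZE - 1] == 2 ? 0 : 1;
}
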
diff --git a/arch/arm64/crypto/sm4-ce-ccm-glue.c b/arch/arm64/crypto/sm4-ce-ccm-glue.c
index f2cec7b52efc..5e7e17bbec81 100644
--- a/arch/arm64/crypto/sm4-ce-ccm-glue.c
+++ b/arch/arm64/crypto/sm4-ce-ccm-glue.c
@@ -166,7 +166,7 @@ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk,
unsigned int nbytes, u8 *mac))
{
u8 __aligned(8) ctr0[SM4_BLOCK_SIZE];
- int err;
+ int err = 0;
/* preserve the initial ctr0 for the TAG */
memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE);
@@ -177,33 +177,37 @@ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk,
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
- do {
+ while (walk->nbytes && walk->nbytes != walk->total) {
unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
- const u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- if (walk->nbytes == walk->total)
- tail = 0;
+ sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
+ walk->src.virt.addr, walk->iv,
+ walk->nbytes - tail, mac);
+
+ kernel_neon_end();
+
+ err = skcipher_walk_done(walk, tail);
+
+ kernel_neon_begin();
+ }
- if (walk->nbytes - tail)
- sm4_ce_ccm_crypt(rkey_enc, dst, src, walk->iv,
- walk->nbytes - tail, mac);
+ if (walk->nbytes) {
+ sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
+ walk->src.virt.addr, walk->iv,
+ walk->nbytes, mac);
- if (walk->nbytes == walk->total)
- sm4_ce_ccm_final(rkey_enc, ctr0, mac);
+ sm4_ce_ccm_final(rkey_enc, ctr0, mac);
kernel_neon_end();
- if (walk->nbytes) {
- err = skcipher_walk_done(walk, tail);
- if (err)
- return err;
- if (walk->nbytes)
- kernel_neon_begin();
- }
- } while (walk->nbytes > 0);
+ err = skcipher_walk_done(walk, 0);
+ } else {
+ sm4_ce_ccm_final(rkey_enc, ctr0, mac);
- return 0;
+ kernel_neon_end();
+ }
+
+ return err;
}
static int ccm_encrypt(struct aead_request *req)
diff --git a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c
index c450a2025ca9..73bfb6972d3a 100644
--- a/arch/arm64/crypto/sm4-ce-gcm-glue.c
+++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c
@@ -135,22 +135,23 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u8 ghash[])
}
static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
- struct sm4_gcm_ctx *ctx, u8 ghash[],
+ u8 ghash[], int err,
void (*sm4_ce_pmull_gcm_crypt)(const u32 *rkey_enc,
u8 *dst, const u8 *src, u8 *iv,
unsigned int nbytes, u8 *ghash,
const u8 *ghash_table, const u8 *lengths))
{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) iv[SM4_BLOCK_SIZE];
be128 __aligned(8) lengths;
- int err;
memset(ghash, 0, SM4_BLOCK_SIZE);
lengths.a = cpu_to_be64(req->assoclen * 8);
lengths.b = cpu_to_be64(walk->total * 8);
- memcpy(iv, walk->iv, GCM_IV_SIZE);
+ memcpy(iv, req->iv, GCM_IV_SIZE);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
kernel_neon_begin();
@@ -158,49 +159,51 @@ static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
if (req->assoclen)
gcm_calculate_auth_mac(req, ghash);
- do {
+ while (walk->nbytes) {
unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
const u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
if (walk->nbytes == walk->total) {
- tail = 0;
-
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
walk->nbytes, ghash,
ctx->ghash_table,
(const u8 *)&lengths);
- } else if (walk->nbytes - tail) {
- sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
- walk->nbytes - tail, ghash,
- ctx->ghash_table, NULL);
+
+ kernel_neon_end();
+
+ return skcipher_walk_done(walk, 0);
}
+ sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
+ walk->nbytes - tail, ghash,
+ ctx->ghash_table, NULL);
+
kernel_neon_end();
err = skcipher_walk_done(walk, tail);
- if (err)
- return err;
- if (walk->nbytes)
- kernel_neon_begin();
- } while (walk->nbytes > 0);
- return 0;
+ kernel_neon_begin();
+ }
+
+ sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, NULL, NULL, iv,
+ walk->nbytes, ghash, ctx->ghash_table,
+ (const u8 *)&lengths);
+
+ kernel_neon_end();
+
+ return err;
}
static int gcm_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
struct skcipher_walk walk;
int err;
err = skcipher_walk_aead_encrypt(&walk, req, false);
- if (err)
- return err;
-
- err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_enc);
+ err = gcm_crypt(req, &walk, ghash, err, sm4_ce_pmull_gcm_enc);
if (err)
return err;
@@ -215,17 +218,13 @@ static int gcm_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
unsigned int authsize = crypto_aead_authsize(aead);
- struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
u8 authtag[SM4_BLOCK_SIZE];
struct skcipher_walk walk;
int err;
err = skcipher_walk_aead_decrypt(&walk, req, false);
- if (err)
- return err;
-
- err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_dec);
+ err = gcm_crypt(req, &walk, ghash, err, sm4_ce_pmull_gcm_dec);
if (err)
return err;
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 48d4473e8eee..01281a5336cf 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -190,5 +190,10 @@ static inline void gic_arch_enable_irqs(void)
asm volatile ("msr daifclr, #3" : : : "memory");
}
+static inline bool gic_has_relaxed_pmr_sync(void)
+{
+ return cpus_have_cap(ARM64_HAS_GIC_PRIO_RELAXED_SYNC);
+}
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_ARCH_GICV3_H */
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 2cfc4245d2e2..3dd8982a9ce3 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -11,6 +11,8 @@
#include <linux/kasan-checks.h>
+#include <asm/alternative-macros.h>
+
#define __nops(n) ".rept " #n "\nnop\n.endr\n"
#define nops(n) asm volatile(__nops(n))
@@ -41,10 +43,11 @@
#ifdef CONFIG_ARM64_PSEUDO_NMI
#define pmr_sync() \
do { \
- extern struct static_key_false gic_pmr_sync; \
- \
- if (static_branch_unlikely(&gic_pmr_sync)) \
- dsb(sy); \
+ asm volatile( \
+ ALTERNATIVE_CB("dsb sy", \
+ ARM64_HAS_GIC_PRIO_RELAXED_SYNC, \
+ alt_cb_patch_nops) \
+ ); \
} while(0)
#else
#define pmr_sync() do {} while (0)
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 03d1c9d7af82..6bf013fb110d 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -769,6 +769,12 @@ static __always_inline bool system_supports_sme(void)
cpus_have_const_cap(ARM64_SME);
}
+static __always_inline bool system_supports_sme2(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_SME) &&
+ cpus_have_const_cap(ARM64_SME2);
+}
+
static __always_inline bool system_supports_fa64(void)
{
return IS_ENABLED(CONFIG_ARM64_SME) &&
@@ -806,7 +812,7 @@ static inline bool system_has_full_ptr_auth(void)
static __always_inline bool system_uses_irq_prio_masking(void)
{
return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
- cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING);
+ cpus_have_const_cap(ARM64_HAS_GIC_PRIO_MASKING);
}
static inline bool system_supports_mte(void)
@@ -864,7 +870,11 @@ static inline bool cpu_has_hw_af(void)
if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM))
return false;
- mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+ /*
+	 * Use the cached version to avoid an emulated mrs operation on
+	 * KVM guests.
+ */
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
return cpuid_feature_extract_unsigned_field(mmfr1,
ID_AA64MMFR1_EL1_HAFDBS_SHIFT);
}
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index de4ff90785b2..acaa39f6381a 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -114,6 +114,8 @@ static inline unsigned long efi_get_kimg_min_align(void)
#define EFI_ALLOC_ALIGN SZ_64K
#define EFI_ALLOC_LIMIT ((1UL << 48) - 1)
+extern unsigned long primary_entry_offset(void);
+
/*
* On ARM systems, virtually remapped UEFI runtime services are set up in two
* distinct stages:
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 668569adf4d3..2cdd010f9524 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -53,10 +53,10 @@
cbz x0, .Lskip_spe_\@ // Skip if SPE not present
mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2,
- and x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
+ and x0, x0, #(1 << PMBIDR_EL1_P_SHIFT)
cbnz x0, .Lskip_spe_el2_\@ // then permit sampling of physical
- mov x0, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
- 1 << SYS_PMSCR_EL2_PA_SHIFT)
+ mov x0, #(1 << PMSCR_EL2_PCT_SHIFT | \
+ 1 << PMSCR_EL2_PA_SHIFT)
msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter
.Lskip_spe_el2_\@:
mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
@@ -177,7 +177,7 @@
/**
* Initialize EL2 registers to sane values. This should be called early on all
* cores that were booted in EL2. Note that everything gets initialised as
- * if VHE was not evailable. The kernel context will be upgraded to VHE
+ * if VHE was not available. The kernel context will be upgraded to VHE
* if possible later on in the boot process
*
* Regs: x0, x1 and x2 are clobbered.
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 206de10524e3..c9f15b9e3c71 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -350,6 +350,7 @@
#define ESR_ELx_SME_ISS_ILL 1
#define ESR_ELx_SME_ISS_SM_DISABLED 2
#define ESR_ELx_SME_ISS_ZA_DISABLED 3
+#define ESR_ELx_SME_ISS_ZT_DISABLED 4
#ifndef __ASSEMBLY__
#include <asm/types.h>
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index e6fa1e2982c8..67f2fb781f59 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -61,7 +61,7 @@ extern void fpsimd_kvm_prepare(void);
struct cpu_fp_state {
struct user_fpsimd_state *st;
void *sve_state;
- void *za_state;
+ void *sme_state;
u64 *svcr;
unsigned int sve_vl;
unsigned int sme_vl;
@@ -105,6 +105,13 @@ static inline void *sve_pffr(struct thread_struct *thread)
return (char *)thread->sve_state + sve_ffr_offset(vl);
}
+static inline void *thread_zt_state(struct thread_struct *thread)
+{
+ /* The ZT register state is stored immediately after the ZA state */
+ unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread));
+ return thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq);
+}
+
extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
extern void sve_load_state(void const *state, u32 const *pfpsr,
int restore_ffr);
@@ -112,12 +119,13 @@ extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
extern void sve_set_vq(unsigned long vq_minus_1);
extern void sme_set_vq(unsigned long vq_minus_1);
-extern void za_save_state(void *state);
-extern void za_load_state(void const *state);
+extern void sme_save_state(void *state, int zt);
+extern void sme_load_state(void const *state, int zt);
struct arm64_cpu_capabilities;
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern void sme2_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern u64 read_zcr_features(void);
@@ -355,14 +363,20 @@ extern int sme_get_current_vl(void);
/*
* Return how many bytes of memory are required to store the full SME
- * specific state (currently just ZA) for task, given task's currently
- * configured vector length.
+ * specific state for task, given task's currently configured vector
+ * length.
*/
-static inline size_t za_state_size(struct task_struct const *task)
+static inline size_t sme_state_size(struct task_struct const *task)
{
unsigned int vl = task_get_sme_vl(task);
+ size_t size;
+
+ size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+
+ if (system_supports_sme2())
+ size += ZT_SIG_REG_SIZE;
- return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+ return size;
}
#else
@@ -382,7 +396,7 @@ static inline int sme_max_virtualisable_vl(void) { return 0; }
static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
static inline int sme_get_current_vl(void) { return -EINVAL; }
-static inline size_t za_state_size(struct task_struct const *task)
+static inline size_t sme_state_size(struct task_struct const *task)
{
return 0;
}
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 5e0910cf4832..cd03819a3b68 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -221,6 +221,28 @@
.endm
/*
+ * LDR (ZT0)
+ *
+ * LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+ _check_general_reg \nx
+ .inst 0xe11f8000 \
+ | (\nx << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ * STR ZT0, nx
+ */
+.macro _str_zt nx
+ _check_general_reg \nx
+ .inst 0xe13f8000 \
+ | (\nx << 5)
+.endm
+
+/*
* Zero the entire ZA array
* ZERO ZA
*/
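
The LDR/STR (ZT0) macros above hand-assemble the SME2 instructions, OR-ing the general-purpose base register number into bits [9:5] of a fixed opcode. A quick stand-alone check of the resulting encodings; the values follow directly from the macros, while the helper names here are made up for illustration.

#include <stdint.h>
#include <stdio.h>

static uint32_t ldr_zt0(unsigned int xn) { return 0xe11f8000u | (xn << 5); }
static uint32_t str_zt0(unsigned int xn) { return 0xe13f8000u | (xn << 5); }

int main(void)
{
	printf("ldr zt0, [x0] -> 0x%08x\n", (unsigned)ldr_zt0(0));	/* 0xe11f8000 */
	printf("str zt0, [x1] -> 0x%08x\n", (unsigned)str_zt0(1));	/* 0xe13f8020 */
	return 0;
}
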
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 5664729800ae..1c2672bbbf37 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -62,20 +62,7 @@ extern unsigned long ftrace_graph_call;
extern void return_to_handler(void);
-static inline unsigned long ftrace_call_adjust(unsigned long addr)
-{
- /*
- * Adjust addr to point at the BL in the callsite.
- * See ftrace_init_nop() for the callsite sequence.
- */
- if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS))
- return addr + AARCH64_INSN_SIZE;
- /*
- * addr is the address of the mcount call instruction.
- * recordmcount does the necessary offset calculation.
- */
- return addr;
-}
+unsigned long ftrace_call_adjust(unsigned long addr);
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
struct dyn_ftrace;
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 06dd12c514e6..5d45f19fda7f 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -31,12 +31,20 @@
#define COMPAT_HWCAP_VFPD32 (1 << 19)
#define COMPAT_HWCAP_LPAE (1 << 20)
#define COMPAT_HWCAP_EVTSTRM (1 << 21)
+#define COMPAT_HWCAP_FPHP (1 << 22)
+#define COMPAT_HWCAP_ASIMDHP (1 << 23)
+#define COMPAT_HWCAP_ASIMDDP (1 << 24)
+#define COMPAT_HWCAP_ASIMDFHM (1 << 25)
+#define COMPAT_HWCAP_ASIMDBF16 (1 << 26)
+#define COMPAT_HWCAP_I8MM (1 << 27)
#define COMPAT_HWCAP2_AES (1 << 0)
#define COMPAT_HWCAP2_PMULL (1 << 1)
#define COMPAT_HWCAP2_SHA1 (1 << 2)
#define COMPAT_HWCAP2_SHA2 (1 << 3)
#define COMPAT_HWCAP2_CRC32 (1 << 4)
+#define COMPAT_HWCAP2_SB (1 << 5)
+#define COMPAT_HWCAP2_SSBS (1 << 6)
#ifndef __ASSEMBLY__
#include <linux/log2.h>
@@ -123,6 +131,12 @@
#define KERNEL_HWCAP_CSSC __khwcap2_feature(CSSC)
#define KERNEL_HWCAP_RPRFM __khwcap2_feature(RPRFM)
#define KERNEL_HWCAP_SVE2P1 __khwcap2_feature(SVE2P1)
+#define KERNEL_HWCAP_SME2 __khwcap2_feature(SME2)
+#define KERNEL_HWCAP_SME2P1 __khwcap2_feature(SME2P1)
+#define KERNEL_HWCAP_SME_I16I32 __khwcap2_feature(SME_I16I32)
+#define KERNEL_HWCAP_SME_BI32I32 __khwcap2_feature(SME_BI32I32)
+#define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16)
+#define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16)
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index aaf1f52fbf3e..139a88e4e852 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -420,6 +420,7 @@ __AARCH64_INSN_FUNCS(sb, 0xFFFFFFFF, 0xD50330FF)
__AARCH64_INSN_FUNCS(clrex, 0xFFFFF0FF, 0xD503305F)
__AARCH64_INSN_FUNCS(ssbb, 0xFFFFFFFF, 0xD503309F)
__AARCH64_INSN_FUNCS(pssbb, 0xFFFFFFFF, 0xD503349F)
+__AARCH64_INSN_FUNCS(bti, 0xFFFFFF3F, 0xD503241f)
#undef __AARCH64_INSN_FUNCS
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index b57b9b1e4344..e0f5f6b73edd 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -21,43 +21,77 @@
* exceptions should be unmasked.
*/
-/*
- * CPU interrupt mask handling.
- */
-static inline void arch_local_irq_enable(void)
+static __always_inline bool __irqflags_uses_pmr(void)
{
- if (system_has_prio_mask_debugging()) {
- u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+ return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
+ alternative_has_feature_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
+}
+static __always_inline void __daif_local_irq_enable(void)
+{
+ barrier();
+ asm volatile("msr daifclr, #3");
+ barrier();
+}
+
+static __always_inline void __pmr_local_irq_enable(void)
+{
+ if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) {
+ u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
}
- asm volatile(ALTERNATIVE(
- "msr daifclr, #3 // arch_local_irq_enable",
- __msr_s(SYS_ICC_PMR_EL1, "%0"),
- ARM64_HAS_IRQ_PRIO_MASKING)
- :
- : "r" ((unsigned long) GIC_PRIO_IRQON)
- : "memory");
-
+ barrier();
+ write_sysreg_s(GIC_PRIO_IRQON, SYS_ICC_PMR_EL1);
pmr_sync();
+ barrier();
}
-static inline void arch_local_irq_disable(void)
+static inline void arch_local_irq_enable(void)
{
- if (system_has_prio_mask_debugging()) {
- u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+ if (__irqflags_uses_pmr()) {
+ __pmr_local_irq_enable();
+ } else {
+ __daif_local_irq_enable();
+ }
+}
+static __always_inline void __daif_local_irq_disable(void)
+{
+ barrier();
+ asm volatile("msr daifset, #3");
+ barrier();
+}
+
+static __always_inline void __pmr_local_irq_disable(void)
+{
+ if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) {
+ u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
}
- asm volatile(ALTERNATIVE(
- "msr daifset, #3 // arch_local_irq_disable",
- __msr_s(SYS_ICC_PMR_EL1, "%0"),
- ARM64_HAS_IRQ_PRIO_MASKING)
- :
- : "r" ((unsigned long) GIC_PRIO_IRQOFF)
- : "memory");
+ barrier();
+ write_sysreg_s(GIC_PRIO_IRQOFF, SYS_ICC_PMR_EL1);
+ barrier();
+}
+
+static inline void arch_local_irq_disable(void)
+{
+ if (__irqflags_uses_pmr()) {
+ __pmr_local_irq_disable();
+ } else {
+ __daif_local_irq_disable();
+ }
+}
+
+static __always_inline unsigned long __daif_local_save_flags(void)
+{
+ return read_sysreg(daif);
+}
+
+static __always_inline unsigned long __pmr_local_save_flags(void)
+{
+ return read_sysreg_s(SYS_ICC_PMR_EL1);
}
/*
@@ -65,69 +99,108 @@ static inline void arch_local_irq_disable(void)
*/
static inline unsigned long arch_local_save_flags(void)
{
- unsigned long flags;
+ if (__irqflags_uses_pmr()) {
+ return __pmr_local_save_flags();
+ } else {
+ return __daif_local_save_flags();
+ }
+}
- asm volatile(ALTERNATIVE(
- "mrs %0, daif",
- __mrs_s("%0", SYS_ICC_PMR_EL1),
- ARM64_HAS_IRQ_PRIO_MASKING)
- : "=&r" (flags)
- :
- : "memory");
+static __always_inline bool __daif_irqs_disabled_flags(unsigned long flags)
+{
+ return flags & PSR_I_BIT;
+}
- return flags;
+static __always_inline bool __pmr_irqs_disabled_flags(unsigned long flags)
+{
+ return flags != GIC_PRIO_IRQON;
}
-static inline int arch_irqs_disabled_flags(unsigned long flags)
+static inline bool arch_irqs_disabled_flags(unsigned long flags)
{
- int res;
+ if (__irqflags_uses_pmr()) {
+ return __pmr_irqs_disabled_flags(flags);
+ } else {
+ return __daif_irqs_disabled_flags(flags);
+ }
+}
- asm volatile(ALTERNATIVE(
- "and %w0, %w1, #" __stringify(PSR_I_BIT),
- "eor %w0, %w1, #" __stringify(GIC_PRIO_IRQON),
- ARM64_HAS_IRQ_PRIO_MASKING)
- : "=&r" (res)
- : "r" ((int) flags)
- : "memory");
+static __always_inline bool __daif_irqs_disabled(void)
+{
+ return __daif_irqs_disabled_flags(__daif_local_save_flags());
+}
- return res;
+static __always_inline bool __pmr_irqs_disabled(void)
+{
+ return __pmr_irqs_disabled_flags(__pmr_local_save_flags());
}
-static inline int arch_irqs_disabled(void)
+static inline bool arch_irqs_disabled(void)
{
- return arch_irqs_disabled_flags(arch_local_save_flags());
+ if (__irqflags_uses_pmr()) {
+ return __pmr_irqs_disabled();
+ } else {
+ return __daif_irqs_disabled();
+ }
}
-static inline unsigned long arch_local_irq_save(void)
+static __always_inline unsigned long __daif_local_irq_save(void)
{
- unsigned long flags;
+ unsigned long flags = __daif_local_save_flags();
+
+ __daif_local_irq_disable();
+
+ return flags;
+}
- flags = arch_local_save_flags();
+static __always_inline unsigned long __pmr_local_irq_save(void)
+{
+ unsigned long flags = __pmr_local_save_flags();
/*
* There are too many states with IRQs disabled, just keep the current
* state if interrupts are already disabled/masked.
*/
- if (!arch_irqs_disabled_flags(flags))
- arch_local_irq_disable();
+ if (!__pmr_irqs_disabled_flags(flags))
+ __pmr_local_irq_disable();
return flags;
}
+static inline unsigned long arch_local_irq_save(void)
+{
+ if (__irqflags_uses_pmr()) {
+ return __pmr_local_irq_save();
+ } else {
+ return __daif_local_irq_save();
+ }
+}
+
+static __always_inline void __daif_local_irq_restore(unsigned long flags)
+{
+ barrier();
+ write_sysreg(flags, daif);
+ barrier();
+}
+
+static __always_inline void __pmr_local_irq_restore(unsigned long flags)
+{
+ barrier();
+ write_sysreg_s(flags, SYS_ICC_PMR_EL1);
+ pmr_sync();
+ barrier();
+}
+
/*
* restore saved IRQ state
*/
static inline void arch_local_irq_restore(unsigned long flags)
{
- asm volatile(ALTERNATIVE(
- "msr daif, %0",
- __msr_s(SYS_ICC_PMR_EL1, "%0"),
- ARM64_HAS_IRQ_PRIO_MASKING)
- :
- : "r" (flags)
- : "memory");
-
- pmr_sync();
+ if (__irqflags_uses_pmr()) {
+ __pmr_local_irq_restore(flags);
+ } else {
+ __daif_local_irq_restore(flags);
+ }
}
#endif /* __ASM_IRQFLAGS_H */
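
The irqflags rework above replaces the per-operation asm ALTERNATIVE blocks with pairs of __daif_*()/__pmr_*() C helpers selected by a single predicate (alternative_has_feature_unlikely(ARM64_HAS_GIC_PRIO_MASKING)), which keeps each implementation readable on its own. A stand-alone sketch of that dispatch shape follows; the system-register accesses are reduced to comments and a plain bool stands in for the cpucap test, so none of the names below are kernel APIs.

#include <stdbool.h>

static bool use_pmr;			/* stand-in for the GIC_PRIO_MASKING cap */

static void daif_irq_disable(void)	{ /* msr daifset, #3 */ }
static void daif_irq_enable(void)	{ /* msr daifclr, #3 */ }
static void pmr_irq_disable(void)	{ /* write GIC_PRIO_IRQOFF to ICC_PMR_EL1 */ }
static void pmr_irq_enable(void)	{ /* write GIC_PRIO_IRQON, then pmr_sync() */ }

static void local_irq_disable_sketch(void)
{
	if (use_pmr)
		pmr_irq_disable();
	else
		daif_irq_disable();
}

static void local_irq_enable_sketch(void)
{
	if (use_pmr)
		pmr_irq_enable();
	else
		daif_irq_enable();
}

int main(void)
{
	local_irq_disable_sketch();
	local_irq_enable_sketch();
	return 0;
}
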
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index 1436fa1cde24..d3acd9c87509 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -5,8 +5,8 @@
#include <asm/assembler.h>
#endif
-#define __ALIGN .align 2
-#define __ALIGN_STR ".align 2"
+#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT
+#define __ALIGN_STR ".balign " #CONFIG_FUNCTION_ALIGNMENT
/*
* When using in-kernel BTI we need to ensure that PCS-conformant
diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
index 6bf5adc56295..68908b82b168 100644
--- a/arch/arm64/include/asm/patching.h
+++ b/arch/arm64/include/asm/patching.h
@@ -7,6 +7,8 @@
int aarch64_insn_read(void *addr, u32 *insnp);
int aarch64_insn_write(void *addr, u32 insn);
+int aarch64_insn_write_literal_u64(void *addr, u64 val);
+
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 65e78999c75d..27455bfd64bc 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -275,6 +275,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
}
extern void __sync_icache_dcache(pte_t pteval);
+bool pgattr_change_is_safe(u64 old, u64 new);
/*
* PTE bits configuration in the presence of hardware Dirty Bit Management
@@ -292,7 +293,7 @@ extern void __sync_icache_dcache(pte_t pteval);
* PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
*/
-static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
+static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
pte_t pte)
{
pte_t old_pte;
@@ -318,6 +319,9 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
"%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(old_pte), pte_val(pte));
+ VM_WARN_ONCE(!pgattr_change_is_safe(pte_val(old_pte), pte_val(pte)),
+ "%s: unsafe attribute change: 0x%016llx -> 0x%016llx",
+ __func__, pte_val(old_pte), pte_val(pte));
}
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -346,7 +350,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
mte_sync_tags(old_pte, pte);
}
- __check_racy_pte_update(mm, ptep, pte);
+ __check_safe_pte_update(mm, ptep, pte);
set_pte(ptep, pte);
}
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index d51b32a69309..3918f2a67970 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -161,7 +161,7 @@ struct thread_struct {
enum fp_type fp_type; /* registers FPSIMD or SVE? */
unsigned int fpsimd_cpu;
void *sve_state; /* SVE registers, if any */
- void *za_state; /* ZA register, if any */
+ void *sme_state; /* ZA and ZT state, if any */
unsigned int vl[ARM64_VEC_MAX]; /* vector length */
unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */
unsigned long fault_address; /* fault info */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 41b332c054ab..47ec58031f11 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -194,7 +194,7 @@ struct pt_regs {
u32 unused2;
#endif
u64 sdei_ttbr1;
- /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */
+ /* Only valid when ARM64_HAS_GIC_PRIO_MASKING is enabled. */
u64 pmr_save;
u64 stackframe[2];
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index ff7da1268a52..13df982a0808 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -10,15 +10,16 @@
#ifdef CONFIG_SHADOW_CALL_STACK
scs_sp .req x18
- .macro scs_load tsk
- ldr scs_sp, [\tsk, #TSK_TI_SCS_SP]
+ .macro scs_load_current
+ get_current_task scs_sp
+ ldr scs_sp, [scs_sp, #TSK_TI_SCS_SP]
.endm
.macro scs_save tsk
str scs_sp, [\tsk, #TSK_TI_SCS_SP]
.endm
#else
- .macro scs_load tsk
+ .macro scs_load_current
.endm
.macro scs_save tsk
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 1312fb48f18b..043ecc3405e7 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -216,101 +216,22 @@
#define SYS_PAR_EL1_FST GENMASK(6, 1)
/*** Statistical Profiling Extension ***/
-/* ID registers */
-#define SYS_PMSIDR_EL1 sys_reg(3, 0, 9, 9, 7)
-#define SYS_PMSIDR_EL1_FE_SHIFT 0
-#define SYS_PMSIDR_EL1_FT_SHIFT 1
-#define SYS_PMSIDR_EL1_FL_SHIFT 2
-#define SYS_PMSIDR_EL1_ARCHINST_SHIFT 3
-#define SYS_PMSIDR_EL1_LDS_SHIFT 4
-#define SYS_PMSIDR_EL1_ERND_SHIFT 5
-#define SYS_PMSIDR_EL1_INTERVAL_SHIFT 8
-#define SYS_PMSIDR_EL1_INTERVAL_MASK 0xfUL
-#define SYS_PMSIDR_EL1_MAXSIZE_SHIFT 12
-#define SYS_PMSIDR_EL1_MAXSIZE_MASK 0xfUL
-#define SYS_PMSIDR_EL1_COUNTSIZE_SHIFT 16
-#define SYS_PMSIDR_EL1_COUNTSIZE_MASK 0xfUL
-
-#define SYS_PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7)
-#define SYS_PMBIDR_EL1_ALIGN_SHIFT 0
-#define SYS_PMBIDR_EL1_ALIGN_MASK 0xfU
-#define SYS_PMBIDR_EL1_P_SHIFT 4
-#define SYS_PMBIDR_EL1_F_SHIFT 5
-
-/* Sampling controls */
-#define SYS_PMSCR_EL1 sys_reg(3, 0, 9, 9, 0)
-#define SYS_PMSCR_EL1_E0SPE_SHIFT 0
-#define SYS_PMSCR_EL1_E1SPE_SHIFT 1
-#define SYS_PMSCR_EL1_CX_SHIFT 3
-#define SYS_PMSCR_EL1_PA_SHIFT 4
-#define SYS_PMSCR_EL1_TS_SHIFT 5
-#define SYS_PMSCR_EL1_PCT_SHIFT 6
-
-#define SYS_PMSCR_EL2 sys_reg(3, 4, 9, 9, 0)
-#define SYS_PMSCR_EL2_E0HSPE_SHIFT 0
-#define SYS_PMSCR_EL2_E2SPE_SHIFT 1
-#define SYS_PMSCR_EL2_CX_SHIFT 3
-#define SYS_PMSCR_EL2_PA_SHIFT 4
-#define SYS_PMSCR_EL2_TS_SHIFT 5
-#define SYS_PMSCR_EL2_PCT_SHIFT 6
-
-#define SYS_PMSICR_EL1 sys_reg(3, 0, 9, 9, 2)
-
-#define SYS_PMSIRR_EL1 sys_reg(3, 0, 9, 9, 3)
-#define SYS_PMSIRR_EL1_RND_SHIFT 0
-#define SYS_PMSIRR_EL1_INTERVAL_SHIFT 8
-#define SYS_PMSIRR_EL1_INTERVAL_MASK 0xffffffUL
-
-/* Filtering controls */
-#define SYS_PMSNEVFR_EL1 sys_reg(3, 0, 9, 9, 1)
-
-#define SYS_PMSFCR_EL1 sys_reg(3, 0, 9, 9, 4)
-#define SYS_PMSFCR_EL1_FE_SHIFT 0
-#define SYS_PMSFCR_EL1_FT_SHIFT 1
-#define SYS_PMSFCR_EL1_FL_SHIFT 2
-#define SYS_PMSFCR_EL1_B_SHIFT 16
-#define SYS_PMSFCR_EL1_LD_SHIFT 17
-#define SYS_PMSFCR_EL1_ST_SHIFT 18
-
-#define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5)
-#define SYS_PMSEVFR_EL1_RES0_8_2 \
+#define PMSEVFR_EL1_RES0_IMP \
(GENMASK_ULL(47, 32) | GENMASK_ULL(23, 16) | GENMASK_ULL(11, 8) |\
BIT_ULL(6) | BIT_ULL(4) | BIT_ULL(2) | BIT_ULL(0))
-#define SYS_PMSEVFR_EL1_RES0_8_3 \
- (SYS_PMSEVFR_EL1_RES0_8_2 & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11)))
-
-#define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6)
-#define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0
-
-/* Buffer controls */
-#define SYS_PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0)
-#define SYS_PMBLIMITR_EL1_E_SHIFT 0
-#define SYS_PMBLIMITR_EL1_FM_SHIFT 1
-#define SYS_PMBLIMITR_EL1_FM_MASK 0x3UL
-#define SYS_PMBLIMITR_EL1_FM_STOP_IRQ (0 << SYS_PMBLIMITR_EL1_FM_SHIFT)
-
-#define SYS_PMBPTR_EL1 sys_reg(3, 0, 9, 10, 1)
+#define PMSEVFR_EL1_RES0_V1P1 \
+ (PMSEVFR_EL1_RES0_IMP & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11)))
+#define PMSEVFR_EL1_RES0_V1P2 \
+ (PMSEVFR_EL1_RES0_V1P1 & ~BIT_ULL(6))
/* Buffer error reporting */
-#define SYS_PMBSR_EL1 sys_reg(3, 0, 9, 10, 3)
-#define SYS_PMBSR_EL1_COLL_SHIFT 16
-#define SYS_PMBSR_EL1_S_SHIFT 17
-#define SYS_PMBSR_EL1_EA_SHIFT 18
-#define SYS_PMBSR_EL1_DL_SHIFT 19
-#define SYS_PMBSR_EL1_EC_SHIFT 26
-#define SYS_PMBSR_EL1_EC_MASK 0x3fUL
-
-#define SYS_PMBSR_EL1_EC_BUF (0x0UL << SYS_PMBSR_EL1_EC_SHIFT)
-#define SYS_PMBSR_EL1_EC_FAULT_S1 (0x24UL << SYS_PMBSR_EL1_EC_SHIFT)
-#define SYS_PMBSR_EL1_EC_FAULT_S2 (0x25UL << SYS_PMBSR_EL1_EC_SHIFT)
-
-#define SYS_PMBSR_EL1_FAULT_FSC_SHIFT 0
-#define SYS_PMBSR_EL1_FAULT_FSC_MASK 0x3fUL
+#define PMBSR_EL1_FAULT_FSC_SHIFT PMBSR_EL1_MSS_SHIFT
+#define PMBSR_EL1_FAULT_FSC_MASK PMBSR_EL1_MSS_MASK
-#define SYS_PMBSR_EL1_BUF_BSC_SHIFT 0
-#define SYS_PMBSR_EL1_BUF_BSC_MASK 0x3fUL
+#define PMBSR_EL1_BUF_BSC_SHIFT PMBSR_EL1_MSS_SHIFT
+#define PMBSR_EL1_BUF_BSC_MASK PMBSR_EL1_MSS_MASK
-#define SYS_PMBSR_EL1_BUF_BSC_FULL (0x1UL << SYS_PMBSR_EL1_BUF_BSC_SHIFT)
+#define PMBSR_EL1_BUF_BSC_FULL 0x1UL
/*** End of Statistical Profiling Extension ***/
@@ -575,6 +496,7 @@
#define SCTLR_ELx_DSSBS (BIT(44))
#define SCTLR_ELx_ATA (BIT(43))
+#define SCTLR_ELx_EE_SHIFT 25
#define SCTLR_ELx_ENIA_SHIFT 31
#define SCTLR_ELx_ITFSB (BIT(37))
@@ -583,7 +505,7 @@
#define SCTLR_ELx_LSMAOE (BIT(29))
#define SCTLR_ELx_nTLSMD (BIT(28))
#define SCTLR_ELx_ENDA (BIT(27))
-#define SCTLR_ELx_EE (BIT(25))
+#define SCTLR_ELx_EE (BIT(SCTLR_ELx_EE_SHIFT))
#define SCTLR_ELx_EIS (BIT(22))
#define SCTLR_ELx_IESB (BIT(21))
#define SCTLR_ELx_TSCXT (BIT(20))
@@ -809,8 +731,8 @@
#define ARM64_FEATURE_FIELD_BITS 4
-/* Create a mask for the feature bits of the specified feature. */
-#define ARM64_FEATURE_MASK(x) (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
+/* Defined for compatibility only, do not add new users. */
+#define ARM64_FEATURE_MASK(x) (x##_MASK)
#ifdef __ASSEMBLY__
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index b713d30544f1..69a4fb749c65 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -96,5 +96,11 @@
#define HWCAP2_CSSC (1UL << 34)
#define HWCAP2_RPRFM (1UL << 35)
#define HWCAP2_SVE2P1 (1UL << 36)
+#define HWCAP2_SME2 (1UL << 37)
+#define HWCAP2_SME2P1 (1UL << 38)
+#define HWCAP2_SME_I16I32 (1UL << 39)
+#define HWCAP2_SME_BI32I32 (1UL << 40)
+#define HWCAP2_SME_B16B16 (1UL << 41)
+#define HWCAP2_SME_F16F16 (1UL << 42)
#endif /* _UAPI__ASM_HWCAP_H */
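
Userspace can probe the new SME 2 hwcaps above through the auxiliary vector; a small example using getauxval(), with the bit values copied from the header and only a couple of the new caps shown:

#include <stdio.h>
#include <sys/auxv.h>

#define HWCAP2_SME2		(1UL << 37)
#define HWCAP2_SME_I16I32	(1UL << 39)

int main(void)
{
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	printf("SME2:       %s\n", (hwcap2 & HWCAP2_SME2) ? "yes" : "no");
	printf("SME_I16I32: %s\n", (hwcap2 & HWCAP2_SME_I16I32) ? "yes" : "no");
	return 0;
}
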
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index 9525041e4a14..656a10ea6c67 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -144,6 +144,14 @@ struct sve_context {
#define SVE_SIG_FLAG_SM 0x1 /* Context describes streaming mode */
+/* TPIDR2_EL0 context */
+#define TPIDR2_MAGIC 0x54504902
+
+struct tpidr2_context {
+ struct _aarch64_ctx head;
+ __u64 tpidr2;
+};
+
#define ZA_MAGIC 0x54366345
struct za_context {
@@ -152,6 +160,14 @@ struct za_context {
__u16 __reserved[3];
};
+#define ZT_MAGIC 0x5a544e01
+
+struct zt_context {
+ struct _aarch64_ctx head;
+ __u16 nregs;
+ __u16 __reserved[3];
+};
+
#endif /* !__ASSEMBLY__ */
#include <asm/sve_context.h>
@@ -304,4 +320,15 @@ struct za_context {
#define ZA_SIG_CONTEXT_SIZE(vq) \
(ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq))
+#define ZT_SIG_REG_SIZE 512
+
+#define ZT_SIG_REG_BYTES (ZT_SIG_REG_SIZE / 8)
+
+#define ZT_SIG_REGS_OFFSET sizeof(struct zt_context)
+
+#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * n)
+
+#define ZT_SIG_CONTEXT_SIZE(n) \
+ (sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n))
+
#endif /* _UAPI__ASM_SIGCONTEXT_H */
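
For sizing, ZT0 is 512 bits, so each ZT record carries 64 bytes of register data per register. Assuming the usual 8-byte struct _aarch64_ctx header (two __u32 fields), the new macros give 16 + 64 = 80 bytes for a single-register ZT record; the quick stand-alone check below mirrors the uapi definitions locally for the arithmetic only.

#include <assert.h>
#include <stdint.h>

/* Local mirrors of the uapi structures, for the size arithmetic only. */
struct _aarch64_ctx { uint32_t magic; uint32_t size; };
struct zt_context   { struct _aarch64_ctx head; uint16_t nregs; uint16_t reserved[3]; };

#define ZT_SIG_REG_SIZE		512
#define ZT_SIG_REG_BYTES	(ZT_SIG_REG_SIZE / 8)
#define ZT_SIG_REGS_SIZE(n)	(ZT_SIG_REG_BYTES * (n))
#define ZT_SIG_CONTEXT_SIZE(n)	(sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n))

int main(void)
{
	assert(sizeof(struct zt_context) == 16);
	assert(ZT_SIG_CONTEXT_SIZE(1) == 80);
	return 0;
}
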
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 2234624536d9..ae345b06e9f7 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -9,6 +9,7 @@
#include <linux/arm_sdei.h>
#include <linux/sched.h>
+#include <linux/ftrace.h>
#include <linux/kexec.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
@@ -194,5 +195,8 @@ int main(void)
DEFINE(KIMAGE_START, offsetof(struct kimage, start));
BLANK();
#endif
+#ifdef CONFIG_FUNCTION_TRACER
+ DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func));
+#endif
return 0;
}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a77315b338e6..45a42cf2191c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -65,6 +65,7 @@
#include <linux/bsearch.h>
#include <linux/cpumask.h>
#include <linux/crash_dump.h>
+#include <linux/kstrtox.h>
#include <linux/sort.h>
#include <linux/stop_machine.h>
#include <linux/sysfs.h>
@@ -283,16 +284,26 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I32_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0),
ARM64_FTR_END,
};
@@ -444,8 +455,8 @@ static const struct arm64_ftr_bits ftr_mvfr0[] = {
static const struct arm64_ftr_bits ftr_mvfr1[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDFMAC_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPHP_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDHP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPHP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDHP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDSP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDInt_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDLS_SHIFT, 4, 0),
@@ -529,12 +540,12 @@ static const struct arm64_ftr_bits ftr_id_mmfr5[] = {
};
static const struct arm64_ftr_bits ftr_id_isar6[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_I8MM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_BF16_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_I8MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_BF16_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_SPECRES_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_SB_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_FHM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_DP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_SB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_FHM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_DP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_JSCVT_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -562,7 +573,7 @@ static const struct arm64_ftr_bits ftr_id_pfr1[] = {
};
static const struct arm64_ftr_bits ftr_id_pfr2[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_EL1_SSBS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_EL1_SSBS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_EL1_CSV3_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -1795,7 +1806,7 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
static int __init parse_kpti(char *str)
{
bool enabled;
- int ret = strtobool(str, &enabled);
+ int ret = kstrtobool(str, &enabled);
if (ret)
return ret;
@@ -2039,14 +2050,50 @@ static bool enable_pseudo_nmi;
static int __init early_enable_pseudo_nmi(char *p)
{
- return strtobool(p, &enable_pseudo_nmi);
+ return kstrtobool(p, &enable_pseudo_nmi);
}
early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi);
static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry,
int scope)
{
- return enable_pseudo_nmi && has_useable_gicv3_cpuif(entry, scope);
+ /*
+ * ARM64_HAS_GIC_CPUIF_SYSREGS has a lower index, and is a boot CPU
+ * feature, so will be detected earlier.
+ */
+ BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GIC_CPUIF_SYSREGS);
+ if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS))
+ return false;
+
+ return enable_pseudo_nmi;
+}
+
+static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ /*
+ * If we're not using priority masking then we won't be poking PMR_EL1,
+ * and there's no need to relax synchronization of writes to it, and
+	 * ICC_CTLR_EL1 might not be accessible, so reads from it must be
+	 * avoided.
+ *
+ * ARM64_HAS_GIC_PRIO_MASKING has a lower index, and is a boot CPU
+ * feature, so will be detected earlier.
+ */
+ BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_RELAXED_SYNC <= ARM64_HAS_GIC_PRIO_MASKING);
+ if (!cpus_have_cap(ARM64_HAS_GIC_PRIO_MASKING))
+ return false;
+
+ /*
+ * When Priority Mask Hint Enable (PMHE) == 0b0, PMR is not used as a
+ * hint for interrupt distribution, a DSB is not necessary when
+ * unmasking IRQs via PMR, and we can relax the barrier to a NOP.
+ *
+ * Linux itself doesn't use 1:N distribution, so has no need to
+ * set PMHE. The only reason to have it set is if EL3 requires it
+ * (and we can't change it).
+ */
+ return (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) == 0;
}
#endif
@@ -2142,7 +2189,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
},
{
.desc = "GIC system register CPU interface",
- .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
+ .capability = ARM64_HAS_GIC_CPUIF_SYSREGS,
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = has_useable_gicv3_cpuif,
.sys_reg = SYS_ID_AA64PFR0_EL1,
@@ -2534,14 +2581,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
* Depends on having GICv3
*/
.desc = "IRQ priority masking",
- .capability = ARM64_HAS_IRQ_PRIO_MASKING,
+ .capability = ARM64_HAS_GIC_PRIO_MASKING,
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = can_use_gic_priorities,
- .sys_reg = SYS_ID_AA64PFR0_EL1,
- .field_pos = ID_AA64PFR0_EL1_GIC_SHIFT,
- .field_width = 4,
- .sign = FTR_UNSIGNED,
- .min_field_value = 1,
+ },
+ {
+ /*
+ * Depends on ARM64_HAS_GIC_PRIO_MASKING
+ */
+ .capability = ARM64_HAS_GIC_PRIO_RELAXED_SYNC,
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+ .matches = has_gic_prio_relaxed_sync,
},
#endif
#ifdef CONFIG_ARM64_E0PD
@@ -2649,6 +2699,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.cpu_enable = fa64_kernel_enable,
},
+ {
+ .desc = "SME2",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_SME2,
+ .sys_reg = SYS_ID_AA64PFR1_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64PFR1_EL1_SME_SHIFT,
+ .field_width = ID_AA64PFR1_EL1_SME_WIDTH,
+ .min_field_value = ID_AA64PFR1_EL1_SME_SME2,
+ .matches = has_cpuid_feature,
+ .cpu_enable = sme2_kernel_enable,
+ },
#endif /* CONFIG_ARM64_SME */
{
.desc = "WFx with timeout",
@@ -2688,13 +2750,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
{},
};
-#define HWCAP_CPUID_MATCH(reg, field, width, s, min_value) \
+#define HWCAP_CPUID_MATCH(reg, field, min_value) \
.matches = has_user_cpuid_feature, \
- .sys_reg = reg, \
- .field_pos = field, \
- .field_width = width, \
- .sign = s, \
- .min_field_value = min_value,
+ .sys_reg = SYS_##reg, \
+ .field_pos = reg##_##field##_SHIFT, \
+ .field_width = reg##_##field##_WIDTH, \
+ .sign = reg##_##field##_SIGNED, \
+ .min_field_value = reg##_##field##_##min_value,
#define __HWCAP_CAP(name, cap_type, cap) \
.desc = name, \
@@ -2702,10 +2764,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.hwcap_type = cap_type, \
.hwcap = cap, \
-#define HWCAP_CAP(reg, field, width, s, min_value, cap_type, cap) \
+#define HWCAP_CAP(reg, field, min_value, cap_type, cap) \
{ \
__HWCAP_CAP(#cap, cap_type, cap) \
- HWCAP_CPUID_MATCH(reg, field, width, s, min_value) \
+ HWCAP_CPUID_MATCH(reg, field, min_value) \
}
#define HWCAP_MULTI_CAP(list, cap_type, cap) \
@@ -2724,115 +2786,114 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
#ifdef CONFIG_ARM64_PTR_AUTH
static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_APA_SHIFT,
- 4, FTR_UNSIGNED,
- ID_AA64ISAR1_EL1_APA_PAuth)
+ HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, APA, PAuth)
},
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_APA3_SHIFT,
- 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_APA3_PAuth)
+ HWCAP_CPUID_MATCH(ID_AA64ISAR2_EL1, APA3, PAuth)
},
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_API_SHIFT,
- 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_API_PAuth)
+ HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, API, PAuth)
},
{},
};
static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_GPA_SHIFT,
- 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_GPA_IMP)
+ HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, GPA, IMP)
},
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_GPA3_SHIFT,
- 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_GPA3_IMP)
+ HWCAP_CPUID_MATCH(ID_AA64ISAR2_EL1, GPA3, IMP)
},
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_GPI_SHIFT,
- 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_GPI_IMP)
+ HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, GPI, IMP)
},
{},
};
#endif
static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_ATOMIC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_AdvSIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_AdvSIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_EBF16),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
- HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_EL1_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, SHA1, IMP, CAP_HWCAP, KERNEL_HWCAP_SHA1),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, SHA2, SHA256, CAP_HWCAP, KERNEL_HWCAP_SHA2),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, SHA2, SHA512, CAP_HWCAP, KERNEL_HWCAP_SHA512),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, CRC32, IMP, CAP_HWCAP, KERNEL_HWCAP_CRC32),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, ATOMIC, IMP, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, RDM, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SHA3),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, SM3, IMP, CAP_HWCAP, KERNEL_HWCAP_SM3),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SM4),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, DP, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, FHM, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, RNDR, IMP, CAP_HWCAP, KERNEL_HWCAP_RNG),
+ HWCAP_CAP(ID_AA64PFR0_EL1, FP, IMP, CAP_HWCAP, KERNEL_HWCAP_FP),
+ HWCAP_CAP(ID_AA64PFR0_EL1, FP, FP16, CAP_HWCAP, KERNEL_HWCAP_FPHP),
+ HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
+ HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, FP16, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
+ HWCAP_CAP(ID_AA64PFR0_EL1, DIT, IMP, CAP_HWCAP, KERNEL_HWCAP_DIT),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, IMP, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, DPB2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, JSCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, FCMA, IMP, CAP_HWCAP, KERNEL_HWCAP_FCMA),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, IMP, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, LRCPC2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, FRINTTS, IMP, CAP_HWCAP, KERNEL_HWCAP_FRINT),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, SB, IMP, CAP_HWCAP, KERNEL_HWCAP_SB),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_BF16),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM),
+ HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_EL1_SVE_IMP, CAP_HWCAP, KERNEL_HWCAP_SVE),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SVEver_SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SVEver_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BitPerm_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BF16_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BF16_EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SHA3_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SM4_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_I8MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F32MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F64MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+ HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
#endif
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SSBS_SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS),
+ HWCAP_CAP(ID_AA64PFR1_EL1, SSBS, SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS),
#ifdef CONFIG_ARM64_BTI
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_BT_IMP, CAP_HWCAP, KERNEL_HWCAP_BTI),
+ HWCAP_CAP(ID_AA64PFR1_EL1, BT, IMP, CAP_HWCAP, KERNEL_HWCAP_BTI),
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA),
HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG),
#endif
#ifdef CONFIG_ARM64_MTE
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_MTE_MTE2, CAP_HWCAP, KERNEL_HWCAP_MTE),
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_MTE_MTE3, CAP_HWCAP, KERNEL_HWCAP_MTE3),
+ HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE2, CAP_HWCAP, KERNEL_HWCAP_MTE),
+ HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE3, CAP_HWCAP, KERNEL_HWCAP_MTE3),
#endif /* CONFIG_ARM64_MTE */
- HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_EL1_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
- HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
- HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_CSSC_IMP, CAP_HWCAP, KERNEL_HWCAP_CSSC),
- HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_RPRFM_IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM),
- HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
- HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_WFxT_IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
+ HWCAP_CAP(ID_AA64MMFR0_EL1, ECV, IMP, CAP_HWCAP, KERNEL_HWCAP_ECV),
+ HWCAP_CAP(ID_AA64MMFR1_EL1, AFP, IMP, CAP_HWCAP, KERNEL_HWCAP_AFP),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, CSSC, IMP, CAP_HWCAP, KERNEL_HWCAP_CSSC),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, RPRFM, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
#ifdef CONFIG_ARM64_SME
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SME_IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I8I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F32F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+ HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
#endif /* CONFIG_ARM64_SME */
{},
};
@@ -2862,15 +2923,23 @@ static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope)
static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
#ifdef CONFIG_COMPAT
HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON),
- HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_EL1_SIMDFMAC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4),
+ HWCAP_CAP(MVFR1_EL1, SIMDFMAC, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4),
/* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */
- HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_EL1_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP),
- HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_EL1_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+ HWCAP_CAP(MVFR0_EL1, FPDP, VFPv3, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP),
+ HWCAP_CAP(MVFR0_EL1, FPDP, VFPv3, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3),
+ HWCAP_CAP(MVFR1_EL1, FPHP, FP16, CAP_COMPAT_HWCAP, COMPAT_HWCAP_FPHP),
+ HWCAP_CAP(MVFR1_EL1, SIMDHP, SIMDHP_FLOAT, CAP_COMPAT_HWCAP, COMPAT_HWCAP_ASIMDHP),
+ HWCAP_CAP(ID_ISAR5_EL1, AES, VMULL, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+ HWCAP_CAP(ID_ISAR5_EL1, AES, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+ HWCAP_CAP(ID_ISAR5_EL1, SHA1, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+ HWCAP_CAP(ID_ISAR5_EL1, SHA2, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+ HWCAP_CAP(ID_ISAR5_EL1, CRC32, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+ HWCAP_CAP(ID_ISAR6_EL1, DP, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_ASIMDDP),
+ HWCAP_CAP(ID_ISAR6_EL1, FHM, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_ASIMDFHM),
+ HWCAP_CAP(ID_ISAR6_EL1, SB, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SB),
+ HWCAP_CAP(ID_ISAR6_EL1, BF16, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_ASIMDBF16),
+ HWCAP_CAP(ID_ISAR6_EL1, I8MM, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_I8MM),
+ HWCAP_CAP(ID_PFR2_EL1, SSBS, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SSBS),
#endif
{},
};
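The hwcap tables above replace the open-coded register/shift/width/signedness/value tuples with a three-token HWCAP_CAP(register, field, minimum-value) form that leans on the field definitions generated from arch/arm64/tools/sysreg. As an equivalence sketch only (this is not the in-tree macro expansion; ID_AA64ISAR0_EL1_AES_PMULL is assumed to be one of the generated value macros), the new PMULL entry describes the same match as the removed line:

/*
 * Sketch: HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP,
 * KERNEL_HWCAP_PMULL), spelled out with the old-style fields of
 * struct arm64_cpu_capabilities.
 */
static const struct arm64_cpu_capabilities pmull_hwcap_sketch = {
	.sys_reg	 = SYS_ID_AA64ISAR0_EL1,
	.field_pos	 = ID_AA64ISAR0_EL1_AES_SHIFT,	/* bits [7:4] */
	.field_width	 = 4,
	.sign		 = FTR_UNSIGNED,
	.min_field_value = ID_AA64ISAR0_EL1_AES_PMULL,	/* 2, as before */
	/* plus the CAP_HWCAP/KERNEL_HWCAP_PMULL reporting fields */
};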
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 379695262b77..eb4378c23b3c 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -119,6 +119,12 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_CSSC] = "cssc",
[KERNEL_HWCAP_RPRFM] = "rprfm",
[KERNEL_HWCAP_SVE2P1] = "sve2p1",
+ [KERNEL_HWCAP_SME2] = "sme2",
+ [KERNEL_HWCAP_SME2P1] = "sme2p1",
+ [KERNEL_HWCAP_SME_I16I32] = "smei16i32",
+ [KERNEL_HWCAP_SME_BI32I32] = "smebi32i32",
+ [KERNEL_HWCAP_SME_B16B16] = "smeb16b16",
+ [KERNEL_HWCAP_SME_F16F16] = "smef16f16",
};
#ifdef CONFIG_COMPAT
@@ -146,6 +152,12 @@ static const char *const compat_hwcap_str[] = {
[COMPAT_KERNEL_HWCAP(VFPD32)] = NULL, /* Not possible on arm64 */
[COMPAT_KERNEL_HWCAP(LPAE)] = "lpae",
[COMPAT_KERNEL_HWCAP(EVTSTRM)] = "evtstrm",
+ [COMPAT_KERNEL_HWCAP(FPHP)] = "fphp",
+ [COMPAT_KERNEL_HWCAP(ASIMDHP)] = "asimdhp",
+ [COMPAT_KERNEL_HWCAP(ASIMDDP)] = "asimddp",
+ [COMPAT_KERNEL_HWCAP(ASIMDFHM)] = "asimdfhm",
+ [COMPAT_KERNEL_HWCAP(ASIMDBF16)] = "asimdbf16",
+ [COMPAT_KERNEL_HWCAP(I8MM)] = "i8mm",
};
#define COMPAT_KERNEL_HWCAP2(x) const_ilog2(COMPAT_HWCAP2_ ## x)
@@ -155,6 +167,8 @@ static const char *const compat_hwcap2_str[] = {
[COMPAT_KERNEL_HWCAP2(SHA1)] = "sha1",
[COMPAT_KERNEL_HWCAP2(SHA2)] = "sha2",
[COMPAT_KERNEL_HWCAP2(CRC32)] = "crc32",
+ [COMPAT_KERNEL_HWCAP2(SB)] = "sb",
+ [COMPAT_KERNEL_HWCAP2(SSBS)] = "ssbs",
};
#endif /* CONFIG_COMPAT */
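The strings above only affect the /proc/cpuinfo "Features" line; programs normally test the matching bits in the auxiliary vector instead. A minimal userspace probe, assuming the HWCAP2_SME2 and HWCAP2_SVE2P1 bit definitions exported by the updated uapi hwcap header (the #ifdef guards keep it building against older headers):

#include <stdio.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>		/* HWCAP2_* bit definitions */

int main(void)
{
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

#ifdef HWCAP2_SME2
	printf("sme2:   %s\n", (hwcap2 & HWCAP2_SME2) ? "yes" : "no");
#endif
#ifdef HWCAP2_SVE2P1
	printf("sve2p1: %s\n", (hwcap2 & HWCAP2_SVE2P1) ? "yes" : "no");
#endif
	return 0;
}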
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 229436f33df5..6325db1a2179 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -100,25 +100,35 @@ SYM_FUNC_START(sme_set_vq)
SYM_FUNC_END(sme_set_vq)
/*
- * Save the SME state
+ * Save the ZA and ZT state
*
* x0 - pointer to buffer for state
+ * x1 - number of ZT registers to save
*/
-SYM_FUNC_START(za_save_state)
- _sme_rdsvl 1, 1 // x1 = VL/8
- sme_save_za 0, x1, 12
+SYM_FUNC_START(sme_save_state)
+ _sme_rdsvl 2, 1 // x2 = VL/8
+ sme_save_za 0, x2, 12 // Leaves x0 pointing to the end of ZA
+
+ cbz x1, 1f
+ _str_zt 0
+1:
ret
-SYM_FUNC_END(za_save_state)
+SYM_FUNC_END(sme_save_state)
/*
- * Load the SME state
+ * Load the ZA and ZT state
*
* x0 - pointer to buffer for state
+ * x1 - number of ZT registers to load
*/
-SYM_FUNC_START(za_load_state)
- _sme_rdsvl 1, 1 // x1 = VL/8
- sme_load_za 0, x1, 12
+SYM_FUNC_START(sme_load_state)
+ _sme_rdsvl 2, 1 // x2 = VL/8
+ sme_load_za 0, x2, 12 // Leaves x0 pointing to the end of ZA
+
+ cbz x1, 1f
+ _ldr_zt 0
+1:
ret
-SYM_FUNC_END(za_load_state)
+SYM_FUNC_END(sme_load_state)
#endif /* CONFIG_ARM64_SME */
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 3b625f76ffba..350ed81324ac 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -65,13 +65,35 @@ SYM_CODE_START(ftrace_caller)
stp x29, x30, [sp, #FREGS_SIZE]
add x29, sp, #FREGS_SIZE
- sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
- mov x1, x9 // parent_ip (callsite's LR)
- ldr_l x2, function_trace_op // op
- mov x3, sp // regs
+ /* Prepare arguments for the tracer func */
+ sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
+ mov x1, x9 // parent_ip (callsite's LR)
+ mov x3, sp // regs
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ /*
+ * The literal pointer to the ops is at an 8-byte aligned boundary
+ * which is either 12 or 16 bytes before the BL instruction in the call
+ * site. See ftrace_call_adjust() for details.
+ *
+ * Therefore here the LR points at `literal + 16` or `literal + 20`,
+ * and we can find the address of the literal in either case by
+ * aligning to an 8-byte boundary and subtracting 16. We do the
+ * alignment first as this allows us to fold the subtraction into the
+ * LDR.
+ */
+ bic x2, x30, 0x7
+ ldr x2, [x2, #-16] // op
+
+ ldr x4, [x2, #FTRACE_OPS_FUNC] // op->func
+ blr x4 // op->func(ip, parent_ip, op, regs)
+
+#else
+ ldr_l x2, function_trace_op // op
SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
- bl ftrace_stub
+ bl ftrace_stub // func(ip, parent_ip, op, regs)
+#endif
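The BIC/LDR pair above is the core of the CALL_OPS dispatch: the ops pointer lives in an 8-byte-aligned literal 12 or 16 bytes before the BL, so rounding the return address down to 8 bytes and subtracting 16 reaches it in both layouts. A standalone check of that arithmetic (plain C, not kernel code; "lr" is the value x30 holds inside ftrace_caller, i.e. the BL address plus 4; the base address is hypothetical):

#include <assert.h>
#include <stdint.h>

/* Mirrors the "bic x2, x30, 0x7; ldr x2, [x2, #-16]" address math. */
static uintptr_t literal_from_lr(uintptr_t lr)
{
	return (lr & ~(uintptr_t)0x7) - 16;
}

int main(void)
{
	uintptr_t base = 0x1000;	/* hypothetical 8-byte-aligned literal address */

	/* No BTI: BL at base + 12, so LR = base + 16. */
	assert(literal_from_lr(base + 16) == base);
	/* With BTI: BL at base + 16, so LR = base + 20. */
	assert(literal_from_lr(base + 20) == base);
	return 0;
}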
/*
* At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 11cb99c4d298..ab2a6e33c052 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -275,7 +275,7 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
alternative_else_nop_endif
1:
- scs_load tsk
+ scs_load_current
.else
add x21, sp, #PT_REGS_SIZE
get_current_task tsk
@@ -311,13 +311,16 @@ alternative_else_nop_endif
.endif
#ifdef CONFIG_ARM64_PSEUDO_NMI
- /* Save pmr */
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+alternative_if_not ARM64_HAS_GIC_PRIO_MASKING
+ b .Lskip_pmr_save\@
+alternative_else_nop_endif
+
mrs_s x20, SYS_ICC_PMR_EL1
str x20, [sp, #S_PMR_SAVE]
mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET
msr_s SYS_ICC_PMR_EL1, x20
-alternative_else_nop_endif
+
+.Lskip_pmr_save\@:
#endif
/*
@@ -336,15 +339,19 @@ alternative_else_nop_endif
.endif
#ifdef CONFIG_ARM64_PSEUDO_NMI
- /* Restore pmr */
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+alternative_if_not ARM64_HAS_GIC_PRIO_MASKING
+ b .Lskip_pmr_restore\@
+alternative_else_nop_endif
+
ldr x20, [sp, #S_PMR_SAVE]
msr_s SYS_ICC_PMR_EL1, x20
- mrs_s x21, SYS_ICC_CTLR_EL1
- tbz x21, #6, .L__skip_pmr_sync\@ // Check for ICC_CTLR_EL1.PMHE
- dsb sy // Ensure priority change is seen by redistributor
-.L__skip_pmr_sync\@:
+
+ /* Ensure priority change is seen by redistributor */
+alternative_if_not ARM64_HAS_GIC_PRIO_RELAXED_SYNC
+ dsb sy
alternative_else_nop_endif
+
+.Lskip_pmr_restore\@:
#endif
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
@@ -848,7 +855,7 @@ SYM_FUNC_START(cpu_switch_to)
msr sp_el0, x1
ptrauth_keys_install_kernel x1, x8, x9, x10
scs_save x0
- scs_load x1
+ scs_load_current
ret
SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
@@ -876,19 +883,19 @@ NOKPROBE(ret_from_fork)
*/
SYM_FUNC_START(call_on_irq_stack)
#ifdef CONFIG_SHADOW_CALL_STACK
- stp scs_sp, xzr, [sp, #-16]!
+ get_current_task x16
+ scs_save x16
ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x17
#endif
+
/* Create a frame record to save our LR and SP (implicit in FP) */
stp x29, x30, [sp, #-16]!
mov x29, sp
ldr_this_cpu x16, irq_stack_ptr, x17
- mov x15, #IRQ_STACK_SIZE
- add x16, x16, x15
/* Move to the new stack and call the function there */
- mov sp, x16
+ add sp, x16, #IRQ_STACK_SIZE
blr x1
/*
@@ -897,9 +904,7 @@ SYM_FUNC_START(call_on_irq_stack)
*/
mov sp, x29
ldp x29, x30, [sp], #16
-#ifdef CONFIG_SHADOW_CALL_STACK
- ldp scs_sp, xzr, [sp], #16
-#endif
+ scs_load_current
ret
SYM_FUNC_END(call_on_irq_stack)
NOKPROBE(call_on_irq_stack)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index b6ef1af0122e..692dfefbe0ed 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -299,7 +299,7 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
/*
* TIF_SME controls whether a task can use SME without trapping while
* in userspace, when TIF_SME is set then we must have storage
- * alocated in sve_state and za_state to store the contents of both ZA
+ * allocated in sve_state and sme_state to store the contents of both ZA
* and the SVE registers for both streaming and non-streaming modes.
*
* If both SVCR.ZA and SVCR.SM are disabled then at any point we
@@ -429,7 +429,8 @@ static void task_fpsimd_load(void)
write_sysreg_s(current->thread.svcr, SYS_SVCR);
if (thread_za_enabled(&current->thread))
- za_load_state(current->thread.za_state);
+ sme_load_state(current->thread.sme_state,
+ system_supports_sme2());
if (thread_sm_enabled(&current->thread))
restore_ffr = system_supports_fa64();
@@ -490,7 +491,8 @@ static void fpsimd_save(void)
*svcr = read_sysreg_s(SYS_SVCR);
if (*svcr & SVCR_ZA_MASK)
- za_save_state(last->za_state);
+ sme_save_state(last->sme_state,
+ system_supports_sme2());
/* If we are in streaming mode override regular SVE. */
if (*svcr & SVCR_SM_MASK) {
@@ -1257,30 +1259,30 @@ void fpsimd_release_task(struct task_struct *dead_task)
#ifdef CONFIG_ARM64_SME
/*
- * Ensure that task->thread.za_state is allocated and sufficiently large.
+ * Ensure that task->thread.sme_state is allocated and sufficiently large.
*
* This function should be used only in preparation for replacing
- * task->thread.za_state with new data. The memory is always zeroed
+ * task->thread.sme_state with new data. The memory is always zeroed
* here to prevent stale data from showing through: this is done in
* the interest of testability and predictability, the architecture
* guarantees that when ZA is enabled it will be zeroed.
*/
void sme_alloc(struct task_struct *task)
{
- if (task->thread.za_state) {
- memset(task->thread.za_state, 0, za_state_size(task));
+ if (task->thread.sme_state) {
+ memset(task->thread.sme_state, 0, sme_state_size(task));
return;
}
/* This could potentially be up to 64K. */
- task->thread.za_state =
- kzalloc(za_state_size(task), GFP_KERNEL);
+ task->thread.sme_state =
+ kzalloc(sme_state_size(task), GFP_KERNEL);
}
static void sme_free(struct task_struct *task)
{
- kfree(task->thread.za_state);
- task->thread.za_state = NULL;
+ kfree(task->thread.sme_state);
+ task->thread.sme_state = NULL;
}
void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
@@ -1302,6 +1304,17 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
* This must be called after sme_kernel_enable(), we rely on the
* feature table being sorted to ensure this.
*/
+void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+{
+ /* Allow use of ZT0 */
+ write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
+ SYS_SMCR_EL1);
+}
+
+/*
+ * This must be called after sme_kernel_enable(), we rely on the
+ * feature table being sorted to ensure this.
+ */
void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
{
/* Allow use of FA64 */
@@ -1322,7 +1335,6 @@ u64 read_smcr_features(void)
unsigned int vq_max;
sme_kernel_enable(NULL);
- sme_smstart_sm();
/*
* Set the maximum possible VL.
@@ -1332,11 +1344,9 @@ u64 read_smcr_features(void)
smcr = read_sysreg_s(SYS_SMCR_EL1);
smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */
- vq_max = sve_vq_from_vl(sve_get_vl());
+ vq_max = sve_vq_from_vl(sme_get_vl());
smcr |= vq_max - 1; /* set LEN field to maximum effective value */
- sme_smstop_sm();
-
return smcr;
}
@@ -1488,7 +1498,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
sve_alloc(current, false);
sme_alloc(current);
- if (!current->thread.sve_state || !current->thread.za_state) {
+ if (!current->thread.sve_state || !current->thread.sme_state) {
force_sig(SIGKILL);
return;
}
@@ -1609,7 +1619,7 @@ static void fpsimd_flush_thread_vl(enum vec_type type)
void fpsimd_flush_thread(void)
{
void *sve_state = NULL;
- void *za_state = NULL;
+ void *sme_state = NULL;
if (!system_supports_fpsimd())
return;
@@ -1634,8 +1644,8 @@ void fpsimd_flush_thread(void)
clear_thread_flag(TIF_SME);
/* Defer kfree() while in atomic context */
- za_state = current->thread.za_state;
- current->thread.za_state = NULL;
+ sme_state = current->thread.sme_state;
+ current->thread.sme_state = NULL;
fpsimd_flush_thread_vl(ARM64_VEC_SME);
current->thread.svcr = 0;
@@ -1645,7 +1655,7 @@ void fpsimd_flush_thread(void)
put_cpu_fpsimd_context();
kfree(sve_state);
- kfree(za_state);
+ kfree(sme_state);
}
/*
@@ -1711,7 +1721,7 @@ static void fpsimd_bind_task_to_cpu(void)
WARN_ON(!system_supports_fpsimd());
last->st = &current->thread.uw.fpsimd_state;
last->sve_state = current->thread.sve_state;
- last->za_state = current->thread.za_state;
+ last->sme_state = current->thread.sme_state;
last->sve_vl = task_get_sve_vl(current);
last->sme_vl = task_get_sme_vl(current);
last->svcr = &current->thread.svcr;
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index b30b955a8921..5545fe1a9012 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -60,6 +60,89 @@ int ftrace_regs_query_register_offset(const char *name)
}
#endif
+unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ /*
+ * When using mcount, addr is the address of the mcount call
+ * instruction, and no adjustment is necessary.
+ */
+ if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS))
+ return addr;
+
+ /*
+ * When using patchable-function-entry without pre-function NOPS, addr
+ * is the address of the first NOP after the function entry point.
+ *
+ * The compiler has either generated:
+ *
+ * addr+00: func: NOP // To be patched to MOV X9, LR
+ * addr+04: NOP // To be patched to BL <caller>
+ *
+ * Or:
+ *
+ * addr-04: BTI C
+ * addr+00: func: NOP // To be patched to MOV X9, LR
+ * addr+04: NOP // To be patched to BL <caller>
+ *
+ * We must adjust addr to the address of the NOP which will be patched
+ * to `BL <caller>`, which is at `addr + 4` bytes in either case.
+ *
+ */
+ if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+ return addr + AARCH64_INSN_SIZE;
+
+ /*
+ * When using patchable-function-entry with pre-function NOPs, addr is
+ * the address of the first pre-function NOP.
+ *
+ * Starting from an 8-byte aligned base, the compiler has either
+ * generated:
+ *
+ * addr+00: NOP // Literal (first 32 bits)
+ * addr+04: NOP // Literal (last 32 bits)
+ * addr+08: func: NOP // To be patched to MOV X9, LR
+ * addr+12: NOP // To be patched to BL <caller>
+ *
+ * Or:
+ *
+ * addr+00: NOP // Literal (first 32 bits)
+ * addr+04: NOP // Literal (last 32 bits)
+ * addr+08: func: BTI C
+ * addr+12: NOP // To be patched to MOV X9, LR
+ * addr+16: NOP // To be patched to BL <caller>
+ *
+ * We must adjust addr to the address of the NOP which will be patched
+ * to `BL <caller>`, which is at either addr+12 or addr+16 depending on
+ * whether there is a BTI.
+ */
+
+ if (!IS_ALIGNED(addr, sizeof(unsigned long))) {
+ WARN_RATELIMIT(1, "Misaligned patch-site %pS\n",
+ (void *)(addr + 8));
+ return 0;
+ }
+
+ /* Skip the NOPs placed before the function entry point */
+ addr += 2 * AARCH64_INSN_SIZE;
+
+ /* Skip any BTI */
+ if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) {
+ u32 insn = le32_to_cpu(*(__le32 *)addr);
+
+ if (aarch64_insn_is_bti(insn)) {
+ addr += AARCH64_INSN_SIZE;
+ } else if (insn != aarch64_insn_gen_nop()) {
+ WARN_RATELIMIT(1, "unexpected insn in patch-site %pS: 0x%08x\n",
+ (void *)addr, insn);
+ }
+ }
+
+ /* Skip the first NOP after function entry */
+ addr += AARCH64_INSN_SIZE;
+
+ return addr;
+}
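The same layouts can be checked from the patching side: ftrace_call_adjust() hands back the address of the BL slot, and ftrace_rec_set_ops() further down in this file recovers the literal with ALIGN_DOWN(rec->ip - 12, 8). A short standalone confirmation that this lands on the aligned literal for both the non-BTI and BTI layouts sketched in the comment above (plain C, hypothetical addresses):

#include <assert.h>
#include <stdint.h>

#define ALIGN_DOWN_8(x)	((x) & ~(uintptr_t)0x7)

int main(void)
{
	uintptr_t base = 0x2000;	/* hypothetical aligned base of the two literal NOPs */

	/* BL slot at base + 12 (no BTI) or base + 16 (with BTI). */
	assert(ALIGN_DOWN_8((base + 12) - 12) == base);
	assert(ALIGN_DOWN_8((base + 16) - 12) == base);
	return 0;
}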
+
/*
* Replace a single instruction, which may be a branch or NOP.
* If @validate == true, a replaced instruction is checked against 'old'.
@@ -98,6 +181,13 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
unsigned long pc;
u32 new;
+ /*
+ * When using CALL_OPS, the function to call is associated with the
+ * call site, and we don't have a global function pointer to update.
+ */
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+ return 0;
+
pc = (unsigned long)ftrace_call;
new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func,
AARCH64_INSN_BRANCH_LINK);
@@ -176,6 +266,44 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
return true;
}
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+static const struct ftrace_ops *arm64_rec_get_ops(struct dyn_ftrace *rec)
+{
+ const struct ftrace_ops *ops = NULL;
+
+ if (rec->flags & FTRACE_FL_CALL_OPS_EN) {
+ ops = ftrace_find_unique_ops(rec);
+ WARN_ON_ONCE(!ops);
+ }
+
+ if (!ops)
+ ops = &ftrace_list_ops;
+
+ return ops;
+}
+
+static int ftrace_rec_set_ops(const struct dyn_ftrace *rec,
+ const struct ftrace_ops *ops)
+{
+ unsigned long literal = ALIGN_DOWN(rec->ip - 12, 8);
+ return aarch64_insn_write_literal_u64((void *)literal,
+ (unsigned long)ops);
+}
+
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec)
+{
+ return ftrace_rec_set_ops(rec, &ftrace_nop_ops);
+}
+
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec)
+{
+ return ftrace_rec_set_ops(rec, arm64_rec_get_ops(rec));
+}
+#else
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; }
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; }
+#endif
+
/*
* Turn on the call to ftrace_caller() in instrumented function
*/
@@ -183,6 +311,11 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long pc = rec->ip;
u32 old, new;
+ int ret;
+
+ ret = ftrace_rec_update_ops(rec);
+ if (ret)
+ return ret;
if (!ftrace_find_callable_addr(rec, NULL, &addr))
return -EINVAL;
@@ -193,6 +326,19 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
return ftrace_modify_code(pc, old, new, true);
}
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ if (WARN_ON_ONCE(old_addr != (unsigned long)ftrace_caller))
+ return -EINVAL;
+ if (WARN_ON_ONCE(addr != (unsigned long)ftrace_caller))
+ return -EINVAL;
+
+ return ftrace_rec_update_ops(rec);
+}
+#endif
+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
/*
* The compiler has inserted two NOPs before the regular function prologue.
@@ -209,7 +355,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* | NOP | MOV X9, LR | MOV X9, LR |
* | NOP | NOP | BL <entry> |
*
- * The LR value will be recovered by ftrace_regs_entry, and restored into LR
+ * The LR value will be recovered by ftrace_caller, and restored into LR
* before returning to the regular function prologue. When a function is not
* being traced, the MOV is not harmful given x9 is not live per the AAPCS.
*
@@ -220,6 +366,11 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
{
unsigned long pc = rec->ip - AARCH64_INSN_SIZE;
u32 old, new;
+ int ret;
+
+ ret = ftrace_rec_set_nop_ops(rec);
+ if (ret)
+ return ret;
old = aarch64_insn_gen_nop();
new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
@@ -237,9 +388,14 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
{
unsigned long pc = rec->ip;
u32 old = 0, new;
+ int ret;
new = aarch64_insn_gen_nop();
+ ret = ftrace_rec_set_nop_ops(rec);
+ if (ret)
+ return ret;
+
/*
* When using mcount, callsites in modules may have been initialized to
* call an arbitrary module PLT (which redirects to the _mcount stub)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 952e17bd1c0b..b98970907226 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -70,13 +70,14 @@
__EFI_PE_HEADER
- __INIT
+ .section ".idmap.text","a"
/*
* The following callee saved general purpose registers are used on the
* primary lowlevel boot path:
*
* Register Scope Purpose
+ * x19 primary_entry() .. start_kernel() whether we entered with the MMU on
* x20 primary_entry() .. __primary_switch() CPU boot mode
* x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
* x22 create_idmap() .. start_kernel() ID map VA of the DT blob
@@ -86,10 +87,23 @@
* x28 create_idmap() callee preserved temp register
*/
SYM_CODE_START(primary_entry)
+ bl record_mmu_state
bl preserve_boot_args
+ bl create_idmap
+
+ /*
+ * If we entered with the MMU and caches on, clean the ID mapped part
+ * of the primary boot code to the PoC so we can safely execute it with
+ * the MMU off.
+ */
+ cbz x19, 0f
+ adrp x0, __idmap_text_start
+ adr_l x1, __idmap_text_end
+ adr_l x2, dcache_clean_poc
+ blr x2
+0: mov x0, x19
bl init_kernel_el // w0=cpu_boot_mode
mov x20, x0
- bl create_idmap
/*
* The following calls CPU setup code, see arch/arm64/mm/proc.S for
@@ -109,6 +123,40 @@ SYM_CODE_START(primary_entry)
b __primary_switch
SYM_CODE_END(primary_entry)
+ __INIT
+SYM_CODE_START_LOCAL(record_mmu_state)
+ mrs x19, CurrentEL
+ cmp x19, #CurrentEL_EL2
+ mrs x19, sctlr_el1
+ b.ne 0f
+ mrs x19, sctlr_el2
+0:
+CPU_LE( tbnz x19, #SCTLR_ELx_EE_SHIFT, 1f )
+CPU_BE( tbz x19, #SCTLR_ELx_EE_SHIFT, 1f )
+ tst x19, #SCTLR_ELx_C // Z := (C == 0)
+ and x19, x19, #SCTLR_ELx_M // isolate M bit
+ csel x19, xzr, x19, eq // clear x19 if Z
+ ret
+
+ /*
+ * Set the correct endianness early so all memory accesses issued
+ * before init_kernel_el() occur in the correct byte order. Note that
+ * this means the MMU must be disabled, or the active ID map will end
+ * up getting interpreted with the wrong byte order.
+ */
+1: eor x19, x19, #SCTLR_ELx_EE
+ bic x19, x19, #SCTLR_ELx_M
+ b.ne 2f
+ pre_disable_mmu_workaround
+ msr sctlr_el2, x19
+ b 3f
+ pre_disable_mmu_workaround
+2: msr sctlr_el1, x19
+3: isb
+ mov x19, xzr
+ ret
+SYM_CODE_END(record_mmu_state)
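record_mmu_state packs a fair amount into a few instructions, so a C restatement may help: read the SCTLR for the exception level we entered at, and if its EE bit disagrees with the kernel's build-time endianness, fix the endianness with the MMU off and report "MMU off"; otherwise report "MMU on" only when both the M and C bits are set. A sketch under those assumptions (not kernel code; the read/write helpers are stand-ins for the mrs/msr accesses to SCTLR_EL1 or SCTLR_EL2):

#include <stdbool.h>
#include <stdint.h>

#define SCTLR_ELx_M	(1UL << 0)	/* MMU enable */
#define SCTLR_ELx_C	(1UL << 2)	/* data cache enable */
#define SCTLR_ELx_EE	(1UL << 25)	/* exception endianness */

static uint64_t fake_sctlr;			/* stands in for SCTLR_EL1/EL2 */
static uint64_t read_sctlr(void)		{ return fake_sctlr; }
static void write_sctlr(uint64_t val)		{ fake_sctlr = val; }

/* Returns true only if we were entered with the MMU and D-cache on. */
static bool record_mmu_state_sketch(bool kernel_is_big_endian)
{
	uint64_t sctlr = read_sctlr();
	bool ee = sctlr & SCTLR_ELx_EE;

	if (ee != kernel_is_big_endian) {
		/* Wrong byte order: flip EE with the MMU disabled, report "off". */
		write_sctlr((sctlr ^ SCTLR_ELx_EE) & ~SCTLR_ELx_M);
		return false;
	}

	return (sctlr & SCTLR_ELx_M) && (sctlr & SCTLR_ELx_C);
}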
+
/*
* Preserve the arguments passed by the bootloader in x0 .. x3
*/
@@ -119,11 +167,14 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
stp x21, x1, [x0] // x0 .. x3 at kernel entry
stp x2, x3, [x0, #16]
+ cbnz x19, 0f // skip cache invalidation if MMU is on
dmb sy // needed before dc ivac with
// MMU off
add x1, x0, #0x20 // 4 x 8 bytes
b dcache_inval_poc // tail call
+0: str_l x19, mmu_enabled_at_boot, x0
+ ret
SYM_CODE_END(preserve_boot_args)
SYM_FUNC_START_LOCAL(clear_page_tables)
@@ -360,12 +411,13 @@ SYM_FUNC_START_LOCAL(create_idmap)
* accesses (MMU disabled), invalidate those tables again to
* remove any speculatively loaded cache lines.
*/
+ cbnz x19, 0f // skip cache invalidation if MMU is on
dmb sy
adrp x0, init_idmap_pg_dir
adrp x1, init_idmap_pg_end
bl dcache_inval_poc
- ret x28
+0: ret x28
SYM_FUNC_END(create_idmap)
SYM_FUNC_START_LOCAL(create_kernel_mapping)
@@ -404,7 +456,7 @@ SYM_FUNC_END(create_kernel_mapping)
stp xzr, xzr, [sp, #S_STACKFRAME]
add x29, sp, #S_STACKFRAME
- scs_load \tsk
+ scs_load_current
adr_l \tmp1, __per_cpu_offset
ldr w\tmp2, [\tsk, #TSK_TI_CPU]
@@ -476,7 +528,7 @@ SYM_FUNC_END(__primary_switched)
* end early head section, begin head code that is also used for
* hotplug and needs to have the same protections as the text region
*/
- .section ".idmap.text","awx"
+ .section ".idmap.text","a"
/*
* Starting from EL2 or EL1, configure the CPU to execute at the highest
@@ -489,14 +541,17 @@ SYM_FUNC_END(__primary_switched)
* Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x0 if
* booted in EL1 or EL2 respectively, with the top 32 bits containing
* potential context flags. These flags are *not* stored in __boot_cpu_mode.
+ *
+ * x0: whether we are being called from the primary boot path with the MMU on
*/
SYM_FUNC_START(init_kernel_el)
- mrs x0, CurrentEL
- cmp x0, #CurrentEL_EL2
+ mrs x1, CurrentEL
+ cmp x1, #CurrentEL_EL2
b.eq init_el2
SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
+ pre_disable_mmu_workaround
msr sctlr_el1, x0
isb
mov_q x0, INIT_PSTATE_EL1
@@ -506,6 +561,15 @@ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
eret
SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
+ msr elr_el2, lr
+
+ // clean all HYP code to the PoC if we booted at EL2 with the MMU on
+ cbz x0, 0f
+ adrp x0, __hyp_idmap_text_start
+ adr_l x1, __hyp_text_end
+ adr_l x2, dcache_clean_poc
+ blr x2
+0:
mov_q x0, HCR_HOST_NVHE_FLAGS
msr hcr_el2, x0
isb
@@ -529,38 +593,27 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
cbz x0, 1f
/* Set a sane SCTLR_EL1, the VHE way */
+ pre_disable_mmu_workaround
msr_s SYS_SCTLR_EL12, x1
mov x2, #BOOT_CPU_FLAG_E2H
b 2f
1:
+ pre_disable_mmu_workaround
msr sctlr_el1, x1
mov x2, xzr
2:
- msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2
orr x0, x0, x2
eret
SYM_FUNC_END(init_kernel_el)
-/*
- * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
- * in w0. See arch/arm64/include/asm/virt.h for more info.
- */
-SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
- adr_l x1, __boot_cpu_mode
- cmp w0, #BOOT_CPU_MODE_EL2
- b.ne 1f
- add x1, x1, #4
-1: str w0, [x1] // Save CPU boot mode
- ret
-SYM_FUNC_END(set_cpu_boot_mode_flag)
-
/*
* This provides a "holding pen" for platforms to hold all secondary
* cores until we're ready for them to initialise.
*/
SYM_FUNC_START(secondary_holding_pen)
+ mov x0, xzr
bl init_kernel_el // w0=cpu_boot_mode
mrs x2, mpidr_el1
mov_q x1, MPIDR_HWID_BITMASK
@@ -578,6 +631,7 @@ SYM_FUNC_END(secondary_holding_pen)
* be used where CPUs are brought online dynamically by the kernel.
*/
SYM_FUNC_START(secondary_entry)
+ mov x0, xzr
bl init_kernel_el // w0=cpu_boot_mode
b secondary_startup
SYM_FUNC_END(secondary_entry)
@@ -587,7 +641,6 @@ SYM_FUNC_START_LOCAL(secondary_startup)
* Common entry point for secondary CPUs.
*/
mov x20, x0 // preserve boot mode
- bl finalise_el2
bl __cpu_secondary_check52bitva
#if VA_BITS > 48
ldr_l x0, vabits_actual
@@ -600,9 +653,14 @@ SYM_FUNC_START_LOCAL(secondary_startup)
br x8
SYM_FUNC_END(secondary_startup)
+ .text
SYM_FUNC_START_LOCAL(__secondary_switched)
mov x0, x20
bl set_cpu_boot_mode_flag
+
+ mov x0, x20
+ bl finalise_el2
+
str_l xzr, __early_cpu_boot_status, x3
adr_l x5, vectors
msr vbar_el1, x5
@@ -629,6 +687,19 @@ SYM_FUNC_START_LOCAL(__secondary_too_slow)
SYM_FUNC_END(__secondary_too_slow)
/*
+ * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
+ * in w0. See arch/arm64/include/asm/virt.h for more info.
+ */
+SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
+ adr_l x1, __boot_cpu_mode
+ cmp w0, #BOOT_CPU_MODE_EL2
+ b.ne 1f
+ add x1, x1, #4
+1: str w0, [x1] // Save CPU boot mode
+ ret
+SYM_FUNC_END(set_cpu_boot_mode_flag)
+
+/*
* The booting CPU updates the failed status @__early_cpu_boot_status,
* with MMU turned off.
*
@@ -659,6 +730,7 @@ SYM_FUNC_END(__secondary_too_slow)
* Checks if the selected granule size is supported by the CPU.
* If it isn't, park the CPU
*/
+ .section ".idmap.text","a"
SYM_FUNC_START(__enable_mmu)
mrs x3, ID_AA64MMFR0_EL1
ubfx x3, x3, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 2ee18c860f2a..111ff33d93ee 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -132,6 +132,13 @@ SYM_CODE_START_LOCAL(__finalise_el2)
orr x0, x0, SMCR_ELx_FA64_MASK
.Lskip_sme_fa64:
+ // ZT0 available?
+ mrs_s x1, SYS_ID_AA64SMFR0_EL1
+ __check_override id_aa64smfr0 ID_AA64SMFR0_EL1_SMEver_SHIFT 4 .Linit_sme_zt0 .Lskip_sme_zt0
+.Linit_sme_zt0:
+ orr x0, x0, SMCR_ELx_EZT0_MASK
+.Lskip_sme_zt0:
+
orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector
msr_s SYS_SMCR_EL2, x0 // length for EL1.
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 95133765ed29..d833d78a7f31 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -131,6 +131,7 @@ static const struct ftr_set_desc smfr0 __initconst = {
.name = "id_aa64smfr0",
.override = &id_aa64smfr0_override,
.fields = {
+ FIELD("smever", ID_AA64SMFR0_EL1_SMEver_SHIFT, NULL),
/* FA64 is a one bit field... :-/ */
{ "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, },
{}
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index d0e9bb5c91fc..8309197c0ebd 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -10,7 +10,7 @@
#error This file should only be included in vmlinux.lds.S
#endif
-PROVIDE(__efistub_primary_entry_offset = primary_entry - _text);
+PROVIDE(__efistub_primary_entry = primary_entry);
/*
* The EFI stub has its own symbol namespace prefixed by __efistub_, to
@@ -21,10 +21,11 @@ PROVIDE(__efistub_primary_entry_offset = primary_entry - _text);
* linked at. The routines below are all implemented in assembler in a
* position independent manner
*/
-PROVIDE(__efistub_dcache_clean_poc = __pi_dcache_clean_poc);
+PROVIDE(__efistub_caches_clean_inval_pou = __pi_caches_clean_inval_pou);
PROVIDE(__efistub__text = _text);
PROVIDE(__efistub__end = _end);
+PROVIDE(__efistub___inittext_end = __inittext_end);
PROVIDE(__efistub__edata = _edata);
PROVIDE(__efistub_screen_info = screen_info);
PROVIDE(__efistub__ctype = _ctype);
@@ -67,9 +68,7 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors);
KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
-/* Static key checked in pmr_sync(). */
#ifdef CONFIG_ARM64_PSEUDO_NMI
-KVM_NVHE_ALIAS(gic_pmr_sync);
/* Static key checked in GIC_PRIO_IRQOFF. */
KVM_NVHE_ALIAS(gic_nonsecure_priorities);
#endif
diff --git a/arch/arm64/kernel/patch-scs.c b/arch/arm64/kernel/patch-scs.c
index 1b3da02d5b74..a1fe4b4ff591 100644
--- a/arch/arm64/kernel/patch-scs.c
+++ b/arch/arm64/kernel/patch-scs.c
@@ -130,7 +130,8 @@ struct eh_frame {
static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
bool fde_has_augmentation_data,
- int code_alignment_factor)
+ int code_alignment_factor,
+ bool dry_run)
{
int size = frame->size - offsetof(struct eh_frame, opcodes) + 4;
u64 loc = (u64)offset_to_ptr(&frame->initial_loc);
@@ -184,7 +185,8 @@ static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
break;
case DW_CFA_negate_ra_state:
- scs_patch_loc(loc - 4);
+ if (!dry_run)
+ scs_patch_loc(loc - 4);
break;
case 0x40 ... 0x7f:
@@ -235,9 +237,12 @@ int noinstr scs_patch(const u8 eh_frame[], int size)
} else {
ret = scs_handle_fde_frame(frame,
fde_has_augmentation_data,
- code_alignment_factor);
+ code_alignment_factor,
+ true);
if (ret)
return ret;
+ scs_handle_fde_frame(frame, fde_has_augmentation_data,
+ code_alignment_factor, false);
}
p += sizeof(frame->size) + frame->size;
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index 33e0fabc0b79..b4835f6d594b 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -88,6 +88,23 @@ int __kprobes aarch64_insn_write(void *addr, u32 insn)
return __aarch64_insn_write(addr, cpu_to_le32(insn));
}
+noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val)
+{
+ u64 *waddr;
+ unsigned long flags;
+ int ret;
+
+ raw_spin_lock_irqsave(&patch_lock, flags);
+ waddr = patch_map(addr, FIX_TEXT_POKE0);
+
+ ret = copy_to_kernel_nofault(waddr, &val, sizeof(val));
+
+ patch_unmap(FIX_TEXT_POKE0);
+ raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+ return ret;
+}
+
int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
{
u32 *tp = addr;
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index f35d059a9a36..70b91a8c6bb3 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -387,10 +387,6 @@ int __init arch_populate_kprobe_blacklist(void)
(unsigned long)__irqentry_text_end);
if (ret)
return ret;
- ret = kprobe_add_area_blacklist((unsigned long)__idmap_text_start,
- (unsigned long)__idmap_text_end);
- if (ret)
- return ret;
ret = kprobe_add_area_blacklist((unsigned long)__hyp_text_start,
(unsigned long)__hyp_text_end);
if (ret || is_kernel_in_hyp_mode())
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 269ac1c25ae2..71d59b5abede 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -307,27 +307,28 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
/*
* In the unlikely event that we create a new thread with ZA
- * enabled we should retain the ZA state so duplicate it here.
- * This may be shortly freed if we exec() or if CLONE_SETTLS
- * but it's simpler to do it here. To avoid confusing the rest
- * of the code ensure that we have a sve_state allocated
- * whenever za_state is allocated.
+ * enabled we should retain the ZA and ZT state so duplicate
+ * it here. This may be shortly freed if we exec() or if
+ * CLONE_SETTLS but it's simpler to do it here. To avoid
+ * confusing the rest of the code ensure that we have a
+ * sve_state allocated whenever sme_state is allocated.
*/
if (thread_za_enabled(&src->thread)) {
dst->thread.sve_state = kzalloc(sve_state_size(src),
GFP_KERNEL);
if (!dst->thread.sve_state)
return -ENOMEM;
- dst->thread.za_state = kmemdup(src->thread.za_state,
- za_state_size(src),
- GFP_KERNEL);
- if (!dst->thread.za_state) {
+
+ dst->thread.sme_state = kmemdup(src->thread.sme_state,
+ sme_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.sme_state) {
kfree(dst->thread.sve_state);
dst->thread.sve_state = NULL;
return -ENOMEM;
}
} else {
- dst->thread.za_state = NULL;
+ dst->thread.sme_state = NULL;
clear_tsk_thread_flag(dst, TIF_SME);
}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 0c321ad23cd3..d7f4f0d1ae12 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -683,7 +683,7 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
unsigned long tls[2];
tls[0] = target->thread.uw.tp_value;
- if (system_supports_sme())
+ if (system_supports_tpidr2())
tls[1] = target->thread.tpidr2_el0;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, tls, 0, count);
@@ -691,7 +691,7 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
return ret;
target->thread.uw.tp_value = tls[0];
- if (system_supports_sme())
+ if (system_supports_tpidr2())
target->thread.tpidr2_el0 = tls[1];
return ret;
@@ -1045,7 +1045,7 @@ static int za_get(struct task_struct *target,
if (thread_za_enabled(&target->thread)) {
start = end;
end = ZA_PT_SIZE(vq);
- membuf_write(&to, target->thread.za_state, end - start);
+ membuf_write(&to, target->thread.sme_state, end - start);
}
/* Zero any trailing padding */
@@ -1099,7 +1099,7 @@ static int za_set(struct task_struct *target,
/* Allocate/reinit ZA storage */
sme_alloc(target);
- if (!target->thread.za_state) {
+ if (!target->thread.sme_state) {
ret = -ENOMEM;
goto out;
}
@@ -1124,7 +1124,7 @@ static int za_set(struct task_struct *target,
start = ZA_PT_ZA_OFFSET;
end = ZA_PT_SIZE(vq);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.za_state,
+ target->thread.sme_state,
start, end);
if (ret)
goto out;
@@ -1138,6 +1138,51 @@ out:
return ret;
}
+static int zt_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!system_supports_sme2())
+ return -EINVAL;
+
+ /*
+ * If PSTATE.ZA is not set then ZT will be zeroed when it is
+ * enabled so report the current register value as zero.
+ */
+ if (thread_za_enabled(&target->thread))
+ membuf_write(&to, thread_zt_state(&target->thread),
+ ZT_SIG_REG_BYTES);
+ else
+ membuf_zero(&to, ZT_SIG_REG_BYTES);
+
+ return 0;
+}
+
+static int zt_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!system_supports_sme2())
+ return -EINVAL;
+
+ if (!thread_za_enabled(&target->thread)) {
+ sme_alloc(target);
+ if (!target->thread.sme_state)
+ return -ENOMEM;
+ }
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ thread_zt_state(&target->thread),
+ 0, ZT_SIG_REG_BYTES);
+ if (ret == 0)
+ target->thread.svcr |= SVCR_ZA_MASK;
+
+ return ret;
+}
+
#endif /* CONFIG_ARM64_SME */
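The new ZT regset is reached from userspace through the standard PTRACE_GETREGSET/PTRACE_SETREGSET interface, just like the existing ZA regset. A minimal reader sketch follows; NT_ARM_ZT and the 64-byte ZT0 size are taken from the updated UAPI headers, and the fallback #define below is an assumption for older headers, not an authoritative value:

#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <linux/elf.h>

#ifndef NT_ARM_ZT
#define NT_ARM_ZT	0x40d	/* assumed value; prefer your elf.h definition */
#endif

#define ZT0_BYTES	64	/* 512-bit ZT0, i.e. ZT_SIG_REG_BYTES */

/* Read the traced task's ZT0 into zt0[]; fails with EINVAL without SME2. */
static long read_zt0(pid_t pid, unsigned char zt0[ZT0_BYTES])
{
	struct iovec iov = { .iov_base = zt0, .iov_len = ZT0_BYTES };

	return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov);
}

As with zt_set() above, writing the regset back via PTRACE_SETREGSET sets SVCR.ZA for the target if the copy-in succeeds.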
#ifdef CONFIG_ARM64_PTR_AUTH
@@ -1360,6 +1405,7 @@ enum aarch64_regset {
#ifdef CONFIG_ARM64_SME
REGSET_SSVE,
REGSET_ZA,
+ REGSET_ZT,
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
REGSET_PAC_MASK,
@@ -1467,6 +1513,14 @@ static const struct user_regset aarch64_regsets[] = {
.regset_get = za_get,
.set = za_set,
},
+ [REGSET_ZT] = { /* SME ZT */
+ .core_note_type = NT_ARM_ZT,
+ .n = 1,
+ .size = ZT_SIG_REG_BYTES,
+ .align = sizeof(u64),
+ .regset_get = zt_get,
+ .set = zt_set,
+ },
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
[REGSET_PAC_MASK] = {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 12cfe9d0d3fa..b8ec7b3ac9cb 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -58,6 +58,7 @@ static int num_standard_resources;
static struct resource *standard_resources;
phys_addr_t __fdt_pointer __initdata;
+u64 mmu_enabled_at_boot __initdata;
/*
* Standard memory resources
@@ -332,8 +333,12 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
xen_early_init();
efi_init();
- if (!efi_enabled(EFI_BOOT) && ((u64)_text % MIN_KIMG_ALIGN) != 0)
- pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!");
+ if (!efi_enabled(EFI_BOOT)) {
+ if ((u64)_text % MIN_KIMG_ALIGN)
+ pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!");
+ WARN_TAINT(mmu_enabled_at_boot, TAINT_FIRMWARE_WORKAROUND,
+ FW_BUG "Booted with MMU enabled!");
+ }
arm64_memblock_init();
@@ -442,3 +447,11 @@ static int __init register_arm64_panic_block(void)
return 0;
}
device_initcall(register_arm64_panic_block);
+
+static int __init check_mmu_enabled_at_boot(void)
+{
+ if (!efi_enabled(EFI_BOOT) && mmu_enabled_at_boot)
+ panic("Non-EFI boot detected with MMU and caches enabled");
+ return 0;
+}
+device_initcall_sync(check_mmu_enabled_at_boot);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index be279fd48248..06a02707f488 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -56,7 +56,9 @@ struct rt_sigframe_user_layout {
unsigned long fpsimd_offset;
unsigned long esr_offset;
unsigned long sve_offset;
+ unsigned long tpidr2_offset;
unsigned long za_offset;
+ unsigned long zt_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
@@ -168,6 +170,19 @@ static void __user *apply_user_offset(
return base + offset;
}
+struct user_ctxs {
+ struct fpsimd_context __user *fpsimd;
+ u32 fpsimd_size;
+ struct sve_context __user *sve;
+ u32 sve_size;
+ struct tpidr2_context __user *tpidr2;
+ u32 tpidr2_size;
+ struct za_context __user *za;
+ u32 za_size;
+ struct zt_context __user *zt;
+ u32 zt_size;
+};
+
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
{
struct user_fpsimd_state const *fpsimd =
@@ -186,25 +201,20 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
return err ? -EFAULT : 0;
}
-static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
+static int restore_fpsimd_context(struct user_ctxs *user)
{
struct user_fpsimd_state fpsimd;
- __u32 magic, size;
int err = 0;
- /* check the magic/size information */
- __get_user_error(magic, &ctx->head.magic, err);
- __get_user_error(size, &ctx->head.size, err);
- if (err)
- return -EFAULT;
- if (magic != FPSIMD_MAGIC || size != sizeof(struct fpsimd_context))
+ /* check the size information */
+ if (user->fpsimd_size != sizeof(struct fpsimd_context))
return -EINVAL;
/* copy the FP and status/control registers */
- err = __copy_from_user(fpsimd.vregs, ctx->vregs,
+ err = __copy_from_user(fpsimd.vregs, &(user->fpsimd->vregs),
sizeof(fpsimd.vregs));
- __get_user_error(fpsimd.fpsr, &ctx->fpsr, err);
- __get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
+ __get_user_error(fpsimd.fpsr, &(user->fpsimd->fpsr), err);
+ __get_user_error(fpsimd.fpcr, &(user->fpsimd->fpcr), err);
clear_thread_flag(TIF_SVE);
current->thread.fp_type = FP_STATE_FPSIMD;
@@ -217,12 +227,6 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
}
-struct user_ctxs {
- struct fpsimd_context __user *fpsimd;
- struct sve_context __user *sve;
- struct za_context __user *za;
-};
-
#ifdef CONFIG_ARM64_SVE
static int preserve_sve_context(struct sve_context __user *ctx)
@@ -267,15 +271,20 @@ static int preserve_sve_context(struct sve_context __user *ctx)
static int restore_sve_fpsimd_context(struct user_ctxs *user)
{
- int err;
+ int err = 0;
unsigned int vl, vq;
struct user_fpsimd_state fpsimd;
- struct sve_context sve;
+ u16 user_vl, flags;
- if (__copy_from_user(&sve, user->sve, sizeof(sve)))
- return -EFAULT;
+ if (user->sve_size < sizeof(*user->sve))
+ return -EINVAL;
+
+ __get_user_error(user_vl, &(user->sve->vl), err);
+ __get_user_error(flags, &(user->sve->flags), err);
+ if (err)
+ return err;
- if (sve.flags & SVE_SIG_FLAG_SM) {
+ if (flags & SVE_SIG_FLAG_SM) {
if (!system_supports_sme())
return -EINVAL;
@@ -292,19 +301,19 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
vl = task_get_sve_vl(current);
}
- if (sve.vl != vl)
+ if (user_vl != vl)
return -EINVAL;
- if (sve.head.size <= sizeof(*user->sve)) {
+ if (user->sve_size == sizeof(*user->sve)) {
clear_thread_flag(TIF_SVE);
current->thread.svcr &= ~SVCR_SM_MASK;
current->thread.fp_type = FP_STATE_FPSIMD;
goto fpsimd_only;
}
- vq = sve_vq_from_vl(sve.vl);
+ vq = sve_vq_from_vl(vl);
- if (sve.head.size < SVE_SIG_CONTEXT_SIZE(vq))
+ if (user->sve_size < SVE_SIG_CONTEXT_SIZE(vq))
return -EINVAL;
/*
@@ -330,7 +339,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
if (err)
return -EFAULT;
- if (sve.flags & SVE_SIG_FLAG_SM)
+ if (flags & SVE_SIG_FLAG_SM)
current->thread.svcr |= SVCR_SM_MASK;
else
set_thread_flag(TIF_SVE);
@@ -366,6 +375,34 @@ extern int preserve_sve_context(void __user *ctx);
#ifdef CONFIG_ARM64_SME
+static int preserve_tpidr2_context(struct tpidr2_context __user *ctx)
+{
+ int err = 0;
+
+ current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+
+ __put_user_error(TPIDR2_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(current->thread.tpidr2_el0, &ctx->tpidr2, err);
+
+ return err;
+}
+
+static int restore_tpidr2_context(struct user_ctxs *user)
+{
+ u64 tpidr2_el0;
+ int err = 0;
+
+ if (user->tpidr2_size != sizeof(*user->tpidr2))
+ return -EINVAL;
+
+ __get_user_error(tpidr2_el0, &user->tpidr2->tpidr2, err);
+ if (!err)
+ current->thread.tpidr2_el0 = tpidr2_el0;
+
+ return err;
+}
+
static int preserve_za_context(struct za_context __user *ctx)
{
int err = 0;
@@ -394,7 +431,7 @@ static int preserve_za_context(struct za_context __user *ctx)
* fpsimd_signal_preserve_current_state().
*/
err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET,
- current->thread.za_state,
+ current->thread.sme_state,
ZA_SIG_REGS_SIZE(vq));
}
@@ -403,29 +440,33 @@ static int preserve_za_context(struct za_context __user *ctx)
static int restore_za_context(struct user_ctxs *user)
{
- int err;
+ int err = 0;
unsigned int vq;
- struct za_context za;
+ u16 user_vl;
- if (__copy_from_user(&za, user->za, sizeof(za)))
- return -EFAULT;
+ if (user->za_size < sizeof(*user->za))
+ return -EINVAL;
+
+ __get_user_error(user_vl, &(user->za->vl), err);
+ if (err)
+ return err;
- if (za.vl != task_get_sme_vl(current))
+ if (user_vl != task_get_sme_vl(current))
return -EINVAL;
- if (za.head.size <= sizeof(*user->za)) {
+ if (user->za_size == sizeof(*user->za)) {
current->thread.svcr &= ~SVCR_ZA_MASK;
return 0;
}
- vq = sve_vq_from_vl(za.vl);
+ vq = sve_vq_from_vl(user_vl);
- if (za.head.size < ZA_SIG_CONTEXT_SIZE(vq))
+ if (user->za_size < ZA_SIG_CONTEXT_SIZE(vq))
return -EINVAL;
/*
* Careful: we are about to __copy_from_user() directly into
- * thread.za_state with preemption enabled, so protection is
+ * thread.sme_state with preemption enabled, so protection is
* needed to prevent a racing context switch from writing stale
* registers back over the new data.
*/
@@ -434,13 +475,13 @@ static int restore_za_context(struct user_ctxs *user)
/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
sme_alloc(current);
- if (!current->thread.za_state) {
+ if (!current->thread.sme_state) {
current->thread.svcr &= ~SVCR_ZA_MASK;
clear_thread_flag(TIF_SME);
return -ENOMEM;
}
- err = __copy_from_user(current->thread.za_state,
+ err = __copy_from_user(current->thread.sme_state,
(char __user const *)user->za +
ZA_SIG_REGS_OFFSET,
ZA_SIG_REGS_SIZE(vq));
@@ -452,11 +493,83 @@ static int restore_za_context(struct user_ctxs *user)
return 0;
}
+
+static int preserve_zt_context(struct zt_context __user *ctx)
+{
+ int err = 0;
+ u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+
+ if (WARN_ON(!thread_za_enabled(&current->thread)))
+ return -EINVAL;
+
+ memset(reserved, 0, sizeof(reserved));
+
+ __put_user_error(ZT_MAGIC, &ctx->head.magic, err);
+ __put_user_error(round_up(ZT_SIG_CONTEXT_SIZE(1), 16),
+ &ctx->head.size, err);
+ __put_user_error(1, &ctx->nregs, err);
+ BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
+ err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
+
+ /*
+ * This assumes that the ZT state has already been saved to
+ * the task struct by calling the function
+ * fpsimd_signal_preserve_current_state().
+ */
+ err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET,
+ thread_zt_state(&current->thread),
+ ZT_SIG_REGS_SIZE(1));
+
+ return err ? -EFAULT : 0;
+}
+
+static int restore_zt_context(struct user_ctxs *user)
+{
+ int err;
+ u16 nregs;
+
+ /* ZA must be restored first for this check to be valid */
+ if (!thread_za_enabled(&current->thread))
+ return -EINVAL;
+
+ if (user->zt_size != ZT_SIG_CONTEXT_SIZE(1))
+ return -EINVAL;
+
+ if (__copy_from_user(&nregs, &(user->zt->nregs), sizeof(nregs)))
+ return -EFAULT;
+
+ if (nregs != 1)
+ return -EINVAL;
+
+ /*
+ * Careful: we are about to __copy_from_user() directly into
+ * thread.zt_state with preemption enabled, so protection is
+ * needed to prevent a racing context switch from writing stale
+ * registers back over the new data.
+ */
+
+ fpsimd_flush_task_state(current);
+ /* From now, fpsimd_thread_switch() won't touch ZT in thread state */
+
+ err = __copy_from_user(thread_zt_state(&current->thread),
+ (char __user const *)user->zt +
+ ZT_SIG_REGS_OFFSET,
+ ZT_SIG_REGS_SIZE(1));
+ if (err)
+ return -EFAULT;
+
+ return 0;
+}
+
#else /* ! CONFIG_ARM64_SME */
/* Turn any non-optimised out attempts to use these into a link error: */
+extern int preserve_tpidr2_context(void __user *ctx);
+extern int restore_tpidr2_context(struct user_ctxs *user);
extern int preserve_za_context(void __user *ctx);
extern int restore_za_context(struct user_ctxs *user);
+extern int preserve_zt_context(void __user *ctx);
+extern int restore_zt_context(struct user_ctxs *user);
#endif /* ! CONFIG_ARM64_SME */
@@ -473,7 +586,9 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->fpsimd = NULL;
user->sve = NULL;
+ user->tpidr2 = NULL;
user->za = NULL;
+ user->zt = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
@@ -516,10 +631,8 @@ static int parse_user_sigframe(struct user_ctxs *user,
if (user->fpsimd)
goto invalid;
- if (size < sizeof(*user->fpsimd))
- goto invalid;
-
user->fpsimd = (struct fpsimd_context __user *)head;
+ user->fpsimd_size = size;
break;
case ESR_MAGIC:
@@ -533,10 +646,19 @@ static int parse_user_sigframe(struct user_ctxs *user,
if (user->sve)
goto invalid;
- if (size < sizeof(*user->sve))
+ user->sve = (struct sve_context __user *)head;
+ user->sve_size = size;
+ break;
+
+ case TPIDR2_MAGIC:
+ if (!system_supports_sme())
goto invalid;
- user->sve = (struct sve_context __user *)head;
+ if (user->tpidr2)
+ goto invalid;
+
+ user->tpidr2 = (struct tpidr2_context __user *)head;
+ user->tpidr2_size = size;
break;
case ZA_MAGIC:
@@ -546,10 +668,19 @@ static int parse_user_sigframe(struct user_ctxs *user,
if (user->za)
goto invalid;
- if (size < sizeof(*user->za))
+ user->za = (struct za_context __user *)head;
+ user->za_size = size;
+ break;
+
+ case ZT_MAGIC:
+ if (!system_supports_sme2())
goto invalid;
- user->za = (struct za_context __user *)head;
+ if (user->zt)
+ goto invalid;
+
+ user->zt = (struct zt_context __user *)head;
+ user->zt_size = size;
break;
case EXTRA_MAGIC:
@@ -668,12 +799,18 @@ static int restore_sigframe(struct pt_regs *regs,
if (user.sve)
err = restore_sve_fpsimd_context(&user);
else
- err = restore_fpsimd_context(user.fpsimd);
+ err = restore_fpsimd_context(&user);
}
+ if (err == 0 && system_supports_sme() && user.tpidr2)
+ err = restore_tpidr2_context(&user);
+
if (err == 0 && system_supports_sme() && user.za)
err = restore_za_context(&user);
+ if (err == 0 && system_supports_sme2() && user.zt)
+ err = restore_zt_context(&user);
+
return err;
}
@@ -765,6 +902,11 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
else
vl = task_get_sme_vl(current);
+ err = sigframe_alloc(user, &user->tpidr2_offset,
+ sizeof(struct tpidr2_context));
+ if (err)
+ return err;
+
if (thread_za_enabled(&current->thread))
vq = sve_vq_from_vl(vl);
@@ -774,6 +916,15 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
+ if (system_supports_sme2()) {
+ if (add_all || thread_za_enabled(&current->thread)) {
+ err = sigframe_alloc(user, &user->zt_offset,
+ ZT_SIG_CONTEXT_SIZE(1));
+ if (err)
+ return err;
+ }
+ }
+
return sigframe_alloc_end(user);
}
@@ -822,6 +973,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= preserve_sve_context(sve_ctx);
}
+ /* TPIDR2 if supported */
+ if (system_supports_sme() && err == 0) {
+ struct tpidr2_context __user *tpidr2_ctx =
+ apply_user_offset(user, user->tpidr2_offset);
+ err |= preserve_tpidr2_context(tpidr2_ctx);
+ }
+
/* ZA state if present */
if (system_supports_sme() && err == 0 && user->za_offset) {
struct za_context __user *za_ctx =
@@ -829,6 +987,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= preserve_za_context(za_ctx);
}
+ /* ZT state if present */
+ if (system_supports_sme2() && err == 0 && user->zt_offset) {
+ struct zt_context __user *zt_ctx =
+ apply_user_offset(user, user->zt_offset);
+ err |= preserve_zt_context(zt_ctx);
+ }
+
if (err == 0 && user->extra_offset) {
char __user *sfp = (char __user *)user->sigframe;
char __user *userp =
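As a rough userspace counterpart to the records preserved above, the sketch below walks a delivered signal frame's __reserved area and reports TPIDR2 if a record is found. This is a minimal illustration only, assuming an arm64 toolchain whose <asm/sigcontext.h> already carries TPIDR2_MAGIC and struct tpidr2_context from this series; it ignores EXTRA_MAGIC continuation records and is not part of the patch.

#include <signal.h>
#include <stdio.h>
#include <ucontext.h>
#include <asm/sigcontext.h>

static void handler(int sig, siginfo_t *info, void *ucontext)
{
	ucontext_t *uc = ucontext;
	struct _aarch64_ctx *head =
		(struct _aarch64_ctx *)uc->uc_mcontext.__reserved;

	(void)sig;
	(void)info;

	/* Records are terminated by a header with magic == 0. */
	while (head->magic) {
		if (head->magic == TPIDR2_MAGIC) {
			struct tpidr2_context *t = (struct tpidr2_context *)head;

			/* printf() is not async-signal-safe; fine for a demo. */
			printf("TPIDR2_EL0 at signal: %#llx\n",
			       (unsigned long long)t->tpidr2);
			return;
		}
		head = (struct _aarch64_ctx *)((char *)head + head->size);
	}
	printf("no TPIDR2 record (SME not reported by this kernel)\n");
}

int main(void)
{
	struct sigaction sa = {
		.sa_sigaction = handler,
		.sa_flags = SA_SIGINFO,
	};

	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);
	return 0;
}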
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 97c9de57725d..2ae7cff1953a 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -99,8 +99,9 @@ SYM_FUNC_END(__cpu_suspend_enter)
.pushsection ".idmap.text", "awx"
SYM_CODE_START(cpu_resume)
+ mov x0, xzr
bl init_kernel_el
- bl finalise_el2
+ mov x19, x0 // preserve boot mode
#if VA_BITS > 48
ldr_l x0, vabits_actual
#endif
@@ -116,6 +117,9 @@ SYM_CODE_END(cpu_resume)
.popsection
SYM_FUNC_START(_cpu_resume)
+ mov x0, x19
+ bl finalise_el2
+
mrs x1, mpidr_el1
adr_l x8, mpidr_hash // x8 = struct mpidr_hash virt address
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index a5de47e3df2b..da84cf855c44 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -173,12 +173,8 @@ static inline void fp_user_discard(void)
* register state to track, if this changes the KVM code will
* need updating.
*/
- if (system_supports_sme() && test_thread_flag(TIF_SME)) {
- u64 svcr = read_sysreg_s(SYS_SVCR);
-
- if (svcr & SVCR_SM_MASK)
- sme_smstop_sm();
- }
+ if (system_supports_sme())
+ sme_smstop_sm();
if (!system_supports_sve())
return;
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 87f42eb1c950..a6dd3e90755c 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -163,10 +163,8 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
if (!bad)
p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
- else {
- p += sprintf(p, "bad PC value");
- break;
- }
+ else
+ p += sprintf(p, i == 0 ? "(????????) " : "???????? ");
}
printk("%sCode: %s\n", lvl, str);
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 2777214cbf1a..b9202c2ee18e 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -93,6 +93,7 @@ jiffies = jiffies_64;
#ifdef CONFIG_HIBERNATION
#define HIBERNATE_TEXT \
+ ALIGN_FUNCTION(); \
__hibernate_exit_text_start = .; \
*(.hibernate_exit.text) \
__hibernate_exit_text_end = .;
@@ -102,6 +103,7 @@ jiffies = jiffies_64;
#ifdef CONFIG_KEXEC_CORE
#define KEXEC_TEXT \
+ ALIGN_FUNCTION(); \
__relocate_new_kernel_start = .; \
*(.kexec_relocate.text) \
__relocate_new_kernel_end = .;
@@ -178,7 +180,6 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
HYPERVISOR_TEXT
- IDMAP_TEXT
*(.gnu.warning)
. = ALIGN(16);
*(.got) /* Global offset table */
@@ -205,6 +206,7 @@ SECTIONS
TRAMP_TEXT
HIBERNATE_TEXT
KEXEC_TEXT
+ IDMAP_TEXT
. = ALIGN(PAGE_SIZE);
}
@@ -354,6 +356,8 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
#ifdef CONFIG_HIBERNATION
ASSERT(__hibernate_exit_text_end - __hibernate_exit_text_start <= SZ_4K,
"Hibernate exit text is bigger than 4 KiB")
+ASSERT(__hibernate_exit_text_start == swsusp_arch_suspend_exit,
+ "Hibernate exit text does not start with swsusp_arch_suspend_exit")
#endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE,
@@ -380,4 +384,6 @@ ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET,
ASSERT(__relocate_new_kernel_end - __relocate_new_kernel_start <= SZ_4K,
"kexec relocation code is bigger than 4 KiB")
ASSERT(KEXEC_CONTROL_PAGE_SIZE >= SZ_4K, "KEXEC_CONTROL_PAGE_SIZE is broken")
+ASSERT(__relocate_new_kernel_start == arm64_relocate_new_kernel,
+ "kexec control page does not start with arm64_relocate_new_kernel")
#endif
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index fccf9ec01813..55f80fb93925 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -328,7 +328,7 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu)
* we may need to check if the host state needs to be saved.
*/
if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) &&
- !(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(SYS_PMBIDR_EL1_P_SHIFT)))
+ !(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(PMBIDR_EL1_P_SHIFT)))
vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE);
/* Check if we have TRBE implemented and available at the host */
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 02dd7e9ebd39..235775d0c825 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -143,7 +143,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
fp_state.st = &vcpu->arch.ctxt.fp_regs;
fp_state.sve_state = vcpu->arch.sve_state;
fp_state.sve_vl = vcpu->arch.sve_max_vl;
- fp_state.za_state = NULL;
+ fp_state.sme_state = NULL;
fp_state.svcr = &vcpu->arch.svcr;
fp_state.fp_type = &vcpu->arch.fp_type;
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 435346ea1504..f3aa7738b477 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -171,7 +171,7 @@ alternative_else
dsb sy // Synchronize against in-flight ld/st
isb // Prevent an early read of side-effect free ISR
mrs x2, isr_el1
- tbnz x2, #8, 2f // ISR_EL1.A
+ tbnz x2, #ISR_EL1_A_SHIFT, 2f
ret
nop
2:
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index e17455773b98..2673bde62fad 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -27,7 +27,7 @@ static void __debug_save_spe(u64 *pmscr_el1)
* Check if the host is actually using it ?
*/
reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
- if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)))
+ if (!(reg & BIT(PMBLIMITR_EL1_E_SHIFT)))
return;
/* Yes; save the control register and disable data generation */
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 081058d4e436..503567c864fd 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -56,6 +56,7 @@ SYM_FUNC_START(caches_clean_inval_pou)
caches_clean_inval_pou_macro
ret
SYM_FUNC_END(caches_clean_inval_pou)
+SYM_FUNC_ALIAS(__pi_caches_clean_inval_pou, caches_clean_inval_pou)
/*
* caches_clean_inval_user_pou(start,end)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index d77c9f56b7b4..6f9d8898a025 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -133,7 +133,7 @@ static phys_addr_t __init early_pgtable_alloc(int shift)
return phys;
}
-static bool pgattr_change_is_safe(u64 old, u64 new)
+bool pgattr_change_is_safe(u64 old, u64 new)
{
/*
* The following mapping attributes may be updated in live
@@ -142,9 +142,13 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
/* creating or taking down mappings is always safe */
- if (old == 0 || new == 0)
+ if (!pte_valid(__pte(old)) || !pte_valid(__pte(new)))
return true;
+ /* A live entry's pfn should not change */
+ if (pte_pfn(__pte(old)) != pte_pfn(__pte(new)))
+ return false;
+
/* live contiguous mappings may not be manipulated at all */
if ((old | new) & PTE_CONT)
return false;
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 066fa60b93d2..91410f488090 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -110,7 +110,6 @@ SYM_FUNC_END(cpu_do_suspend)
*
* x0: Address of context pointer
*/
- .pushsection ".idmap.text", "awx"
SYM_FUNC_START(cpu_do_resume)
ldp x2, x3, [x0]
ldp x4, x5, [x0, #16]
@@ -166,7 +165,6 @@ alternative_else_nop_endif
isb
ret
SYM_FUNC_END(cpu_do_resume)
- .popsection
#endif
.pushsection ".idmap.text", "awx"
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index dfeb2c51e257..10dcfa13390a 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -28,7 +28,9 @@ HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH_QARMA3
HAS_GENERIC_AUTH_ARCH_QARMA5
HAS_GENERIC_AUTH_IMP_DEF
-HAS_IRQ_PRIO_MASKING
+HAS_GIC_CPUIF_SYSREGS
+HAS_GIC_PRIO_MASKING
+HAS_GIC_PRIO_RELAXED_SYNC
HAS_LDAPR
HAS_LSE_ATOMICS
HAS_NO_FPSIMD
@@ -38,7 +40,6 @@ HAS_RAS_EXTN
HAS_RNG
HAS_SB
HAS_STAGE2_FWB
-HAS_SYSREG_GIC_CPUIF
HAS_TIDCP1
HAS_TLB_RANGE
HAS_VIRT_HOST_EXTN
@@ -50,6 +51,7 @@ MTE
MTE_ASYMM
SME
SME_FA64
+SME2
SPECTRE_V2
SPECTRE_V3A
SPECTRE_V4
diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk
index c350164a3955..7f27d66a17e1 100755
--- a/arch/arm64/tools/gen-sysreg.awk
+++ b/arch/arm64/tools/gen-sysreg.awk
@@ -44,6 +44,11 @@ function define_field(reg, field, msb, lsb) {
define(reg "_" field "_WIDTH", msb - lsb + 1)
}
+# Print a field _SIGNED definition for a field
+function define_field_sign(reg, field, sign) {
+ define(reg "_" field "_SIGNED", sign)
+}
+
# Parse a "<msb>[:<lsb>]" string into the global variables @msb and @lsb
function parse_bitdef(reg, field, bitdef, _bits)
{
@@ -233,6 +238,30 @@ END {
next
}
+/^SignedEnum/ {
+ change_block("Enum<", "Sysreg", "Enum")
+ expect_fields(3)
+ field = $3
+ parse_bitdef(reg, field, $2)
+
+ define_field(reg, field, msb, lsb)
+ define_field_sign(reg, field, "true")
+
+ next
+}
+
+/^UnsignedEnum/ {
+ change_block("Enum<", "Sysreg", "Enum")
+ expect_fields(3)
+ field = $3
+ parse_bitdef(reg, field, $2)
+
+ define_field(reg, field, msb, lsb)
+ define_field_sign(reg, field, "false")
+
+ next
+}
+
/^Enum/ {
change_block("Enum", "Sysreg", "Enum")
expect_fields(3)
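The _SIGNED definitions generated above matter because a few ID register fields, for example ID_AA64PFR0_EL1.FP and .AdvSIMD converted to SignedEnum further down, use 0b1111 to mean "not implemented", i.e. -1 once sign-extended, which the kernel reads via cpuid_feature_extract_signed_field(). A standalone sketch of the difference, deliberately not using the generated macros:

#include <stdint.h>
#include <stdio.h>

/* Treat a 4-bit ID register field as unsigned. */
static uint64_t field_unsigned(uint64_t reg, unsigned int shift)
{
	return (reg >> shift) & 0xf;
}

/* Treat the same 4-bit field as signed (sign-extend from bit 3). */
static int64_t field_signed(uint64_t reg, unsigned int shift)
{
	int64_t v = (reg >> shift) & 0xf;

	return (v ^ 0x8) - 0x8;
}

int main(void)
{
	uint64_t pfr0 = (uint64_t)0xf << 16;	/* FP (bits 19:16) = 0b1111 */

	printf("unsigned FP = %llu\n",
	       (unsigned long long)field_unsigned(pfr0, 16));	/* 15 */
	printf("signed   FP = %lld\n",
	       (long long)field_signed(pfr0, 16));		/* -1: not implemented */
	return 0;
}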
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 184e58fd5631..94d78acafb67 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -48,26 +48,26 @@
Sysreg ID_PFR0_EL1 3 0 0 1 0
Res0 63:32
-Enum 31:28 RAS
+UnsignedEnum 31:28 RAS
0b0000 NI
0b0001 RAS
0b0010 RASv1p1
EndEnum
-Enum 27:24 DIT
+UnsignedEnum 27:24 DIT
0b0000 NI
0b0001 IMP
EndEnum
-Enum 23:20 AMU
+UnsignedEnum 23:20 AMU
0b0000 NI
0b0001 AMUv1
0b0010 AMUv1p1
EndEnum
-Enum 19:16 CSV2
+UnsignedEnum 19:16 CSV2
0b0000 UNDISCLOSED
0b0001 IMP
0b0010 CSV2p1
EndEnum
-Enum 15:12 State3
+UnsignedEnum 15:12 State3
0b0000 NI
0b0001 IMP
EndEnum
@@ -76,12 +76,12 @@ Enum 11:8 State2
0b0001 NO_CV
0b0010 CV
EndEnum
-Enum 7:4 State1
+UnsignedEnum 7:4 State1
0b0000 NI
0b0001 THUMB
0b0010 THUMB2
EndEnum
-Enum 3:0 State0
+UnsignedEnum 3:0 State0
0b0000 NI
0b0001 IMP
EndEnum
@@ -89,12 +89,12 @@ EndSysreg
Sysreg ID_PFR1_EL1 3 0 0 1 1
Res0 63:32
-Enum 31:28 GIC
+UnsignedEnum 31:28 GIC
0b0000 NI
0b0001 GICv3
0b0010 GICv4p1
EndEnum
-Enum 27:24 Virt_frac
+UnsignedEnum 27:24 Virt_frac
0b0000 NI
0b0001 IMP
EndEnum
@@ -103,16 +103,16 @@ Enum 23:20 Sec_frac
0b0001 WALK_DISABLE
0b0010 SECURE_MEMORY
EndEnum
-Enum 19:16 GenTimer
+UnsignedEnum 19:16 GenTimer
0b0000 NI
0b0001 IMP
0b0010 ECV
EndEnum
-Enum 15:12 Virtualization
+UnsignedEnum 15:12 Virtualization
0b0000 NI
0b0001 IMP
EndEnum
-Enum 11:8 MProgMod
+UnsignedEnum 11:8 MProgMod
0b0000 NI
0b0001 IMP
EndEnum
@@ -121,7 +121,7 @@ Enum 7:4 Security
0b0001 EL3
0b0001 NSACR_RFR
EndEnum
-Enum 3:0 ProgMod
+UnsignedEnum 3:0 ProgMod
0b0000 NI
0b0001 IMP
EndEnum
@@ -129,11 +129,11 @@ EndSysreg
Sysreg ID_DFR0_EL1 3 0 0 1 2
Res0 63:32
-Enum 31:28 TraceFilt
+UnsignedEnum 31:28 TraceFilt
0b0000 NI
0b0001 IMP
EndEnum
-Enum 27:24 PerfMon
+UnsignedEnum 27:24 PerfMon
0b0000 NI
0b0001 PMUv1
0b0010 PMUv2
@@ -192,7 +192,7 @@ Enum 31:28 InnerShr
0b0001 HW
0b1111 IGNORED
EndEnum
-Enum 27:24 FCSE
+UnsignedEnum 27:24 FCSE
0b0000 NI
0b0001 IMP
EndEnum
@@ -369,7 +369,7 @@ Enum 27:24 Divide
0b0001 xDIV_T32
0b0010 xDIV_A32
EndEnum
-Enum 23:20 Debug
+UnsignedEnum 23:20 Debug
0b0000 NI
0b0001 IMP
EndEnum
@@ -380,19 +380,19 @@ Enum 19:16 Coproc
0b0011 MRRC
0b0100 MRRC2
EndEnum
-Enum 15:12 CmpBranch
+UnsignedEnum 15:12 CmpBranch
0b0000 NI
0b0001 IMP
EndEnum
-Enum 11:8 BitField
+UnsignedEnum 11:8 BitField
0b0000 NI
0b0001 IMP
EndEnum
-Enum 7:4 BitCount
+UnsignedEnum 7:4 BitCount
0b0000 NI
0b0001 IMP
EndEnum
-Enum 3:0 Swap
+UnsignedEnum 3:0 Swap
0b0000 NI
0b0001 IMP
EndEnum
@@ -562,33 +562,33 @@ EndSysreg
Sysreg ID_ISAR5_EL1 3 0 0 2 5
Res0 63:32
-Enum 31:28 VCMA
+UnsignedEnum 31:28 VCMA
0b0000 NI
0b0001 IMP
EndEnum
-Enum 27:24 RDM
+UnsignedEnum 27:24 RDM
0b0000 NI
0b0001 IMP
EndEnum
Res0 23:20
-Enum 19:16 CRC32
+UnsignedEnum 19:16 CRC32
0b0000 NI
0b0001 IMP
EndEnum
-Enum 15:12 SHA2
+UnsignedEnum 15:12 SHA2
0b0000 NI
0b0001 IMP
EndEnum
-Enum 11:8 SHA1
+UnsignedEnum 11:8 SHA1
0b0000 NI
0b0001 IMP
EndEnum
-Enum 7:4 AES
+UnsignedEnum 7:4 AES
0b0000 NI
0b0001 IMP
0b0010 VMULL
EndEnum
-Enum 3:0 SEVL
+UnsignedEnum 3:0 SEVL
0b0000 NI
0b0001 IMP
EndEnum
@@ -596,31 +596,31 @@ EndSysreg
Sysreg ID_ISAR6_EL1 3 0 0 2 7
Res0 63:28
-Enum 27:24 I8MM
+UnsignedEnum 27:24 I8MM
0b0000 NI
0b0001 IMP
EndEnum
-Enum 23:20 BF16
+UnsignedEnum 23:20 BF16
0b0000 NI
0b0001 IMP
EndEnum
-Enum 19:16 SPECRES
+UnsignedEnum 19:16 SPECRES
0b0000 NI
0b0001 IMP
EndEnum
-Enum 15:12 SB
+UnsignedEnum 15:12 SB
0b0000 NI
0b0001 IMP
EndEnum
-Enum 11:8 FHM
+UnsignedEnum 11:8 FHM
0b0000 NI
0b0001 IMP
EndEnum
-Enum 7:4 DP
+UnsignedEnum 7:4 DP
0b0000 NI
0b0001 IMP
EndEnum
-Enum 3:0 JSCVT
+UnsignedEnum 3:0 JSCVT
0b0000 NI
0b0001 IMP
EndEnum
@@ -628,37 +628,37 @@ EndSysreg
Sysreg ID_MMFR4_EL1 3 0 0 2 6
Res0 63:32
-Enum 31:28 EVT
+UnsignedEnum 31:28 EVT
0b0000 NI
0b0001 NO_TLBIS
0b0010 TLBIS
EndEnum
-Enum 27:24 CCIDX
+UnsignedEnum 27:24 CCIDX
0b0000 NI
0b0001 IMP
EndEnum
-Enum 23:20 LSM
+UnsignedEnum 23:20 LSM
0b0000 NI
0b0001 IMP
EndEnum
-Enum 19:16 HPDS
+UnsignedEnum 19:16 HPDS
0b0000 NI
0b0001 AA32HPD
0b0010 HPDS2
EndEnum
-Enum 15:12 CnP
+UnsignedEnum 15:12 CnP
0b0000 NI
0b0001 IMP
EndEnum
-Enum 11:8 XNX
+UnsignedEnum 11:8 XNX
0b0000 NI
0b0001 IMP
EndEnum
-Enum 7:4 AC2
+UnsignedEnum 7:4 AC2
0b0000 NI
0b0001 IMP
EndEnum
-Enum 3:0 SpecSEI
+UnsignedEnum 3:0 SpecSEI
0b0000 NI
0b0001 IMP
EndEnum
@@ -666,77 +666,77 @@ EndSysreg
Sysreg MVFR0_EL1 3 0 0 3 0
Res0 63:32
-Enum 31:28 FPRound
+UnsignedEnum 31:28 FPRound
0b0000 NI
0b0001 IMP
EndEnum
-Enum 27:24 FPShVec
+UnsignedEnum 27:24 FPShVec
0b0000 NI
0b0001 IMP
EndEnum
-Enum 23:20 FPSqrt
+UnsignedEnum 23:20 FPSqrt
0b0000 NI
0b0001 IMP
EndEnum
-Enum 19:16 FPDivide
+UnsignedEnum 19:16 FPDivide
0b0000 NI
0b0001 IMP
EndEnum
-Enum 15:12 FPTrap
+UnsignedEnum 15:12 FPTrap
0b0000 NI
0b0001 IMP
EndEnum
-Enum 11:8 FPDP
+UnsignedEnum 11:8 FPDP
0b0000 NI
0b0001 VFPv2
- 0b0001 VFPv3
+ 0b0010 VFPv3
EndEnum
-Enum 7:4 FPSP
+UnsignedEnum 7:4 FPSP
0b0000 NI
0b0001 VFPv2
- 0b0001 VFPv3
+ 0b0010 VFPv3
EndEnum
Enum 3:0 SIMDReg
0b0000 NI
0b0001 IMP_16x64
- 0b0001 IMP_32x64
+ 0b0010 IMP_32x64
EndEnum
EndSysreg
Sysreg MVFR1_EL1 3 0 0 3 1
Res0 63:32
-Enum 31:28 SIMDFMAC
+UnsignedEnum 31:28 SIMDFMAC
0b0000 NI
0b0001 IMP
EndEnum
-Enum 27:24 FPHP
+UnsignedEnum 27:24 FPHP
0b0000 NI
0b0001 FPHP
0b0010 FPHP_CONV
0b0011 FP16
EndEnum
-Enum 23:20 SIMDHP
+UnsignedEnum 23:20 SIMDHP
0b0000 NI
0b0001 SIMDHP
- 0b0001 SIMDHP_FLOAT
+ 0b0010 SIMDHP_FLOAT
EndEnum
-Enum 19:16 SIMDSP
+UnsignedEnum 19:16 SIMDSP
0b0000 NI
0b0001 IMP
EndEnum
-Enum 15:12 SIMDInt
+UnsignedEnum 15:12 SIMDInt
0b0000 NI
0b0001 IMP
EndEnum
-Enum 11:8 SIMDLS
+UnsignedEnum 11:8 SIMDLS
0b0000 NI
0b0001 IMP
EndEnum
-Enum 7:4 FPDNaN
+UnsignedEnum 7:4 FPDNaN
0b0000 NI
0b0001 IMP
EndEnum
-Enum 3:0 FPFtZ
+UnsignedEnum 3:0 FPFtZ
0b0000 NI
0b0001 IMP
EndEnum
@@ -761,15 +761,15 @@ EndSysreg
Sysreg ID_PFR2_EL1 3 0 0 3 4
Res0 63:12
-Enum 11:8 RAS_frac
+UnsignedEnum 11:8 RAS_frac
0b0000 NI
0b0001 RASv1p1
EndEnum
-Enum 7:4 SSBS
+UnsignedEnum 7:4 SSBS
0b0000 NI
0b0001 IMP
EndEnum
-Enum 3:0 CSV3
+UnsignedEnum 3:0 CSV3
0b0000 NI
0b0001 IMP
EndEnum
@@ -777,7 +777,7 @@ EndSysreg
Sysreg ID_DFR1_EL1 3 0 0 3 5
Res0 63:8
-Enum 7:4 HPMN0
+UnsignedEnum 7:4 HPMN0
0b0000 NI
0b0001 IMP
EndEnum
@@ -790,87 +790,87 @@ EndSysreg
Sysreg ID_MMFR5_EL1 3 0 0 3 6
Res0 63:8
-Enum 7:4 nTLBPA
+UnsignedEnum 7:4 nTLBPA
0b0000 NI
0b0001 IMP
EndEnum
-Enum 3:0 ETS
+UnsignedEnum 3:0 ETS
0b0000 NI
0b0001 IMP
EndEnum
EndSysreg
Sysreg ID_AA64PFR0_EL1 3 0 0 4 0
-Enum 63:60 CSV3
+UnsignedEnum 63:60 CSV3
0b0000 NI
0b0001 IMP
EndEnum
-Enum 59:56 CSV2
+UnsignedEnum 59:56 CSV2
0b0000 NI
0b0001 IMP
0b0010 CSV2_2
0b0011 CSV2_3
EndEnum
-Enum 55:52 RME
+UnsignedEnum 55:52 RME
0b0000 NI
0b0001 IMP
EndEnum
-Enum 51:48 DIT
+UnsignedEnum 51:48 DIT
0b0000 NI
0b0001 IMP
EndEnum
-Enum 47:44 AMU
+UnsignedEnum 47:44 AMU
0b0000 NI
0b0001 IMP
0b0010 V1P1
EndEnum
-Enum 43:40 MPAM
+UnsignedEnum 43:40 MPAM
0b0000 0
0b0001 1
EndEnum
-Enum 39:36 SEL2
+UnsignedEnum 39:36 SEL2
0b0000 NI
0b0001 IMP
EndEnum
-Enum 35:32 SVE
+UnsignedEnum 35:32 SVE
0b0000 NI
0b0001 IMP
EndEnum
-Enum 31:28 RAS
+UnsignedEnum 31:28 RAS
0b0000 NI
0b0001 IMP
0b0010 V1P1
EndEnum
-Enum 27:24 GIC
+UnsignedEnum 27:24 GIC
0b0000 NI
0b0001 IMP
0b0010 V4P1
EndEnum
-Enum 23:20 AdvSIMD
+SignedEnum 23:20 AdvSIMD
0b0000 IMP
0b0001 FP16
0b1111 NI
EndEnum
-Enum 19:16 FP
+SignedEnum 19:16 FP
0b0000 IMP
0b0001 FP16
0b1111 NI
EndEnum
-Enum 15:12 EL3
+UnsignedEnum 15:12 EL3
0b0000 NI
0b0001 IMP
0b0010 AARCH32
EndEnum
-Enum 11:8 EL2
+UnsignedEnum 11:8 EL2
0b0000 NI
0b0001 IMP
0b0010 AARCH32
EndEnum
-Enum 7:4 EL1
+UnsignedEnum 7:4 EL1
0b0001 IMP
0b0010 AARCH32
EndEnum
-Enum 3:0 EL0
+UnsignedEnum 3:0 EL0
0b0001 IMP
0b0010 AARCH32
EndEnum
@@ -878,44 +878,45 @@ EndSysreg
Sysreg ID_AA64PFR1_EL1 3 0 0 4 1
Res0 63:40
-Enum 39:36 NMI
+UnsignedEnum 39:36 NMI
0b0000 NI
0b0001 IMP
EndEnum
-Enum 35:32 CSV2_frac
+UnsignedEnum 35:32 CSV2_frac
0b0000 NI
0b0001 CSV2_1p1
0b0010 CSV2_1p2
EndEnum
-Enum 31:28 RNDR_trap
+UnsignedEnum 31:28 RNDR_trap
0b0000 NI
0b0001 IMP
EndEnum
-Enum 27:24 SME
+UnsignedEnum 27:24 SME
0b0000 NI
0b0001 IMP
+ 0b0010 SME2
EndEnum
Res0 23:20
-Enum 19:16 MPAM_frac
+UnsignedEnum 19:16 MPAM_frac
0b0000 MINOR_0
0b0001 MINOR_1
EndEnum
-Enum 15:12 RAS_frac
+UnsignedEnum 15:12 RAS_frac
0b0000 NI
0b0001 RASv1p1
EndEnum
-Enum 11:8 MTE
+UnsignedEnum 11:8 MTE
0b0000 NI
0b0001 IMP
0b0010 MTE2
0b0011 MTE3
EndEnum
-Enum 7:4 SSBS
+UnsignedEnum 7:4 SSBS
0b0000 NI
0b0001 IMP
0b0010 SSBS2
EndEnum
-Enum 3:0 BT
+UnsignedEnum 3:0 BT
0b0000 NI
0b0001 IMP
EndEnum
@@ -923,45 +924,45 @@ EndSysreg
Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4
Res0 63:60
-Enum 59:56 F64MM
+UnsignedEnum 59:56 F64MM
0b0000 NI
0b0001 IMP
EndEnum
-Enum 55:52 F32MM
+UnsignedEnum 55:52 F32MM
0b0000 NI
0b0001 IMP
EndEnum
Res0 51:48
-Enum 47:44 I8MM
+UnsignedEnum 47:44 I8MM
0b0000 NI
0b0001 IMP
EndEnum
-Enum 43:40 SM4
+UnsignedEnum 43:40 SM4
0b0000 NI
0b0001 IMP
EndEnum
Res0 39:36
-Enum 35:32 SHA3
+UnsignedEnum 35:32 SHA3
0b0000 NI
0b0001 IMP
EndEnum
Res0 31:24
-Enum 23:20 BF16
+UnsignedEnum 23:20 BF16
0b0000 NI
0b0001 IMP
0b0010 EBF16
EndEnum
-Enum 19:16 BitPerm
+UnsignedEnum 19:16 BitPerm
0b0000 NI
0b0001 IMP
EndEnum
Res0 15:8
-Enum 7:4 AES
+UnsignedEnum 7:4 AES
0b0000 NI
0b0001 IMP
0b0010 PMULL128
EndEnum
-Enum 3:0 SVEver
+UnsignedEnum 3:0 SVEver
0b0000 IMP
0b0001 SVE2
0b0010 SVE2p1
@@ -969,38 +970,56 @@ EndEnum
EndSysreg
Sysreg ID_AA64SMFR0_EL1 3 0 0 4 5
-Enum 63 FA64
+UnsignedEnum 63 FA64
0b0 NI
0b1 IMP
EndEnum
Res0 62:60
-Enum 59:56 SMEver
+UnsignedEnum 59:56 SMEver
+ 0b0000 SME
+ 0b0001 SME2
+ 0b0010 SME2p1
- 0b0000 IMP
EndEnum
-Enum 55:52 I16I64
+UnsignedEnum 55:52 I16I64
0b0000 NI
0b1111 IMP
EndEnum
Res0 51:49
-Enum 48 F64F64
+UnsignedEnum 48 F64F64
0b0 NI
0b1 IMP
EndEnum
-Res0 47:40
-Enum 39:36 I8I32
+UnsignedEnum 47:44 I16I32
+ 0b0000 NI
+ 0b0101 IMP
+EndEnum
+UnsignedEnum 43 B16B16
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 42 F16F16
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Res0 41:40
+UnsignedEnum 39:36 I8I32
0b0000 NI
0b1111 IMP
EndEnum
-Enum 35 F16F32
+UnsignedEnum 35 F16F32
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 34 B16F32
0b0 NI
0b1 IMP
EndEnum
-Enum 34 B16F32
+UnsignedEnum 33 BI32I32
0b0 NI
0b1 IMP
EndEnum
-Res0 33
-Enum 32 F32F32
+UnsignedEnum 32 F32F32
0b0 NI
0b1 IMP
EndEnum
@@ -1013,7 +1032,7 @@ Enum 63:60 HPMN0
0b0001 DEF
EndEnum
Res0 59:56
-Enum 55:52 BRBE
+UnsignedEnum 55:52 BRBE
0b0000 NI
0b0001 IMP
0b0010 BRBE_V1P1
@@ -1023,19 +1042,19 @@ Enum 51:48 MTPMU
0b0001 IMP
0b1111 NI
EndEnum
-Enum 47:44 TraceBuffer
+UnsignedEnum 47:44 TraceBuffer
0b0000 NI
0b0001 IMP
EndEnum
-Enum 43:40 TraceFilt
+UnsignedEnum 43:40 TraceFilt
0b0000 NI
0b0001 IMP
EndEnum
-Enum 39:36 DoubleLock
+UnsignedEnum 39:36 DoubleLock
0b0000 IMP
0b1111 NI
EndEnum
-Enum 35:32 PMSVer
+UnsignedEnum 35:32 PMSVer
0b0000 NI
0b0001 IMP
0b0010 V1P1
@@ -1047,7 +1066,7 @@ Res0 27:24
Field 23:20 WRPs
Res0 19:16
Field 15:12 BRPs
-Enum 11:8 PMUVer
+UnsignedEnum 11:8 PMUVer
0b0000 NI
0b0001 IMP
0b0100 V3P1
@@ -1057,11 +1076,11 @@ Enum 11:8 PMUVer
0b1000 V3P8
0b1111 IMP_DEF
EndEnum
-Enum 7:4 TraceVer
+UnsignedEnum 7:4 TraceVer
0b0000 NI
0b0001 IMP
EndEnum
-Enum 3:0 DebugVer
+UnsignedEnum 3:0 DebugVer
0b0110 IMP
0b0111 VHE
0b1000 V8P2
@@ -1091,66 +1110,66 @@ Res0 63:0
EndSysreg
Sysreg ID_AA64ISAR0_EL1 3 0 0 6 0
-Enum 63:60 RNDR
+UnsignedEnum 63:60 RNDR
0b0000 NI
0b0001 IMP
EndEnum
-Enum 59:56 TLB
+UnsignedEnum 59:56 TLB
0b0000 NI
0b0001 OS
0b0010 RANGE
EndEnum
-Enum 55:52 TS
+UnsignedEnum 55:52 TS
0b0000 NI
0b0001 FLAGM
0b0010 FLAGM2
EndEnum
-Enum 51:48 FHM
+UnsignedEnum 51:48 FHM
0b0000 NI
0b0001 IMP
EndEnum
-Enum 47:44 DP
+UnsignedEnum 47:44 DP
0b0000 NI
0b0001 IMP
EndEnum
-Enum 43:40 SM4
+UnsignedEnum 43:40 SM4
0b0000 NI
0b0001 IMP
EndEnum
-Enum 39:36 SM3
+UnsignedEnum 39:36 SM3
0b0000 NI
0b0001 IMP
EndEnum
-Enum 35:32 SHA3
+UnsignedEnum 35:32 SHA3
0b0000 NI
0b0001 IMP
EndEnum
-Enum 31:28 RDM
+UnsignedEnum 31:28 RDM
0b0000 NI
0b0001 IMP
EndEnum
-Enum 27:24 TME
+UnsignedEnum 27:24 TME
0b0000 NI
0b0001 IMP
EndEnum
-Enum 23:20 ATOMIC
+UnsignedEnum 23:20 ATOMIC
0b0000 NI
0b0010 IMP
EndEnum
-Enum 19:16 CRC32
+UnsignedEnum 19:16 CRC32
0b0000 NI
0b0001 IMP
EndEnum
-Enum 15:12 SHA2
+UnsignedEnum 15:12 SHA2
0b0000 NI
0b0001 SHA256
0b0010 SHA512
EndEnum
-Enum 11:8 SHA1
+UnsignedEnum 11:8 SHA1
0b0000 NI
0b0001 IMP
EndEnum
-Enum 7:4 AES
+UnsignedEnum 7:4 AES
0b0000 NI
0b0001 AES
0b0010 PMULL
@@ -1159,63 +1178,63 @@ Res0 3:0
EndSysreg
Sysreg ID_AA64ISAR1_EL1 3 0 0 6 1
-Enum 63:60 LS64
+UnsignedEnum 63:60 LS64
0b0000 NI
0b0001 LS64
0b0010 LS64_V
0b0011 LS64_ACCDATA
EndEnum
-Enum 59:56 XS
+UnsignedEnum 59:56 XS
0b0000 NI
0b0001 IMP
EndEnum
-Enum 55:52 I8MM
+UnsignedEnum 55:52 I8MM
0b0000 NI
0b0001 IMP
EndEnum
-Enum 51:48 DGH
+UnsignedEnum 51:48 DGH
0b0000 NI
0b0001 IMP
EndEnum
-Enum 47:44 BF16
+UnsignedEnum 47:44 BF16
0b0000 NI
0b0001 IMP
0b0010 EBF16
EndEnum
-Enum 43:40 SPECRES
+UnsignedEnum 43:40 SPECRES
0b0000 NI
0b0001 IMP
EndEnum
-Enum 39:36 SB
+UnsignedEnum 39:36 SB
0b0000 NI
0b0001 IMP
EndEnum
-Enum 35:32 FRINTTS
+UnsignedEnum 35:32 FRINTTS
0b0000 NI
0b0001 IMP
EndEnum
-Enum 31:28 GPI
+UnsignedEnum 31:28 GPI
0b0000 NI
0b0001 IMP
EndEnum
-Enum 27:24 GPA
+UnsignedEnum 27:24 GPA
0b0000 NI
0b0001 IMP
EndEnum
-Enum 23:20 LRCPC
+UnsignedEnum 23:20 LRCPC
0b0000 NI
0b0001 IMP
0b0010 LRCPC2
EndEnum
-Enum 19:16 FCMA
+UnsignedEnum 19:16 FCMA
0b0000 NI
0b0001 IMP
EndEnum
-Enum 15:12 JSCVT
+UnsignedEnum 15:12 JSCVT
0b0000 NI
0b0001 IMP
EndEnum
-Enum 11:8 API
+UnsignedEnum 11:8 API
0b0000 NI
0b0001 PAuth
0b0010 EPAC
@@ -1223,7 +1242,7 @@ Enum 11:8 API
0b0100 FPAC
0b0101 FPACCOMBINE
EndEnum
-Enum 7:4 APA
+UnsignedEnum 7:4 APA
0b0000 NI
0b0001 PAuth
0b0010 EPAC
@@ -1231,7 +1250,7 @@ Enum 7:4 APA
0b0100 FPAC
0b0101 FPACCOMBINE
EndEnum
-Enum 3:0 DPB
+UnsignedEnum 3:0 DPB
0b0000 NI
0b0001 IMP
0b0010 DPB2
@@ -1240,28 +1259,28 @@ EndSysreg
Sysreg ID_AA64ISAR2_EL1 3 0 0 6 2
Res0 63:56
-Enum 55:52 CSSC
+UnsignedEnum 55:52 CSSC
0b0000 NI
0b0001 IMP
EndEnum
-Enum 51:48 RPRFM
+UnsignedEnum 51:48 RPRFM
0b0000 NI
0b0001 IMP
EndEnum
Res0 47:28
-Enum 27:24 PAC_frac
+UnsignedEnum 27:24 PAC_frac
0b0000 NI
0b0001 IMP
EndEnum
-Enum 23:20 BC
+UnsignedEnum 23:20 BC
0b0000 NI
0b0001 IMP
EndEnum
-Enum 19:16 MOPS
+UnsignedEnum 19:16 MOPS
0b0000 NI
0b0001 IMP
EndEnum
-Enum 15:12 APA3
+UnsignedEnum 15:12 APA3
0b0000 NI
0b0001 PAuth
0b0010 EPAC
@@ -1269,32 +1288,32 @@ Enum 15:12 APA3
0b0100 FPAC
0b0101 FPACCOMBINE
EndEnum
-Enum 11:8 GPA3
+UnsignedEnum 11:8 GPA3
0b0000 NI
0b0001 IMP
EndEnum
-Enum 7:4 RPRES
+UnsignedEnum 7:4 RPRES
0b0000 NI
0b0001 IMP
EndEnum
-Enum 3:0 WFxT
+UnsignedEnum 3:0 WFxT
0b0000 NI
0b0010 IMP
EndEnum
EndSysreg
Sysreg ID_AA64MMFR0_EL1 3 0 0 7 0
-Enum 63:60 ECV
+UnsignedEnum 63:60 ECV
0b0000 NI
0b0001 IMP
0b0010 CNTPOFF
EndEnum
-Enum 59:56 FGT
+UnsignedEnum 59:56 FGT
0b0000 NI
0b0001 IMP
EndEnum
Res0 55:48
-Enum 47:44 EXS
+UnsignedEnum 47:44 EXS
0b0000 NI
0b0001 IMP
EndEnum
@@ -1329,15 +1348,15 @@ Enum 23:20 TGRAN16
0b0001 IMP
0b0010 52_BIT
EndEnum
-Enum 19:16 BIGENDEL0
+UnsignedEnum 19:16 BIGENDEL0
0b0000 NI
0b0001 IMP
EndEnum
-Enum 15:12 SNSMEM
+UnsignedEnum 15:12 SNSMEM
0b0000 NI
0b0001 IMP
EndEnum
-Enum 11:8 BIGEND
+UnsignedEnum 11:8 BIGEND
0b0000 NI
0b0001 IMP
EndEnum
@@ -1357,62 +1376,62 @@ EndEnum
EndSysreg
Sysreg ID_AA64MMFR1_EL1 3 0 0 7 1
-Enum 63:60 ECBHB
+UnsignedEnum 63:60 ECBHB
0b0000 NI
0b0001 IMP
EndEnum
-Enum 59:56 CMOW
+UnsignedEnum 59:56 CMOW
0b0000 NI
0b0001 IMP
EndEnum
-Enum 55:52 TIDCP1
+UnsignedEnum 55:52 TIDCP1
0b0000 NI
0b0001 IMP
EndEnum
-Enum 51:48 nTLBPA
+UnsignedEnum 51:48 nTLBPA
0b0000 NI
0b0001 IMP
EndEnum
-Enum 47:44 AFP
+UnsignedEnum 47:44 AFP
0b0000 NI
0b0001 IMP
EndEnum
-Enum 43:40 HCX
+UnsignedEnum 43:40 HCX
0b0000 NI
0b0001 IMP
EndEnum
-Enum 39:36 ETS
+UnsignedEnum 39:36 ETS
0b0000 NI
0b0001 IMP
EndEnum
-Enum 35:32 TWED
+UnsignedEnum 35:32 TWED
0b0000 NI
0b0001 IMP
EndEnum
-Enum 31:28 XNX
+UnsignedEnum 31:28 XNX
0b0000 NI
0b0001 IMP
EndEnum
-Enum 27:24 SpecSEI
+UnsignedEnum 27:24 SpecSEI
0b0000 NI
0b0001 IMP
EndEnum
-Enum 23:20 PAN
+UnsignedEnum 23:20 PAN
0b0000 NI
0b0001 IMP
0b0010 PAN2
0b0011 PAN3
EndEnum
-Enum 19:16 LO
+UnsignedEnum 19:16 LO
0b0000 NI
0b0001 IMP
EndEnum
-Enum 15:12 HPDS
+UnsignedEnum 15:12 HPDS
0b0000 NI
0b0001 IMP
0b0010 HPDS2
EndEnum
-Enum 11:8 VH
+UnsignedEnum 11:8 VH
0b0000 NI
0b0001 IMP
EndEnum
@@ -1420,7 +1439,7 @@ Enum 7:4 VMIDBits
0b0000 8
0b0010 16
EndEnum
-Enum 3:0 HAFDBS
+UnsignedEnum 3:0 HAFDBS
0b0000 NI
0b0001 AF
0b0010 DBM
@@ -1428,26 +1447,26 @@ EndEnum
EndSysreg
Sysreg ID_AA64MMFR2_EL1 3 0 0 7 2
-Enum 63:60 E0PD
+UnsignedEnum 63:60 E0PD
0b0000 NI
0b0001 IMP
EndEnum
-Enum 59:56 EVT
+UnsignedEnum 59:56 EVT
0b0000 NI
0b0001 IMP
0b0010 TTLBxS
EndEnum
-Enum 55:52 BBM
+UnsignedEnum 55:52 BBM
0b0000 0
0b0001 1
0b0010 2
EndEnum
-Enum 51:48 TTL
+UnsignedEnum 51:48 TTL
0b0000 NI
0b0001 IMP
EndEnum
Res0 47:44
-Enum 43:40 FWB
+UnsignedEnum 43:40 FWB
0b0000 NI
0b0001 IMP
EndEnum
@@ -1455,7 +1474,7 @@ Enum 39:36 IDS
0b0000 0x0
0b0001 0x18
EndEnum
-Enum 35:32 AT
+UnsignedEnum 35:32 AT
0b0000 NI
0b0001 IMP
EndEnum
@@ -1463,7 +1482,7 @@ Enum 31:28 ST
0b0000 39
0b0001 48_47
EndEnum
-Enum 27:24 NV
+UnsignedEnum 27:24 NV
0b0000 NI
0b0001 IMP
0b0010 NV2
@@ -1476,19 +1495,19 @@ Enum 19:16 VARange
0b0000 48
0b0001 52
EndEnum
-Enum 15:12 IESB
+UnsignedEnum 15:12 IESB
0b0000 NI
0b0001 IMP
EndEnum
-Enum 11:8 LSM
+UnsignedEnum 11:8 LSM
0b0000 NI
0b0001 IMP
EndEnum
-Enum 7:4 UAO
+UnsignedEnum 7:4 UAO
0b0000 NI
0b0001 IMP
EndEnum
-Enum 3:0 CnP
+UnsignedEnum 3:0 CnP
0b0000 NI
0b0001 IMP
EndEnum
@@ -1599,7 +1618,8 @@ EndSysreg
SysregFields SMCR_ELx
Res0 63:32
Field 31 FA64
-Res0 30:9
+Field 30 EZT0
+Res0 29:9
Raz 8:4
Field 3:0 LEN
EndSysregFields
@@ -1618,6 +1638,130 @@ Sysreg FAR_EL1 3 0 6 0 0
Field 63:0 ADDR
EndSysreg
+Sysreg PMSCR_EL1 3 0 9 9 0
+Res0 63:8
+Field 7:6 PCT
+Field 5 TS
+Field 4 PA
+Field 3 CX
+Res0 2
+Field 1 E1SPE
+Field 0 E0SPE
+EndSysreg
+
+Sysreg PMSNEVFR_EL1 3 0 9 9 1
+Field 63:0 E
+EndSysreg
+
+Sysreg PMSICR_EL1 3 0 9 9 2
+Field 63:56 ECOUNT
+Res0 55:32
+Field 31:0 COUNT
+EndSysreg
+
+Sysreg PMSIRR_EL1 3 0 9 9 3
+Res0 63:32
+Field 31:8 INTERVAL
+Res0 7:1
+Field 0 RND
+EndSysreg
+
+Sysreg PMSFCR_EL1 3 0 9 9 4
+Res0 63:19
+Field 18 ST
+Field 17 LD
+Field 16 B
+Res0 15:4
+Field 3 FnE
+Field 2 FL
+Field 1 FT
+Field 0 FE
+EndSysreg
+
+Sysreg PMSEVFR_EL1 3 0 9 9 5
+Field 63:0 E
+EndSysreg
+
+Sysreg PMSLATFR_EL1 3 0 9 9 6
+Res0 63:16
+Field 15:0 MINLAT
+EndSysreg
+
+Sysreg PMSIDR_EL1 3 0 9 9 7
+Res0 63:25
+Field 24 PBT
+Field 23:20 FORMAT
+Enum 19:16 COUNTSIZE
+ 0b0010 12_BIT_SAT
+ 0b0011 16_BIT_SAT
+EndEnum
+Field 15:12 MAXSIZE
+Enum 11:8 INTERVAL
+ 0b0000 256
+ 0b0010 512
+ 0b0011 768
+ 0b0100 1024
+ 0b0101 1536
+ 0b0110 2048
+ 0b0111 3072
+ 0b1000 4096
+EndEnum
+Res0 7
+Field 6 FnE
+Field 5 ERND
+Field 4 LDS
+Field 3 ARCHINST
+Field 2 FL
+Field 1 FT
+Field 0 FE
+EndSysreg
+
+Sysreg PMBLIMITR_EL1 3 0 9 10 0
+Field 63:12 LIMIT
+Res0 11:6
+Field 5 PMFZ
+Res0 4:3
+Enum 2:1 FM
+ 0b00 FILL
+ 0b10 DISCARD
+EndEnum
+Field 0 E
+EndSysreg
+
+Sysreg PMBPTR_EL1 3 0 9 10 1
+Field 63:0 PTR
+EndSysreg
+
+Sysreg PMBSR_EL1 3 0 9 10 3
+Res0 63:32
+Enum 31:26 EC
+ 0b000000 BUF
+ 0b100100 FAULT_S1
+ 0b100101 FAULT_S2
+ 0b011110 FAULT_GPC
+ 0b011111 IMP_DEF
+EndEnum
+Res0 25:20
+Field 19 DL
+Field 18 EA
+Field 17 S
+Field 16 COLL
+Field 15:0 MSS
+EndSysreg
+
+Sysreg PMBIDR_EL1 3 0 9 10 7
+Res0 63:12
+Enum 11:8 EA
+ 0b0000 NotDescribed
+ 0b0001 Ignored
+ 0b0010 SError
+EndEnum
+Res0 7:6
+Field 5 F
+Field 4 P
+Field 3:0 ALIGN
+EndSysreg
+
SysregFields CONTEXTIDR_ELx
Res0 63:32
Field 31:0 PROCID
@@ -1772,6 +1916,21 @@ Sysreg FAR_EL2 3 4 6 0 0
Field 63:0 ADDR
EndSysreg
+Sysreg PMSCR_EL2 3 4 9 9 0
+Res0 63:8
+Enum 7:6 PCT
+ 0b00 VIRT
+ 0b01 PHYS
+ 0b11 GUEST
+EndEnum
+Field 5 TS
+Field 4 PA
+Field 3 CX
+Res0 2
+Field 1 E2SPE
+Field 0 E0HSPE
+EndSysreg
+
Sysreg CONTEXTIDR_EL2 3 4 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg
@@ -1842,3 +2001,18 @@ Field 23:16 LD
Res0 15:8
Field 7:0 LR
EndSysreg
+
+Sysreg ISR_EL1 3 0 12 1 0
+Res0 63:11
+Field 10 IS
+Field 9 FS
+Field 8 A
+Field 7 I
+Field 6 F
+Res0 5:0
+EndSysreg
+
+Sysreg ICC_NMIAR1_EL1 3 0 12 9 5
+Res0 63:24
+Field 23:0 INTID
+EndSysreg
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index c4b1947ebf76..288003a9f0ca 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -841,7 +841,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
if (ret < 0)
return ret;
- move_imm(ctx, t1, func_addr, is32);
+ move_addr(ctx, t1, func_addr);
emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0);
move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
break;
diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h
index ca708024fdd3..c335dc4eed37 100644
--- a/arch/loongarch/net/bpf_jit.h
+++ b/arch/loongarch/net/bpf_jit.h
@@ -82,6 +82,27 @@ static inline void emit_sext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, boo
emit_insn(ctx, addiw, reg, reg, 0);
}
+static inline void move_addr(struct jit_ctx *ctx, enum loongarch_gpr rd, u64 addr)
+{
+ u64 imm_11_0, imm_31_12, imm_51_32, imm_63_52;
+
+ /* lu12iw rd, imm_31_12 */
+ imm_31_12 = (addr >> 12) & 0xfffff;
+ emit_insn(ctx, lu12iw, rd, imm_31_12);
+
+ /* ori rd, rd, imm_11_0 */
+ imm_11_0 = addr & 0xfff;
+ emit_insn(ctx, ori, rd, rd, imm_11_0);
+
+ /* lu32id rd, imm_51_32 */
+ imm_51_32 = (addr >> 32) & 0xfffff;
+ emit_insn(ctx, lu32id, rd, imm_51_32);
+
+ /* lu52id rd, rd, imm_63_52 */
+ imm_63_52 = (addr >> 52) & 0xfff;
+ emit_insn(ctx, lu52id, rd, rd, imm_63_52);
+}
+
static inline void move_imm(struct jit_ctx *ctx, enum loongarch_gpr rd, long imm, bool is32)
{
long imm_11_0, imm_31_12, imm_51_32, imm_63_52, imm_51_0, imm_51_31;
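For reference, move_addr() above covers all 64 address bits with four immediates along the lu12i.w/ori/lu32i.d/lu52i.d boundaries. A small host-side sketch (plain C, no JIT involved, not LoongArch-specific) that performs the same split and checks that the pieces reassemble to the original address:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static void split_addr(uint64_t addr, uint64_t imm[4])
{
	imm[0] = addr & 0xfff;			/* ori,      bits 11:0  */
	imm[1] = (addr >> 12) & 0xfffff;	/* lu12i.w,  bits 31:12 */
	imm[2] = (addr >> 32) & 0xfffff;	/* lu32i.d,  bits 51:32 */
	imm[3] = (addr >> 52) & 0xfff;		/* lu52i.d,  bits 63:52 */
}

int main(void)
{
	uint64_t addr = 0x9000000012345678ULL;	/* arbitrary example address */
	uint64_t imm[4], rebuilt;

	split_addr(addr, imm);
	rebuilt = imm[0] | (imm[1] << 12) | (imm[2] << 32) | (imm[3] << 52);
	assert(rebuilt == addr);
	printf("%#llx -> %#llx %#llx %#llx %#llx\n",
	       (unsigned long long)addr,
	       (unsigned long long)imm[0], (unsigned long long)imm[1],
	       (unsigned long long)imm[2], (unsigned long long)imm[3]);
	return 0;
}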
diff --git a/arch/m68k/68000/entry.S b/arch/m68k/68000/entry.S
index 997b54933015..7d63e2f1555a 100644
--- a/arch/m68k/68000/entry.S
+++ b/arch/m68k/68000/entry.S
@@ -45,6 +45,8 @@ do_trace:
jbsr syscall_trace_enter
RESTORE_SWITCH_STACK
addql #4,%sp
+ addql #1,%d0
+ jeq ret_from_exception
movel %sp@(PT_OFF_ORIG_D0),%d1
movel #-ENOSYS,%d0
cmpl #NR_syscalls,%d1
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 7bff88118507..82154952e574 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -18,6 +18,8 @@ config M68K
select GENERIC_CPU_DEVICES
select GENERIC_IOMAP
select GENERIC_IRQ_SHOW
+ select HAVE_ARCH_SECCOMP
+ select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ASM_MODVERSIONS
select HAVE_DEBUG_BUGVERBOSE
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED
diff --git a/arch/m68k/Kconfig.devices b/arch/m68k/Kconfig.devices
index 6a87b4a5fcac..e6e3efac1840 100644
--- a/arch/m68k/Kconfig.devices
+++ b/arch/m68k/Kconfig.devices
@@ -19,6 +19,7 @@ config HEARTBEAT
# We have a dedicated heartbeat LED. :-)
config PROC_HARDWARE
bool "/proc/hardware support"
+ depends on PROC_FS
help
Say Y here to support the /proc/hardware file, which gives you
access to information about the machine you're running on,
diff --git a/arch/m68k/coldfire/entry.S b/arch/m68k/coldfire/entry.S
index 9f337c70243a..35104c5417ff 100644
--- a/arch/m68k/coldfire/entry.S
+++ b/arch/m68k/coldfire/entry.S
@@ -90,6 +90,8 @@ ENTRY(system_call)
jbsr syscall_trace_enter
RESTORE_SWITCH_STACK
addql #4,%sp
+ addql #1,%d0
+ jeq ret_from_exception
movel %d3,%a0
jbsr %a0@
movel %d0,%sp@(PT_OFF_D0) /* save the return value */
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 7b49fe6f7cb3..ec2d792015a4 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -108,7 +108,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
@@ -630,7 +629,6 @@ CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_STRING_SELFTEST=m
CONFIG_TEST_STRING_HELPERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_SCANF=m
@@ -638,7 +636,6 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 656a06d97d4c..061a07824dc2 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -104,7 +104,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
@@ -586,7 +585,6 @@ CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_STRING_SELFTEST=m
CONFIG_TEST_STRING_HELPERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_SCANF=m
@@ -594,7 +592,6 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 8972b822875d..02af5f501dae 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -111,7 +111,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
@@ -607,7 +606,6 @@ CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_STRING_SELFTEST=m
CONFIG_TEST_STRING_HELPERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_SCANF=m
@@ -615,7 +613,6 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 7b86e1277a1a..0d5832cb3e10 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -101,7 +101,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
@@ -578,7 +577,6 @@ CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_STRING_SELFTEST=m
CONFIG_TEST_STRING_HELPERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_SCANF=m
@@ -586,7 +584,6 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index d0d5c0a9aee6..c246c3538839 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -103,7 +103,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
@@ -588,7 +587,6 @@ CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_STRING_SELFTEST=m
CONFIG_TEST_STRING_HELPERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_SCANF=m
@@ -596,7 +594,6 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index ac1d0c86b6ff..98d2d0599e5a 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -102,7 +102,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
@@ -609,7 +608,6 @@ CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_STRING_SELFTEST=m
CONFIG_TEST_STRING_HELPERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_SCANF=m
@@ -617,7 +615,6 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index c5f7603c9830..b2d5ec6ba625 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -122,7 +122,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
@@ -315,6 +314,7 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
+CONFIG_PCCARD=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -695,7 +695,6 @@ CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_STRING_SELFTEST=m
CONFIG_TEST_STRING_HELPERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_SCANF=m
@@ -703,7 +702,6 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 26f5b59e3bc0..d3420c642992 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -100,7 +100,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
@@ -577,7 +576,6 @@ CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_STRING_SELFTEST=m
CONFIG_TEST_STRING_HELPERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_SCANF=m
@@ -585,7 +583,6 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 3045c7f0bde3..e294b0b67695 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -101,7 +101,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
@@ -578,7 +577,6 @@ CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_STRING_SELFTEST=m
CONFIG_TEST_STRING_HELPERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_SCANF=m
@@ -586,7 +584,6 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index f2a486be651b..764a94b08936 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -102,7 +102,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
@@ -596,7 +595,6 @@ CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_STRING_SELFTEST=m
CONFIG_TEST_STRING_HELPERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_SCANF=m
@@ -604,7 +602,6 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 8a7db7be10c0..d4eeddac6bb8 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -98,7 +98,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
@@ -576,7 +575,6 @@ CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_STRING_SELFTEST=m
CONFIG_TEST_STRING_HELPERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_SCANF=m
@@ -584,7 +582,6 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 7ed49ee0b9e0..ca359b880683 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -98,7 +98,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
@@ -576,7 +575,6 @@ CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_STRING_SELFTEST=m
CONFIG_TEST_STRING_HELPERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_SCANF=m
@@ -584,7 +582,6 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/include/asm/gpio.h b/arch/m68k/include/asm/gpio.h
index a50b27719a58..5cfc0996ba94 100644
--- a/arch/m68k/include/asm/gpio.h
+++ b/arch/m68k/include/asm/gpio.h
@@ -66,13 +66,6 @@ static inline int gpio_to_irq(unsigned gpio)
return __gpio_to_irq(gpio);
}
-static inline int irq_to_gpio(unsigned irq)
-{
- return (irq >= MCFGPIO_IRQ_VECBASE &&
- irq < (MCFGPIO_IRQ_VECBASE + MCFGPIO_IRQ_MAX)) ?
- irq - MCFGPIO_IRQ_VECBASE : -ENXIO;
-}
-
static inline int gpio_cansleep(unsigned gpio)
{
return gpio < MCFGPIO_PIN_MAX ? 0 : __gpio_cansleep(gpio);
diff --git a/arch/m68k/include/asm/seccomp.h b/arch/m68k/include/asm/seccomp.h
new file mode 100644
index 000000000000..de8a94e1fb3f
--- /dev/null
+++ b/arch/m68k/include/asm/seccomp.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm-generic/seccomp.h>
+
+#define SECCOMP_ARCH_NATIVE AUDIT_ARCH_M68K
+#define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+#define SECCOMP_ARCH_NATIVE_NAME "m68k"
+
+#endif /* _ASM_SECCOMP_H */
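With HAVE_ARCH_SECCOMP_FILTER selected and TIF_SECCOMP wired into the entry path elsewhere in this patch, m68k gains the same architecture-independent userspace interface as other architectures. A minimal, generic classic-BPF filter sketch (nothing below is m68k-specific) that makes getpid() fail with ENOSYS while allowing all other syscalls:

#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

int main(void)
{
	struct sock_filter filter[] = {
		/* Load the syscall number from seccomp_data. */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
		/* getpid -> return ENOSYS, anything else -> allow. */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | ENOSYS),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(filter) / sizeof(filter[0]),
		.filter = filter,
	};

	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
	    prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
		return 1;

	errno = 0;
	printf("getpid() = %ld, errno = %d\n", (long)getpid(), errno);
	return 0;
}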
diff --git a/arch/m68k/include/asm/syscall.h b/arch/m68k/include/asm/syscall.h
index 465ac039be09..d1453e850cdd 100644
--- a/arch/m68k/include/asm/syscall.h
+++ b/arch/m68k/include/asm/syscall.h
@@ -4,6 +4,63 @@
#include <uapi/linux/audit.h>
+#include <asm/unistd.h>
+
+extern const unsigned long sys_call_table[];
+
+static inline int syscall_get_nr(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return regs->orig_d0;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ regs->d0 = regs->orig_d0;
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ unsigned long error = regs->d0;
+
+ return IS_ERR_VALUE(error) ? error : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return regs->d0;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+ struct pt_regs *regs,
+ int error, long val)
+{
+ regs->d0 = (long)error ?: val;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ unsigned long *args)
+{
+ args[0] = regs->orig_d0;
+ args++;
+
+ memcpy(args, &regs->d1, 5 * sizeof(args[0]));
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ unsigned long *args)
+{
+ regs->orig_d0 = args[0];
+ args++;
+
+ memcpy(&regs->d1, args, 5 * sizeof(args[0]));
+}
+
static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_M68K;
diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h
index c952658ba792..31be2ad999ca 100644
--- a/arch/m68k/include/asm/thread_info.h
+++ b/arch/m68k/include/asm/thread_info.h
@@ -61,6 +61,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_NOTIFY_RESUME 5 /* callback before returning to user */
#define TIF_SIGPENDING 6 /* signal pending */
#define TIF_NEED_RESCHED 7 /* rescheduling necessary */
+#define TIF_SECCOMP 13 /* seccomp syscall filtering active */
#define TIF_DELAYED_TRACE 14 /* single step a syscall */
#define TIF_SYSCALL_TRACE 15 /* syscall trace active */
#define TIF_MEMDIE 16 /* is terminating due to OOM killer */
@@ -69,6 +70,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_DELAYED_TRACE (1 << TIF_DELAYED_TRACE)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_MEMDIE (1 << TIF_MEMDIE)
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 18f278bdbd21..4dd2fd7acba9 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -184,9 +184,12 @@ do_trace_entry:
jbsr syscall_trace_enter
RESTORE_SWITCH_STACK
addql #4,%sp
+ addql #1,%d0 | optimization for cmpil #-1,%d0
+ jeq ret_from_syscall
movel %sp@(PT_OFF_ORIG_D0),%d0
cmpl #NR_syscalls,%d0
jcs syscall
+ jra ret_from_syscall
badsys:
movel #-ENOSYS,%sp@(PT_OFF_D0)
jra ret_from_syscall
@@ -211,6 +214,9 @@ ENTRY(system_call)
| syscall trace?
tstb %a1@(TINFO_FLAGS+2)
jmi do_trace_entry
+ | seccomp filter active?
+ btst #5,%a1@(TINFO_FLAGS+2)
+ bnes do_trace_entry
cmpl #NR_syscalls,%d0
jcc badsys
syscall:
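The magic numbers in the new check above follow from how the 32-bit flag word is probed one byte at a time: on big-endian m68k, TINFO_FLAGS+2 addresses bits 15..8, so TIF_SECCOMP (bit 13, added to thread_info.h above) lands on bit 5 of that byte, just as TIF_SYSCALL_TRACE (bit 15) is the byte's sign bit caught by the existing tstb/jmi pair. A quick standalone check of that arithmetic:

#include <assert.h>
#include <stdio.h>

#define TIF_SECCOMP		13
#define TIF_SYSCALL_TRACE	15

int main(void)
{
	/* Byte at TINFO_FLAGS+2 holds bits 15..8 of the big-endian flag word. */
	unsigned long flags = 1UL << TIF_SECCOMP;
	unsigned int byte2 = (flags >> 8) & 0xff;

	assert(TIF_SECCOMP - 8 == 5);			/* btst #5      */
	assert(TIF_SYSCALL_TRACE - 8 == 7);		/* tstb/jmi MSB */
	assert(byte2 & (1u << (TIF_SECCOMP - 8)));
	printf("flags byte at +2 = %#x, seccomp bit = %d\n",
	       byte2, TIF_SECCOMP - 8);
	return 0;
}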
diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c
index 0a4184a37461..cd0172d29430 100644
--- a/arch/m68k/kernel/ptrace.c
+++ b/arch/m68k/kernel/ptrace.c
@@ -21,7 +21,7 @@
#include <linux/signal.h>
#include <linux/regset.h>
#include <linux/elf.h>
-
+#include <linux/seccomp.h>
#include <linux/uaccess.h>
#include <asm/page.h>
#include <asm/processor.h>
@@ -278,6 +278,10 @@ asmlinkage int syscall_trace_enter(void)
if (test_thread_flag(TIF_SYSCALL_TRACE))
ret = ptrace_report_syscall_entry(task_pt_regs(current));
+
+ if (secure_computing() == -1)
+ return -1;
+
return ret;
}
diff --git a/arch/m68k/q40/q40ints.c b/arch/m68k/q40/q40ints.c
index d15057d34e56..127d7ecdbd49 100644
--- a/arch/m68k/q40/q40ints.c
+++ b/arch/m68k/q40/q40ints.c
@@ -201,8 +201,8 @@ static int ccleirq=60; /* ISA dev IRQs*/
#define DEBUG_Q40INT
/*#define IP_USE_DISABLE *//* would be nice, but crashes ???? */
-static int mext_disabled=0; /* ext irq disabled by master chip? */
-static int aliased_irq=0; /* how many times inside handler ?*/
+static int mext_disabled; /* ext irq disabled by master chip? */
+static int aliased_irq; /* how many times inside handler ?*/
/* got interrupt, dispatch to ISA or keyboard/timer IRQs */
diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h
index 9a7d7346001e..f433121774c0 100644
--- a/arch/riscv/include/asm/patch.h
+++ b/arch/riscv/include/asm/patch.h
@@ -7,6 +7,6 @@
#define _ASM_RISCV_PATCH_H
int patch_text_nosync(void *addr, const void *insns, size_t len);
-int patch_text(void *addr, u32 insn);
+int patch_text(void *addr, u32 *insns, int ninsns);
#endif /* _ASM_RISCV_PATCH_H */
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 765004b60513..8086d1a281cd 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -15,7 +15,8 @@
struct patch_insn {
void *addr;
- u32 insn;
+ u32 *insns;
+ int ninsns;
atomic_t cpu_count;
};
@@ -102,12 +103,15 @@ NOKPROBE_SYMBOL(patch_text_nosync);
static int patch_text_cb(void *data)
{
struct patch_insn *patch = data;
- int ret = 0;
+ unsigned long len;
+ int i, ret = 0;
if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
- ret =
- patch_text_nosync(patch->addr, &patch->insn,
- GET_INSN_LENGTH(patch->insn));
+ for (i = 0; ret == 0 && i < patch->ninsns; i++) {
+ len = GET_INSN_LENGTH(patch->insns[i]);
+ ret = patch_text_nosync(patch->addr + i * len,
+ &patch->insns[i], len);
+ }
atomic_inc(&patch->cpu_count);
} else {
while (atomic_read(&patch->cpu_count) <= num_online_cpus())
@@ -119,11 +123,12 @@ static int patch_text_cb(void *data)
}
NOKPROBE_SYMBOL(patch_text_cb);
-int patch_text(void *addr, u32 insn)
+int patch_text(void *addr, u32 *insns, int ninsns)
{
struct patch_insn patch = {
.addr = addr,
- .insn = insn,
+ .insns = insns,
+ .ninsns = ninsns,
.cpu_count = ATOMIC_INIT(0),
};
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index 2bedec37d092..2f08c14a933d 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -23,13 +23,14 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
+ u32 insn = __BUG_INSN_32;
unsigned long offset = GET_INSN_LENGTH(p->opcode);
p->ainsn.api.restore = (unsigned long)p->addr + offset;
- patch_text(p->ainsn.api.insn, p->opcode);
+ patch_text(p->ainsn.api.insn, &p->opcode, 1);
patch_text((void *)((unsigned long)(p->ainsn.api.insn) + offset),
- __BUG_INSN_32);
+ &insn, 1);
}
static void __kprobes arch_prepare_simulate(struct kprobe *p)
@@ -116,16 +117,16 @@ void *alloc_insn_page(void)
/* install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
- if ((p->opcode & __INSN_LENGTH_MASK) == __INSN_LENGTH_32)
- patch_text(p->addr, __BUG_INSN_32);
- else
- patch_text(p->addr, __BUG_INSN_16);
+ u32 insn = (p->opcode & __INSN_LENGTH_MASK) == __INSN_LENGTH_32 ?
+ __BUG_INSN_32 : __BUG_INSN_16;
+
+ patch_text(p->addr, &insn, 1);
}
/* remove breakpoint from text */
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
- patch_text(p->addr, p->opcode);
+ patch_text(p->addr, &p->opcode, 1);
}
void __kprobes arch_remove_kprobe(struct kprobe *p)
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index d926e0f7ef57..bf9802a63061 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -573,6 +573,11 @@ static inline u32 rv_fence(u8 pred, u8 succ)
return rv_i_insn(imm11_0, 0, 0, 0, 0xf);
}
+static inline u32 rv_nop(void)
+{
+ return rv_i_insn(0, 0, 0, 0, 0x13);
+}
+
/* RVC instrutions. */
static inline u16 rvc_addi4spn(u8 rd, u32 imm10)
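rv_nop() above emits the canonical RISC-V NOP, addi x0, x0, 0, by packing an I-type instruction with every field zero except the OP-IMM opcode 0x13. A standalone sketch of the same packing (the field layout follows the base ISA; this reimplements the encoder rather than calling the kernel's rv_i_insn()):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* RISC-V I-type: imm[11:0] | rs1 | funct3 | rd | opcode. */
static uint32_t i_type(uint16_t imm11_0, uint8_t rs1, uint8_t funct3,
		       uint8_t rd, uint8_t opcode)
{
	return ((uint32_t)imm11_0 << 20) | ((uint32_t)rs1 << 15) |
	       ((uint32_t)funct3 << 12) | ((uint32_t)rd << 7) | opcode;
}

int main(void)
{
	uint32_t nop = i_type(0, 0, 0, 0, 0x13);	/* addi x0, x0, 0 */

	assert(nop == 0x00000013);
	printf("nop encoding = %#010x\n", nop);
	return 0;
}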
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index f2417ac54edd..f5a668736c79 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -8,6 +8,8 @@
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/memory.h>
+#include <linux/stop_machine.h>
#include "bpf_jit.h"
#define RV_REG_TCC RV_REG_A6
@@ -238,7 +240,7 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
if (!is_tail_call)
emit_mv(RV_REG_A0, RV_REG_A5, ctx);
emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
- is_tail_call ? 4 : 0, /* skip TCC init */
+ is_tail_call ? 20 : 0, /* skip reserved nops and TCC init */
ctx);
}
@@ -428,12 +430,12 @@ static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
*rd = RV_REG_T2;
}
-static int emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr,
+static int emit_jump_and_link(u8 rd, s64 rvoff, bool fixed_addr,
struct rv_jit_context *ctx)
{
s64 upper, lower;
- if (rvoff && is_21b_int(rvoff) && !force_jalr) {
+ if (rvoff && fixed_addr && is_21b_int(rvoff)) {
emit(rv_jal(rd, rvoff >> 1), ctx);
return 0;
} else if (in_auipc_jalr_range(rvoff)) {
@@ -454,24 +456,17 @@ static bool is_signed_bpf_cond(u8 cond)
cond == BPF_JSGE || cond == BPF_JSLE;
}
-static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
+static int emit_call(u64 addr, bool fixed_addr, struct rv_jit_context *ctx)
{
s64 off = 0;
u64 ip;
- u8 rd;
- int ret;
if (addr && ctx->insns) {
ip = (u64)(long)(ctx->insns + ctx->ninsns);
off = addr - ip;
}
- ret = emit_jump_and_link(RV_REG_RA, off, !fixed, ctx);
- if (ret)
- return ret;
- rd = bpf_to_rv_reg(BPF_REG_0, ctx);
- emit_mv(rd, RV_REG_A0, ctx);
- return 0;
+ return emit_jump_and_link(RV_REG_RA, off, fixed_addr, ctx);
}
static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
@@ -622,6 +617,401 @@ static int add_exception_handler(const struct bpf_insn *insn,
return 0;
}
+static int gen_call_or_nops(void *target, void *ip, u32 *insns)
+{
+ s64 rvoff;
+ int i, ret;
+ struct rv_jit_context ctx;
+
+ ctx.ninsns = 0;
+ ctx.insns = (u16 *)insns;
+
+ if (!target) {
+ for (i = 0; i < 4; i++)
+ emit(rv_nop(), &ctx);
+ return 0;
+ }
+
+ rvoff = (s64)(target - (ip + 4));
+ emit(rv_sd(RV_REG_SP, -8, RV_REG_RA), &ctx);
+ ret = emit_jump_and_link(RV_REG_RA, rvoff, false, &ctx);
+ if (ret)
+ return ret;
+ emit(rv_ld(RV_REG_RA, -8, RV_REG_SP), &ctx);
+
+ return 0;
+}
+
+static int gen_jump_or_nops(void *target, void *ip, u32 *insns)
+{
+ s64 rvoff;
+ struct rv_jit_context ctx;
+
+ ctx.ninsns = 0;
+ ctx.insns = (u16 *)insns;
+
+ if (!target) {
+ emit(rv_nop(), &ctx);
+ emit(rv_nop(), &ctx);
+ return 0;
+ }
+
+ rvoff = (s64)(target - ip);
+ return emit_jump_and_link(RV_REG_ZERO, rvoff, false, &ctx);
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
+ void *old_addr, void *new_addr)
+{
+ u32 old_insns[4], new_insns[4];
+ bool is_call = poke_type == BPF_MOD_CALL;
+ int (*gen_insns)(void *target, void *ip, u32 *insns);
+ int ninsns = is_call ? 4 : 2;
+ int ret;
+
+ if (!is_bpf_text_address((unsigned long)ip))
+ return -ENOTSUPP;
+
+ gen_insns = is_call ? gen_call_or_nops : gen_jump_or_nops;
+
+ ret = gen_insns(old_addr, ip, old_insns);
+ if (ret)
+ return ret;
+
+ if (memcmp(ip, old_insns, ninsns * 4))
+ return -EFAULT;
+
+ ret = gen_insns(new_addr, ip, new_insns);
+ if (ret)
+ return ret;
+
+ cpus_read_lock();
+ mutex_lock(&text_mutex);
+ if (memcmp(ip, new_insns, ninsns * 4))
+ ret = patch_text(ip, new_insns, ninsns);
+ mutex_unlock(&text_mutex);
+ cpus_read_unlock();
+
+ return ret;
+}
+
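
A minimal usage sketch of the poke interface added above (illustration only, not
part of this patch; `ip` and `tramp` are placeholder variables, with `ip` assumed
to point at the 4 nops reserved by bpf_jit_build_prologue()):

	int err;

	/* attach: 4 nops -> sd ra,-8(sp); auipc+jalr; ld ra,-8(sp) */
	err = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, tramp);

	/* detach: call sequence -> 4 nops again */
	err = bpf_arch_text_poke(ip, BPF_MOD_CALL, tramp, NULL);
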
+static void store_args(int nregs, int args_off, struct rv_jit_context *ctx)
+{
+ int i;
+
+ for (i = 0; i < nregs; i++) {
+ emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx);
+ args_off -= 8;
+ }
+}
+
+static void restore_args(int nregs, int args_off, struct rv_jit_context *ctx)
+{
+ int i;
+
+ for (i = 0; i < nregs; i++) {
+ emit_ld(RV_REG_A0 + i, -args_off, RV_REG_FP, ctx);
+ args_off -= 8;
+ }
+}
+
+static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off,
+ int run_ctx_off, bool save_ret, struct rv_jit_context *ctx)
+{
+ int ret, branch_off;
+ struct bpf_prog *p = l->link.prog;
+ int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
+
+ if (l->cookie) {
+ emit_imm(RV_REG_T1, l->cookie, ctx);
+ emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_T1, ctx);
+ } else {
+ emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_ZERO, ctx);
+ }
+
+ /* arg1: prog */
+ emit_imm(RV_REG_A0, (const s64)p, ctx);
+ /* arg2: &run_ctx */
+ emit_addi(RV_REG_A1, RV_REG_FP, -run_ctx_off, ctx);
+ ret = emit_call((const u64)bpf_trampoline_enter(p), true, ctx);
+ if (ret)
+ return ret;
+
+ /* if (__bpf_prog_enter(prog) == 0)
+ * goto skip_exec_of_prog;
+ */
+ branch_off = ctx->ninsns;
+ /* nop reserved for conditional jump */
+ emit(rv_nop(), ctx);
+
+ /* store prog start time */
+ emit_mv(RV_REG_S1, RV_REG_A0, ctx);
+
+ /* arg1: &args_off */
+ emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx);
+ if (!p->jited)
+ /* arg2: progs[i]->insnsi for interpreter */
+ emit_imm(RV_REG_A1, (const s64)p->insnsi, ctx);
+ ret = emit_call((const u64)p->bpf_func, true, ctx);
+ if (ret)
+ return ret;
+
+ if (save_ret)
+ emit_sd(RV_REG_FP, -retval_off, regmap[BPF_REG_0], ctx);
+
+ /* update branch with beqz */
+ if (ctx->insns) {
+ int offset = ninsns_rvoff(ctx->ninsns - branch_off);
+ u32 insn = rv_beq(RV_REG_A0, RV_REG_ZERO, offset >> 1);
+ *(u32 *)(ctx->insns + branch_off) = insn;
+ }
+
+ /* arg1: prog */
+ emit_imm(RV_REG_A0, (const s64)p, ctx);
+ /* arg2: prog start time */
+ emit_mv(RV_REG_A1, RV_REG_S1, ctx);
+ /* arg3: &run_ctx */
+ emit_addi(RV_REG_A2, RV_REG_FP, -run_ctx_off, ctx);
+ ret = emit_call((const u64)bpf_trampoline_exit(p), true, ctx);
+
+ return ret;
+}
+
+static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
+ const struct btf_func_model *m,
+ struct bpf_tramp_links *tlinks,
+ void *func_addr, u32 flags,
+ struct rv_jit_context *ctx)
+{
+ int i, ret, offset;
+ int *branches_off = NULL;
+ int stack_size = 0, nregs = m->nr_args;
+ int retaddr_off, fp_off, retval_off, args_off;
+ int nregs_off, ip_off, run_ctx_off, sreg_off;
+ struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ void *orig_call = func_addr;
+ bool save_ret;
+ u32 insn;
+
+ /* Generated trampoline stack layout:
+ *
+ * FP - 8 [ RA of parent func ] return address of parent
+ * function
+ * FP - retaddr_off [ RA of traced func ] return address of traced
+ * function
+ * FP - fp_off [ FP of parent func ]
+ *
+ * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
+ * BPF_TRAMP_F_RET_FENTRY_RET
+ * [ argN ]
+ * [ ... ]
+ * FP - args_off [ arg1 ]
+ *
+ * FP - nregs_off [ regs count ]
+ *
+ * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG
+ *
+ * FP - run_ctx_off [ bpf_tramp_run_ctx ]
+ *
+ * FP - sreg_off [ callee saved reg ]
+ *
+ * [ pads ] pads for 16-byte alignment
+ */
+
+ if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
+ return -ENOTSUPP;
+
+ /* extra registers for struct arguments */
+ for (i = 0; i < m->nr_args; i++)
+ if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
+ nregs += round_up(m->arg_size[i], 8) / 8 - 1;
+
+ /* at most 8 arguments can be passed in registers */
+ if (nregs > 8)
+ return -ENOTSUPP;
+
+ /* room for parent function return address */
+ stack_size += 8;
+
+ stack_size += 8;
+ retaddr_off = stack_size;
+
+ stack_size += 8;
+ fp_off = stack_size;
+
+ save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+ if (save_ret) {
+ stack_size += 8;
+ retval_off = stack_size;
+ }
+
+ stack_size += nregs * 8;
+ args_off = stack_size;
+
+ stack_size += 8;
+ nregs_off = stack_size;
+
+ if (flags & BPF_TRAMP_F_IP_ARG) {
+ stack_size += 8;
+ ip_off = stack_size;
+ }
+
+ stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
+ run_ctx_off = stack_size;
+
+ stack_size += 8;
+ sreg_off = stack_size;
+
+ stack_size = round_up(stack_size, 16);
+
+ emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx);
+
+ emit_sd(RV_REG_SP, stack_size - retaddr_off, RV_REG_RA, ctx);
+ emit_sd(RV_REG_SP, stack_size - fp_off, RV_REG_FP, ctx);
+
+ emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
+
+ /* callee saved register S1 to pass start time */
+ emit_sd(RV_REG_FP, -sreg_off, RV_REG_S1, ctx);
+
+ /* store ip address of the traced function */
+ if (flags & BPF_TRAMP_F_IP_ARG) {
+ emit_imm(RV_REG_T1, (const s64)func_addr, ctx);
+ emit_sd(RV_REG_FP, -ip_off, RV_REG_T1, ctx);
+ }
+
+ emit_li(RV_REG_T1, nregs, ctx);
+ emit_sd(RV_REG_FP, -nregs_off, RV_REG_T1, ctx);
+
+ store_args(nregs, args_off, ctx);
+
+ /* skip to actual body of traced function */
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ orig_call += 16;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ emit_imm(RV_REG_A0, (const s64)im, ctx);
+ ret = emit_call((const u64)__bpf_tramp_enter, true, ctx);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < fentry->nr_links; i++) {
+ ret = invoke_bpf_prog(fentry->links[i], args_off, retval_off, run_ctx_off,
+ flags & BPF_TRAMP_F_RET_FENTRY_RET, ctx);
+ if (ret)
+ return ret;
+ }
+
+ if (fmod_ret->nr_links) {
+ branches_off = kcalloc(fmod_ret->nr_links, sizeof(int), GFP_KERNEL);
+ if (!branches_off)
+ return -ENOMEM;
+
+ /* clear the return value slot so stale data is not mistaken for a result */
+ emit_sd(RV_REG_FP, -retval_off, RV_REG_ZERO, ctx);
+ for (i = 0; i < fmod_ret->nr_links; i++) {
+ ret = invoke_bpf_prog(fmod_ret->links[i], args_off, retval_off,
+ run_ctx_off, true, ctx);
+ if (ret)
+ goto out;
+ emit_ld(RV_REG_T1, -retval_off, RV_REG_FP, ctx);
+ branches_off[i] = ctx->ninsns;
+ /* nop reserved for conditional jump */
+ emit(rv_nop(), ctx);
+ }
+ }
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ restore_args(nregs, args_off, ctx);
+ ret = emit_call((const u64)orig_call, true, ctx);
+ if (ret)
+ goto out;
+ emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx);
+ im->ip_after_call = ctx->insns + ctx->ninsns;
+ /* 2 nops reserved for auipc+jalr pair */
+ emit(rv_nop(), ctx);
+ emit(rv_nop(), ctx);
+ }
+
+ /* update the branch slots reserved in the fmod_ret loop above with bnez */
+ for (i = 0; ctx->insns && i < fmod_ret->nr_links; i++) {
+ offset = ninsns_rvoff(ctx->ninsns - branches_off[i]);
+ insn = rv_bne(RV_REG_T1, RV_REG_ZERO, offset >> 1);
+ *(u32 *)(ctx->insns + branches_off[i]) = insn;
+ }
+
+ for (i = 0; i < fexit->nr_links; i++) {
+ ret = invoke_bpf_prog(fexit->links[i], args_off, retval_off,
+ run_ctx_off, false, ctx);
+ if (ret)
+ goto out;
+ }
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ im->ip_epilogue = ctx->insns + ctx->ninsns;
+ emit_imm(RV_REG_A0, (const s64)im, ctx);
+ ret = emit_call((const u64)__bpf_tramp_exit, true, ctx);
+ if (ret)
+ goto out;
+ }
+
+ if (flags & BPF_TRAMP_F_RESTORE_REGS)
+ restore_args(nregs, args_off, ctx);
+
+ if (save_ret)
+ emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx);
+
+ emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);
+
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* return address of parent function */
+ emit_ld(RV_REG_RA, stack_size - 8, RV_REG_SP, ctx);
+ else
+ /* return address of traced function */
+ emit_ld(RV_REG_RA, stack_size - retaddr_off, RV_REG_SP, ctx);
+
+ emit_ld(RV_REG_FP, stack_size - fp_off, RV_REG_SP, ctx);
+ emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx);
+
+ emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx);
+
+ ret = ctx->ninsns;
+out:
+ kfree(branches_off);
+ return ret;
+}
+
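
For orientation, a worked instance of the offsets computed above, assuming a traced
function with two scalar arguments, BPF_TRAMP_F_CALL_ORIG set, no BPF_TRAMP_F_IP_ARG,
and sizeof(struct bpf_tramp_run_ctx) == 24 (the struct size is an assumption here):

	/*
	 * stack_size accumulation           resulting slot (FP = SP + 96)
	 *   parent RA slot    ->  8         parent RA at FP -  8 (stored by the patched call site)
	 *   retaddr_off       -> 16         traced-func RA at FP - 16
	 *   fp_off            -> 24         saved FP at FP - 24
	 *   retval_off        -> 32         return value at FP - 32
	 *   args_off (2 * 8)  -> 48         arg1 at FP - 48, arg2 at FP - 40
	 *   nregs_off         -> 56         register count at FP - 56
	 *   run_ctx_off (+24) -> 80         bpf_tramp_run_ctx at FP - 80
	 *   sreg_off          -> 88         saved S1 at FP - 88
	 *   round_up(88, 16)  -> 96         final stack_size
	 */
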
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
+ void *image_end, const struct btf_func_model *m,
+ u32 flags, struct bpf_tramp_links *tlinks,
+ void *func_addr)
+{
+ int ret;
+ struct rv_jit_context ctx;
+
+ ctx.ninsns = 0;
+ ctx.insns = NULL;
+ ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
+ if (ret < 0)
+ return ret;
+
+ if (ninsns_rvoff(ret) > (long)image_end - (long)image)
+ return -EFBIG;
+
+ ctx.ninsns = 0;
+ ctx.insns = image;
+ ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
+ if (ret < 0)
+ return ret;
+
+ bpf_flush_icache(ctx.insns, ctx.insns + ctx.ninsns);
+
+ return ninsns_rvoff(ret);
+}
+
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
bool extra_pass)
{
@@ -913,7 +1303,7 @@ out_be:
/* JUMP off */
case BPF_JMP | BPF_JA:
rvoff = rv_offset(i, off, ctx);
- ret = emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
+ ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
if (ret)
return ret;
break;
@@ -1032,17 +1422,20 @@ out_be:
/* function call */
case BPF_JMP | BPF_CALL:
{
- bool fixed;
+ bool fixed_addr;
u64 addr;
mark_call(ctx);
- ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr,
- &fixed);
+ ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
+ &addr, &fixed_addr);
if (ret < 0)
return ret;
- ret = emit_call(fixed, addr, ctx);
+
+ ret = emit_call(addr, fixed_addr, ctx);
if (ret)
return ret;
+
+ emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx);
break;
}
/* tail call */
@@ -1057,7 +1450,7 @@ out_be:
break;
rvoff = epilogue_offset(ctx);
- ret = emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
+ ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
if (ret)
return ret;
break;
@@ -1270,7 +1663,7 @@ out_be:
void bpf_jit_build_prologue(struct rv_jit_context *ctx)
{
- int stack_adjust = 0, store_offset, bpf_stack_adjust;
+ int i, stack_adjust = 0, store_offset, bpf_stack_adjust;
bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
if (bpf_stack_adjust)
@@ -1297,6 +1690,10 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx)
store_offset = stack_adjust - 8;
+ /* reserve 4 nop insns */
+ for (i = 0; i < 4; i++)
+ emit(rv_nop(), ctx);
+
/* First instruction is always setting the tail-call-counter
* (TCC) register. This instruction is skipped for tail calls.
* Force using a 4-byte (non-compressed) instruction.
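
The resulting program entry layout, which explains the 20-byte skip used for tail
calls in __build_epilogue() above (a sketch derived from this patch; byte offsets
are relative to the program start):

	/*
	 *   +0 .. +15   4 x nop        reserved for bpf_arch_text_poke()
	 *   +16         TCC init       forced 4-byte (non-compressed) insn
	 *   +20         rest of the prologue; tail calls land here
	 */
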
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 7fd08755a1f9..933771b0b07a 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -187,6 +187,7 @@ config S390
select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE
select HAVE_KRETPROBES
+ select HAVE_RETHOOK
select HAVE_KVM
select HAVE_LIVEPATCH
select HAVE_MEMBLOCK_PHYS_MAP
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index d52c3e2e16bc..47a397da0498 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -35,7 +35,7 @@ endif
CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
-obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
+obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o vmem.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 70418389414d..58ce701d6110 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -8,10 +8,36 @@
#ifndef __ASSEMBLY__
+struct machine_info {
+ unsigned char has_edat1 : 1;
+ unsigned char has_edat2 : 1;
+ unsigned char has_nx : 1;
+};
+
+struct vmlinux_info {
+ unsigned long default_lma;
+ unsigned long entry;
+ unsigned long image_size; /* does not include .bss */
+ unsigned long bss_size; /* uncompressed image .bss size */
+ unsigned long bootdata_off;
+ unsigned long bootdata_size;
+ unsigned long bootdata_preserved_off;
+ unsigned long bootdata_preserved_size;
+ unsigned long dynsym_start;
+ unsigned long rela_dyn_start;
+ unsigned long rela_dyn_end;
+ unsigned long amode31_size;
+ unsigned long init_mm_off;
+ unsigned long swapper_pg_dir_off;
+ unsigned long invalid_pg_dir_off;
+};
+
void startup_kernel(void);
-unsigned long detect_memory(void);
+unsigned long detect_memory(unsigned long *safe_addr);
+void mem_detect_set_usable_limit(unsigned long limit);
bool is_ipl_block_dump(void);
void store_ipl_parmblock(void);
+unsigned long read_ipl_report(unsigned long safe_addr);
void setup_boot_command_line(void);
void parse_boot_command_line(void);
void verify_facilities(void);
@@ -19,7 +45,12 @@ void print_missing_facilities(void);
void sclp_early_setup_buffer(void);
void print_pgm_check_info(void);
unsigned long get_random_base(unsigned long safe_addr);
+void setup_vmem(unsigned long asce_limit);
+unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total);
void __printf(1, 2) decompressor_printk(const char *fmt, ...);
+void error(char *m);
+
+extern struct machine_info machine;
/* Symbols defined by linker scripts */
extern const char kernel_version[];
@@ -31,8 +62,13 @@ extern char __boot_data_start[], __boot_data_end[];
extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
extern char _decompressor_syms_start[], _decompressor_syms_end[];
extern char _stack_start[], _stack_end[];
+extern char _end[];
+extern unsigned char _compressed_start[];
+extern unsigned char _compressed_end[];
+extern struct vmlinux_info _vmlinux_info;
+#define vmlinux _vmlinux_info
-unsigned long read_ipl_report(unsigned long safe_offset);
+#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
#endif /* __ASSEMBLY__ */
#endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c
index b519a1f045d8..d762733a0753 100644
--- a/arch/s390/boot/decompressor.c
+++ b/arch/s390/boot/decompressor.c
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <asm/page.h>
#include "decompressor.h"
+#include "boot.h"
/*
* gzip declarations
diff --git a/arch/s390/boot/decompressor.h b/arch/s390/boot/decompressor.h
index f75cc31a77dd..92b81d2ea35d 100644
--- a/arch/s390/boot/decompressor.h
+++ b/arch/s390/boot/decompressor.h
@@ -2,37 +2,11 @@
#ifndef BOOT_COMPRESSED_DECOMPRESSOR_H
#define BOOT_COMPRESSED_DECOMPRESSOR_H
-#include <linux/stddef.h>
-
#ifdef CONFIG_KERNEL_UNCOMPRESSED
static inline void *decompress_kernel(void) { return NULL; }
#else
void *decompress_kernel(void);
#endif
unsigned long mem_safe_offset(void);
-void error(char *m);
-
-struct vmlinux_info {
- unsigned long default_lma;
- void (*entry)(void);
- unsigned long image_size; /* does not include .bss */
- unsigned long bss_size; /* uncompressed image .bss size */
- unsigned long bootdata_off;
- unsigned long bootdata_size;
- unsigned long bootdata_preserved_off;
- unsigned long bootdata_preserved_size;
- unsigned long dynsym_start;
- unsigned long rela_dyn_start;
- unsigned long rela_dyn_end;
- unsigned long amode31_size;
-};
-
-/* Symbols defined by linker scripts */
-extern char _end[];
-extern unsigned char _compressed_start[];
-extern unsigned char _compressed_end[];
-extern char _vmlinux_info[];
-
-#define vmlinux (*(struct vmlinux_info *)_vmlinux_info)
#endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index e8d74d4f62aa..3e3d846400b4 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -132,7 +132,7 @@ static unsigned long count_valid_kernel_positions(unsigned long kernel_size,
unsigned long start, end, pos = 0;
int i;
- for_each_mem_detect_block(i, &start, &end) {
+ for_each_mem_detect_usable_block(i, &start, &end) {
if (_min >= end)
continue;
if (start >= _max)
@@ -153,7 +153,7 @@ static unsigned long position_to_address(unsigned long pos, unsigned long kernel
unsigned long start, end;
int i;
- for_each_mem_detect_block(i, &start, &end) {
+ for_each_mem_detect_usable_block(i, &start, &end) {
if (_min >= end)
continue;
if (start >= _max)
@@ -172,26 +172,20 @@ static unsigned long position_to_address(unsigned long pos, unsigned long kernel
unsigned long get_random_base(unsigned long safe_addr)
{
+ unsigned long usable_total = get_mem_detect_usable_total();
unsigned long memory_limit = get_mem_detect_end();
unsigned long base_pos, max_pos, kernel_size;
- unsigned long kasan_needs;
int i;
- memory_limit = min(memory_limit, ident_map_size);
-
/*
* Avoid putting kernel in the end of physical memory
- * which kasan will use for shadow memory and early pgtable
- * mapping allocations.
+ * which vmem and kasan code will use for shadow memory and
+ * pgtable mapping allocations.
*/
- memory_limit -= kasan_estimate_memory_needs(memory_limit);
+ memory_limit -= kasan_estimate_memory_needs(usable_total);
+ memory_limit -= vmem_estimate_memory_needs(usable_total);
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size) {
- if (safe_addr < initrd_data.start + initrd_data.size)
- safe_addr = initrd_data.start + initrd_data.size;
- }
safe_addr = ALIGN(safe_addr, THREAD_SIZE);
-
kernel_size = vmlinux.image_size + vmlinux.bss_size;
if (safe_addr + kernel_size > memory_limit)
return 0;
diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
index 7fa1a32ea0f3..35f4ba11f7fd 100644
--- a/arch/s390/boot/mem_detect.c
+++ b/arch/s390/boot/mem_detect.c
@@ -16,29 +16,10 @@ struct mem_detect_info __bootdata(mem_detect);
#define ENTRIES_EXTENDED_MAX \
(256 * (1020 / 2) * sizeof(struct mem_detect_block))
-/*
- * To avoid corrupting old kernel memory during dump, find lowest memory
- * chunk possible either right after the kernel end (decompressed kernel) or
- * after initrd (if it is present and there is no hole between the kernel end
- * and initrd)
- */
-static void *mem_detect_alloc_extended(void)
-{
- unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64));
-
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size &&
- initrd_data.start < offset + ENTRIES_EXTENDED_MAX)
- offset = ALIGN(initrd_data.start + initrd_data.size, sizeof(u64));
-
- return (void *)offset;
-}
-
static struct mem_detect_block *__get_mem_detect_block_ptr(u32 n)
{
if (n < MEM_INLINED_ENTRIES)
return &mem_detect.entries[n];
- if (unlikely(!mem_detect.entries_extended))
- mem_detect.entries_extended = mem_detect_alloc_extended();
return &mem_detect.entries_extended[n - MEM_INLINED_ENTRIES];
}
@@ -147,7 +128,7 @@ static int tprot(unsigned long addr)
return rc;
}
-static void search_mem_end(void)
+static unsigned long search_mem_end(void)
{
unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */
unsigned long offset = 0;
@@ -159,33 +140,52 @@ static void search_mem_end(void)
if (!tprot(pivot << 20))
offset = pivot;
}
-
- add_mem_detect_block(0, (offset + 1) << 20);
+ return (offset + 1) << 20;
}
-unsigned long detect_memory(void)
+unsigned long detect_memory(unsigned long *safe_addr)
{
- unsigned long max_physmem_end;
+ unsigned long max_physmem_end = 0;
sclp_early_get_memsize(&max_physmem_end);
+ mem_detect.entries_extended = (struct mem_detect_block *)ALIGN(*safe_addr, sizeof(u64));
if (!sclp_early_read_storage_info()) {
mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO;
- return max_physmem_end;
- }
-
- if (!diag260()) {
+ } else if (!diag260()) {
mem_detect.info_source = MEM_DETECT_DIAG260;
- return max_physmem_end;
- }
-
- if (max_physmem_end) {
+ max_physmem_end = max_physmem_end ?: get_mem_detect_end();
+ } else if (max_physmem_end) {
add_mem_detect_block(0, max_physmem_end);
mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO;
- return max_physmem_end;
+ } else {
+ max_physmem_end = search_mem_end();
+ add_mem_detect_block(0, max_physmem_end);
+ mem_detect.info_source = MEM_DETECT_BIN_SEARCH;
}
- search_mem_end();
- mem_detect.info_source = MEM_DETECT_BIN_SEARCH;
- return get_mem_detect_end();
+ if (mem_detect.count > MEM_INLINED_ENTRIES) {
+ *safe_addr += (mem_detect.count - MEM_INLINED_ENTRIES) *
+ sizeof(struct mem_detect_block);
+ }
+
+ return max_physmem_end;
+}
+
+void mem_detect_set_usable_limit(unsigned long limit)
+{
+ struct mem_detect_block *block;
+ int i;
+
+ /* make sure mem_detect.usable ends up within an online memory block */
+ for (i = 0; i < mem_detect.count; i++) {
+ block = __get_mem_detect_block_ptr(i);
+ if (block->start >= limit)
+ break;
+ if (block->end >= limit) {
+ mem_detect.usable = limit;
+ break;
+ }
+ mem_detect.usable = block->end;
+ }
}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 47ca3264c023..11413f0baabc 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -3,6 +3,7 @@
#include <linux/elf.h>
#include <asm/boot_data.h>
#include <asm/sections.h>
+#include <asm/maccess.h>
#include <asm/cpu_mf.h>
#include <asm/setup.h>
#include <asm/kasan.h>
@@ -11,6 +12,7 @@
#include <asm/diag.h>
#include <asm/uv.h>
#include <asm/abs_lowcore.h>
+#include <asm/mem_detect.h>
#include "decompressor.h"
#include "boot.h"
#include "uv.h"
@@ -18,6 +20,7 @@
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata_preserved(__abs_lowcore);
unsigned long __bootdata_preserved(__memcpy_real_area);
+pte_t *__bootdata_preserved(memcpy_real_ptep);
unsigned long __bootdata(__amode31_base);
unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END);
@@ -33,6 +36,8 @@ u64 __bootdata_preserved(stfle_fac_list[16]);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);
+struct machine_info machine;
+
void error(char *x)
{
sclp_early_printk("\n\n");
@@ -42,6 +47,20 @@ void error(char *x)
disabled_wait();
}
+static void detect_facilities(void)
+{
+ if (test_facility(8)) {
+ machine.has_edat1 = 1;
+ __ctl_set_bit(0, 23);
+ }
+ if (test_facility(78))
+ machine.has_edat2 = 1;
+ if (!noexec_disabled && test_facility(130)) {
+ machine.has_nx = 1;
+ __ctl_set_bit(0, 20);
+ }
+}
+
static void setup_lpp(void)
{
S390_lowcore.current_pid = 0;
@@ -57,16 +76,17 @@ unsigned long mem_safe_offset(void)
}
#endif
-static void rescue_initrd(unsigned long addr)
+static unsigned long rescue_initrd(unsigned long safe_addr)
{
if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
- return;
+ return safe_addr;
if (!initrd_data.start || !initrd_data.size)
- return;
- if (addr <= initrd_data.start)
- return;
- memmove((void *)addr, (void *)initrd_data.start, initrd_data.size);
- initrd_data.start = addr;
+ return safe_addr;
+ if (initrd_data.start < safe_addr) {
+ memmove((void *)safe_addr, (void *)initrd_data.start, initrd_data.size);
+ initrd_data.start = safe_addr;
+ }
+ return initrd_data.start + initrd_data.size;
}
static void copy_bootdata(void)
@@ -150,9 +170,10 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
#endif
}
-static void setup_kernel_memory_layout(void)
+static unsigned long setup_kernel_memory_layout(void)
{
unsigned long vmemmap_start;
+ unsigned long asce_limit;
unsigned long rte_size;
unsigned long pages;
unsigned long vmax;
@@ -167,10 +188,10 @@ static void setup_kernel_memory_layout(void)
vmalloc_size > _REGION2_SIZE ||
vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
_REGION2_SIZE) {
- vmax = _REGION1_SIZE;
+ asce_limit = _REGION1_SIZE;
rte_size = _REGION2_SIZE;
} else {
- vmax = _REGION2_SIZE;
+ asce_limit = _REGION2_SIZE;
rte_size = _REGION3_SIZE;
}
/*
@@ -178,7 +199,7 @@ static void setup_kernel_memory_layout(void)
* secure storage limit, so that any vmalloc allocation
* we do could be used to back secure guest storage.
*/
- vmax = adjust_to_uv_max(vmax);
+ vmax = adjust_to_uv_max(asce_limit);
#ifdef CONFIG_KASAN
/* force vmalloc and modules below kasan shadow */
vmax = min(vmax, KASAN_SHADOW_START);
@@ -207,6 +228,8 @@ static void setup_kernel_memory_layout(void)
/* make sure vmemmap doesn't overlay with vmalloc area */
VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
vmemmap = (struct page *)vmemmap_start;
+
+ return asce_limit;
}
/*
@@ -240,19 +263,25 @@ static void offset_vmlinux_info(unsigned long offset)
vmlinux.rela_dyn_start += offset;
vmlinux.rela_dyn_end += offset;
vmlinux.dynsym_start += offset;
+ vmlinux.init_mm_off += offset;
+ vmlinux.swapper_pg_dir_off += offset;
+ vmlinux.invalid_pg_dir_off += offset;
}
static unsigned long reserve_amode31(unsigned long safe_addr)
{
__amode31_base = PAGE_ALIGN(safe_addr);
- return safe_addr + vmlinux.amode31_size;
+ return __amode31_base + vmlinux.amode31_size;
}
void startup_kernel(void)
{
+ unsigned long max_physmem_end;
unsigned long random_lma;
unsigned long safe_addr;
+ unsigned long asce_limit;
void *img;
+ psw_t psw;
initrd_data.start = parmarea.initrd_start;
initrd_data.size = parmarea.initrd_size;
@@ -265,14 +294,17 @@ void startup_kernel(void)
safe_addr = reserve_amode31(safe_addr);
safe_addr = read_ipl_report(safe_addr);
uv_query_info();
- rescue_initrd(safe_addr);
+ safe_addr = rescue_initrd(safe_addr);
sclp_early_read_info();
setup_boot_command_line();
parse_boot_command_line();
+ detect_facilities();
sanitize_prot_virt_host();
- setup_ident_map_size(detect_memory());
+ max_physmem_end = detect_memory(&safe_addr);
+ setup_ident_map_size(max_physmem_end);
setup_vmalloc_size();
- setup_kernel_memory_layout();
+ asce_limit = setup_kernel_memory_layout();
+ mem_detect_set_usable_limit(ident_map_size);
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
random_lma = get_random_base(safe_addr);
@@ -289,9 +321,23 @@ void startup_kernel(void)
} else if (__kaslr_offset)
memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);
+ /*
+ * The order of the following operations is important:
+ *
+ * - handle_relocs() must follow clear_bss_section() to establish static
+ * memory references to data in .bss to be used by setup_vmem()
+ * (i.e. init_mm.pgd)
+ *
+ * - setup_vmem() must follow handle_relocs() to be able to use
+ * static memory references to data in .bss (i.e. init_mm.pgd)
+ *
+ * - copy_bootdata() must follow setup_vmem() to propagate changes to
+ * bootdata made by setup_vmem()
+ */
clear_bss_section();
- copy_bootdata();
handle_relocs(__kaslr_offset);
+ setup_vmem(asce_limit);
+ copy_bootdata();
if (__kaslr_offset) {
/*
@@ -303,5 +349,11 @@ void startup_kernel(void)
if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
memset(img, 0, vmlinux.image_size);
}
- vmlinux.entry();
+
+ /*
+ * Jump to the decompressed kernel entry point and switch DAT mode on.
+ */
+ psw.addr = vmlinux.entry;
+ psw.mask = PSW_KERNEL_BITS;
+ __load_psw(psw);
}
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
new file mode 100644
index 000000000000..4d1d0d8e99cb
--- /dev/null
+++ b/arch/s390/boot/vmem.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/sched/task.h>
+#include <linux/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/mem_detect.h>
+#include <asm/maccess.h>
+#include <asm/abs_lowcore.h>
+#include "decompressor.h"
+#include "boot.h"
+
+#define init_mm (*(struct mm_struct *)vmlinux.init_mm_off)
+#define swapper_pg_dir vmlinux.swapper_pg_dir_off
+#define invalid_pg_dir vmlinux.invalid_pg_dir_off
+
+/*
+ * Mimic virt_to_kpte() for lack of the init_mm symbol. Skip the pmd NULL check though.
+ */
+static inline pte_t *__virt_to_kpte(unsigned long va)
+{
+ return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
+}
+
+unsigned long __bootdata_preserved(s390_invalid_asce);
+unsigned long __bootdata(pgalloc_pos);
+unsigned long __bootdata(pgalloc_end);
+unsigned long __bootdata(pgalloc_low);
+
+enum populate_mode {
+ POPULATE_NONE,
+ POPULATE_ONE2ONE,
+ POPULATE_ABS_LOWCORE,
+};
+
+static void boot_check_oom(void)
+{
+ if (pgalloc_pos < pgalloc_low)
+ error("out of memory on boot\n");
+}
+
+static void pgtable_populate_init(void)
+{
+ unsigned long initrd_end;
+ unsigned long kernel_end;
+
+ kernel_end = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
+ pgalloc_low = round_up(kernel_end, PAGE_SIZE);
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
+ initrd_end = round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
+ pgalloc_low = max(pgalloc_low, initrd_end);
+ }
+
+ pgalloc_end = round_down(get_mem_detect_end(), PAGE_SIZE);
+ pgalloc_pos = pgalloc_end;
+
+ boot_check_oom();
+}
+
+static void *boot_alloc_pages(unsigned int order)
+{
+ unsigned long size = PAGE_SIZE << order;
+
+ pgalloc_pos -= size;
+ pgalloc_pos = round_down(pgalloc_pos, size);
+
+ boot_check_oom();
+
+ return (void *)pgalloc_pos;
+}
+
+static void *boot_crst_alloc(unsigned long val)
+{
+ unsigned long *table;
+
+ table = boot_alloc_pages(CRST_ALLOC_ORDER);
+ if (table)
+ crst_table_init(table, val);
+ return table;
+}
+
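+/*
+ * A 4KB page provides two 2KB page tables: hand out one half per call and
+ * keep the leftover half for the next allocation.
+ */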
+static pte_t *boot_pte_alloc(void)
+{
+ static void *pte_leftover;
+ pte_t *pte;
+
+ BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE);
+
+ if (!pte_leftover) {
+ pte_leftover = boot_alloc_pages(0);
+ pte = pte_leftover + _PAGE_TABLE_SIZE;
+ } else {
+ pte = pte_leftover;
+ pte_leftover = NULL;
+ }
+ memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
+ return pte;
+}
+
+static unsigned long _pa(unsigned long addr, enum populate_mode mode)
+{
+ switch (mode) {
+ case POPULATE_NONE:
+ return -1;
+ case POPULATE_ONE2ONE:
+ return addr;
+ case POPULATE_ABS_LOWCORE:
+ return __abs_lowcore_pa(addr);
+ default:
+ return -1;
+ }
+}
+
+static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end)
+{
+ return machine.has_edat2 &&
+ IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE;
+}
+
+static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
+{
+ return machine.has_edat1 &&
+ IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE;
+}
+
+static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
+{
+ unsigned long next;
+ pte_t *pte, entry;
+
+ pte = pte_offset_kernel(pmd, addr);
+ for (; addr < end; addr += PAGE_SIZE, pte++) {
+ if (pte_none(*pte)) {
+ entry = __pte(_pa(addr, mode));
+ entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
+ set_pte(pte, entry);
+ }
+ }
+}
+
+static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
+{
+ unsigned long next;
+ pmd_t *pmd, entry;
+ pte_t *pte;
+
+ pmd = pmd_offset(pud, addr);
+ for (; addr < end; addr = next, pmd++) {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(*pmd)) {
+ if (can_large_pmd(pmd, addr, next)) {
+ entry = __pmd(_pa(addr, mode));
+ entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
+ set_pmd(pmd, entry);
+ continue;
+ }
+ pte = boot_pte_alloc();
+ pmd_populate(&init_mm, pmd, pte);
+ } else if (pmd_large(*pmd)) {
+ continue;
+ }
+ pgtable_pte_populate(pmd, addr, next, mode);
+ }
+}
+
+static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
+{
+ unsigned long next;
+ pud_t *pud, entry;
+ pmd_t *pmd;
+
+ pud = pud_offset(p4d, addr);
+ for (; addr < end; addr = next, pud++) {
+ next = pud_addr_end(addr, end);
+ if (pud_none(*pud)) {
+ if (can_large_pud(pud, addr, next)) {
+ entry = __pud(_pa(addr, mode));
+ entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
+ set_pud(pud, entry);
+ continue;
+ }
+ pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ pud_populate(&init_mm, pud, pmd);
+ } else if (pud_large(*pud)) {
+ continue;
+ }
+ pgtable_pmd_populate(pud, addr, next, mode);
+ }
+}
+
+static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
+{
+ unsigned long next;
+ p4d_t *p4d;
+ pud_t *pud;
+
+ p4d = p4d_offset(pgd, addr);
+ for (; addr < end; addr = next, p4d++) {
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(*p4d)) {
+ pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
+ p4d_populate(&init_mm, p4d, pud);
+ }
+ pgtable_pud_populate(p4d, addr, next, mode);
+ }
+}
+
+static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
+{
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+
+ pgd = pgd_offset(&init_mm, addr);
+ for (; addr < end; addr = next, pgd++) {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(*pgd)) {
+ p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
+ pgd_populate(&init_mm, pgd, p4d);
+ }
+ pgtable_p4d_populate(pgd, addr, next, mode);
+ }
+}
+
+void setup_vmem(unsigned long asce_limit)
+{
+ unsigned long start, end;
+ unsigned long asce_type;
+ unsigned long asce_bits;
+ int i;
+
+ if (asce_limit == _REGION1_SIZE) {
+ asce_type = _REGION2_ENTRY_EMPTY;
+ asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+ } else {
+ asce_type = _REGION3_ENTRY_EMPTY;
+ asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+ }
+ s390_invalid_asce = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+
+ crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
+ crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
+
+ /*
+ * To allow prefixing, the lowcore must be mapped with 4KB pages.
+ * To prevent creation of a large page at address 0, first map the
+ * lowcore and create the identity mapping only afterwards.
+ */
+ pgtable_populate_init();
+ pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE);
+ for_each_mem_detect_usable_block(i, &start, &end)
+ pgtable_populate(start, end, POPULATE_ONE2ONE);
+ pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
+ POPULATE_ABS_LOWCORE);
+ pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
+ POPULATE_NONE);
+ memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area);
+
+ S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits;
+ S390_lowcore.user_asce = s390_invalid_asce;
+
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
+ __ctl_load(S390_lowcore.kernel_asce, 13, 13);
+
+ init_mm.context.asce = S390_lowcore.kernel_asce;
+}
+
+unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total)
+{
+ unsigned long pages = DIV_ROUND_UP(online_mem_total, PAGE_SIZE);
+
+ return DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2;
+}
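
A quick sanity check of the estimate above (assuming PAGE_SIZE = 4KB, which per the
BUILD_BUG_ON() in boot_pte_alloc() implies _PAGE_TABLE_SIZE = 2KB and, with 8-byte
entries, _PAGE_ENTRIES = 256):

	/*
	 * online_mem_total = 4GB
	 * pages            = 4GB / 4KB                  = 1048576
	 * page tables      = DIV_ROUND_UP(1048576, 256) = 4096
	 * reservation      = 4096 * 2KB * 2             = 16MB
	 */
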
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 526c3f40f6a2..c773820e4af9 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -398,10 +398,6 @@ static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
if (err)
return err;
- /* In fips mode only 128 bit or 256 bit keys are valid */
- if (fips_enabled && key_len != 32 && key_len != 64)
- return -EINVAL;
-
/* Pick the correct function code based on the key length */
fc = (key_len == 32) ? CPACF_KM_XTS_128 :
(key_len == 64) ? CPACF_KM_XTS_256 : 0;
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
index 1f2d40993c4d..a8a2407381af 100644
--- a/arch/s390/crypto/arch_random.c
+++ b/arch/s390/crypto/arch_random.c
@@ -10,6 +10,7 @@
#include <linux/atomic.h>
#include <linux/random.h>
#include <linux/static_key.h>
+#include <asm/archrandom.h>
#include <asm/cpacf.h>
DEFINE_STATIC_KEY_FALSE(s390_arch_random_available);
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index a279b7d23a5e..29dc827e0fe8 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -474,7 +474,7 @@ static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
return rc;
/*
- * xts_check_key verifies the key length is not odd and makes
+ * xts_verify_key verifies the key length is not odd and makes
* sure that the two keys are not the same. This can be done
* on the two protected keys as well
*/
diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h
index 4c61b14ee928..6f264b79e377 100644
--- a/arch/s390/include/asm/abs_lowcore.h
+++ b/arch/s390/include/asm/abs_lowcore.h
@@ -7,11 +7,21 @@
#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore))
extern unsigned long __abs_lowcore;
-extern bool abs_lowcore_mapped;
-struct lowcore *get_abs_lowcore(unsigned long *flags);
-void put_abs_lowcore(struct lowcore *lc, unsigned long flags);
int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc);
void abs_lowcore_unmap(int cpu);
+static inline struct lowcore *get_abs_lowcore(void)
+{
+ int cpu;
+
+ cpu = get_cpu();
+ return ((struct lowcore *)__abs_lowcore) + cpu;
+}
+
+static inline void put_abs_lowcore(struct lowcore *lc)
+{
+ put_cpu();
+}
+
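A minimal usage sketch of the new inline accessors (not part of this patch); the
pair brackets a non-preemptible section because get_abs_lowcore() now relies on
get_cpu():

	struct lowcore *abs_lc;

	abs_lc = get_abs_lowcore();	/* get_cpu() disables preemption */
	/* ... read or update fields of the absolute lowcore ... */
	put_abs_lowcore(abs_lc);	/* put_cpu() re-enables preemption */
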
#endif /* _ASM_S390_ABS_LOWCORE_H */
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index f508f5025e38..57a2d6518d27 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -239,7 +239,10 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
union {
unsigned long value;
struct ap_qirq_ctrl qirqctrl;
- struct ap_queue_status status;
+ struct {
+ u32 _pad;
+ struct ap_queue_status status;
+ };
} reg1;
unsigned long reg2 = pa_ind;
@@ -253,7 +256,7 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
: [reg1] "+&d" (reg1)
: [reg0] "d" (reg0), [reg2] "d" (reg2)
- : "cc", "0", "1", "2");
+ : "cc", "memory", "0", "1", "2");
return reg1.status;
}
@@ -290,7 +293,10 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
unsigned long reg0 = qid | (5UL << 24) | ((ifbit & 0x01) << 22);
union {
unsigned long value;
- struct ap_queue_status status;
+ struct {
+ u32 _pad;
+ struct ap_queue_status status;
+ };
} reg1;
unsigned long reg2;
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h
index b74f1070ddb2..55a02a153dfc 100644
--- a/arch/s390/include/asm/asm-extable.h
+++ b/arch/s390/include/asm/asm-extable.h
@@ -12,6 +12,7 @@
#define EX_TYPE_UA_STORE 3
#define EX_TYPE_UA_LOAD_MEM 4
#define EX_TYPE_UA_LOAD_REG 5
+#define EX_TYPE_UA_LOAD_REGPAIR 6
#define EX_DATA_REG_ERR_SHIFT 0
#define EX_DATA_REG_ERR GENMASK(3, 0)
@@ -85,4 +86,7 @@
#define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \
__EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
+#define EX_TABLE_UA_LOAD_REGPAIR(_fault, _target, _regerr, _regzero) \
+ __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REGPAIR, _regerr, _regzero, 0)
+
#endif /* __ASM_EXTABLE_H */
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index bd1596810cc1..91d261751d25 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -15,6 +15,7 @@
#include <asm/fcx.h>
#include <asm/irq.h>
#include <asm/schid.h>
+#include <linux/mutex.h>
/* structs from asm/cio.h */
struct irb;
@@ -87,6 +88,7 @@ struct ccw_device {
spinlock_t *ccwlock;
/* private: */
struct ccw_device_private *private; /* cio private information */
+ struct mutex reg_mutex;
/* public: */
struct ccw_device_id id;
struct ccw_driver *drv;
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 84c3f0d576c5..3f26416c2ad8 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -88,67 +88,90 @@ static __always_inline unsigned long __cmpxchg(unsigned long address,
unsigned long old,
unsigned long new, int size)
{
- unsigned long prev, tmp;
- int shift;
-
switch (size) {
- case 1:
+ case 1: {
+ unsigned int prev, shift, mask;
+
shift = (3 ^ (address & 3)) << 3;
address ^= address & 3;
+ old = (old & 0xff) << shift;
+ new = (new & 0xff) << shift;
+ mask = ~(0xff << shift);
asm volatile(
- " l %0,%2\n"
- "0: nr %0,%5\n"
- " lr %1,%0\n"
- " or %0,%3\n"
- " or %1,%4\n"
- " cs %0,%1,%2\n"
- " jnl 1f\n"
- " xr %1,%0\n"
- " nr %1,%5\n"
- " jnz 0b\n"
+ " l %[prev],%[address]\n"
+ " nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "0: lr %[tmp],%[prev]\n"
+ " cs %[prev],%[new],%[address]\n"
+ " jnl 1f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jz 0b\n"
"1:"
- : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address)
- : "d" ((old & 0xff) << shift),
- "d" ((new & 0xff) << shift),
- "d" (~(0xff << shift))
- : "memory", "cc");
+ : [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (old),
+ [new] "+&d" (new),
+ [mask] "+&d" (mask)
+ :: "memory", "cc");
return prev >> shift;
- case 2:
+ }
+ case 2: {
+ unsigned int prev, shift, mask;
+
shift = (2 ^ (address & 2)) << 3;
address ^= address & 2;
+ old = (old & 0xffff) << shift;
+ new = (new & 0xffff) << shift;
+ mask = ~(0xffff << shift);
asm volatile(
- " l %0,%2\n"
- "0: nr %0,%5\n"
- " lr %1,%0\n"
- " or %0,%3\n"
- " or %1,%4\n"
- " cs %0,%1,%2\n"
- " jnl 1f\n"
- " xr %1,%0\n"
- " nr %1,%5\n"
- " jnz 0b\n"
+ " l %[prev],%[address]\n"
+ " nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "0: lr %[tmp],%[prev]\n"
+ " cs %[prev],%[new],%[address]\n"
+ " jnl 1f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jz 0b\n"
"1:"
- : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address)
- : "d" ((old & 0xffff) << shift),
- "d" ((new & 0xffff) << shift),
- "d" (~(0xffff << shift))
- : "memory", "cc");
+ : [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (old),
+ [new] "+&d" (new),
+ [mask] "+&d" (mask)
+ :: "memory", "cc");
return prev >> shift;
- case 4:
+ }
+ case 4: {
+ unsigned int prev = old;
+
asm volatile(
- " cs %0,%3,%1\n"
- : "=&d" (prev), "+Q" (*(int *) address)
- : "0" (old), "d" (new)
+ " cs %[prev],%[new],%[address]\n"
+ : [prev] "+&d" (prev),
+ [address] "+Q" (*(int *)address)
+ : [new] "d" (new)
: "memory", "cc");
return prev;
- case 8:
+ }
+ case 8: {
+ unsigned long prev = old;
+
asm volatile(
- " csg %0,%3,%1\n"
- : "=&d" (prev), "+QS" (*(long *) address)
- : "0" (old), "d" (new)
+ " csg %[prev],%[new],%[address]\n"
+ : [prev] "+&d" (prev),
+ [address] "+QS" (*(long *)address)
+ : [new] "d" (new)
: "memory", "cc");
return prev;
}
+ }
__cmpxchg_called_with_bad_pointer();
return old;
}
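
The 1- and 2-byte cases above emulate a narrow compare-and-swap with the 4-byte CS
instruction. A rough C equivalent of the 1-byte case (illustration only, assuming
the kernel's cmpxchg()/READ_ONCE() helpers; the real code does this in one
inline-assembly loop):

	static unsigned char cmpxchg_byte_sketch(unsigned int *word, int shift,
						 unsigned char old, unsigned char new)
	{
		unsigned int byte_mask = 0xff << shift;
		unsigned int cur = READ_ONCE(*word);
		unsigned int expected, desired;

		for (;;) {
			expected = (cur & ~byte_mask) | ((unsigned int)old << shift);
			desired  = (cur & ~byte_mask) | ((unsigned int)new << shift);
			cur = cmpxchg(word, expected, desired);	/* 4-byte CS */
			if (cur == expected)			/* swap succeeded */
				break;
			if ((cur ^ expected) & byte_mask)	/* target byte differs */
				break;				/* genuine mismatch */
			/* only a neighboring byte changed: retry with the fresh value */
		}
		return cur >> shift;				/* previous byte value */
	}
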
diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h
deleted file mode 100644
index f87a4788c19c..000000000000
--- a/arch/s390/include/asm/cpu_mcf.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Counter facility support definitions for the Linux perf
- *
- * Copyright IBM Corp. 2019
- * Author(s): Hendrik Brueckner <[email protected]>
- */
-#ifndef _ASM_S390_CPU_MCF_H
-#define _ASM_S390_CPU_MCF_H
-
-#include <linux/perf_event.h>
-#include <asm/cpu_mf.h>
-
-enum cpumf_ctr_set {
- CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
- CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
- CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */
- CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */
- CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */
-
- /* Maximum number of counter sets */
- CPUMF_CTR_SET_MAX,
-};
-
-#define CPUMF_LCCTL_ENABLE_SHIFT 16
-#define CPUMF_LCCTL_ACTCTL_SHIFT 0
-
-static inline void ctr_set_enable(u64 *state, u64 ctrsets)
-{
- *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
-}
-
-static inline void ctr_set_disable(u64 *state, u64 ctrsets)
-{
- *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
-}
-
-static inline void ctr_set_start(u64 *state, u64 ctrsets)
-{
- *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
-}
-
-static inline void ctr_set_stop(u64 *state, u64 ctrsets)
-{
- *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
-}
-
-static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
-{
- switch (set) {
- case CPUMF_CTR_SET_BASIC:
- return stcctm(BASIC, range, dest);
- case CPUMF_CTR_SET_USER:
- return stcctm(PROBLEM_STATE, range, dest);
- case CPUMF_CTR_SET_CRYPTO:
- return stcctm(CRYPTO_ACTIVITY, range, dest);
- case CPUMF_CTR_SET_EXT:
- return stcctm(EXTENDED, range, dest);
- case CPUMF_CTR_SET_MT_DIAG:
- return stcctm(MT_DIAG_CLEARING, range, dest);
- case CPUMF_CTR_SET_MAX:
- return 3;
- }
- return 3;
-}
-
-struct cpu_cf_events {
- struct cpumf_ctr_info info;
- atomic_t ctr_set[CPUMF_CTR_SET_MAX];
- atomic64_t alert;
- u64 state; /* For perf_event_open SVC */
- u64 dev_state; /* For /dev/hwctr */
- unsigned int flags;
- size_t used; /* Bytes used in data */
- size_t usedss; /* Bytes used in start/stop */
- unsigned char start[PAGE_SIZE]; /* Counter set at event add */
- unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */
- unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */
- unsigned int sets; /* # Counter set saved in memory */
-};
-DECLARE_PER_CPU(struct cpu_cf_events, cpu_cf_events);
-
-bool kernel_cpumcf_avail(void);
-int __kernel_cpumcf_begin(void);
-unsigned long kernel_cpumcf_alert(int clear);
-void __kernel_cpumcf_end(void);
-
-static inline int kernel_cpumcf_begin(void)
-{
- if (!cpum_cf_avail())
- return -ENODEV;
-
- preempt_disable();
- return __kernel_cpumcf_begin();
-}
-static inline void kernel_cpumcf_end(void)
-{
- __kernel_cpumcf_end();
- preempt_enable();
-}
-
-/* Return true if store counter set multiple instruction is available */
-static inline int stccm_avail(void)
-{
- return test_facility(142);
-}
-
-size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
- struct cpumf_ctr_info *info);
-int cfset_online_cpu(unsigned int cpu);
-int cfset_offline_cpu(unsigned int cpu);
-#endif /* _ASM_S390_CPU_MCF_H */
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index efa103b52a1a..7e417d7de568 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -42,7 +42,6 @@ static inline int cpum_sf_avail(void)
return test_facility(40) && test_facility(68);
}
-
struct cpumf_ctr_info {
u16 cfvn;
u16 auth_ctl;
@@ -275,56 +274,4 @@ static inline int lsctl(struct hws_lsctl_request_block *req)
return cc ? -EINVAL : 0;
}
-
-/* Sampling control helper functions */
-
-#include <linux/time.h>
-
-static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
- unsigned long freq)
-{
- return (USEC_PER_SEC / freq) * qsi->cpu_speed;
-}
-
-static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
- unsigned long rate)
-{
- return USEC_PER_SEC * qsi->cpu_speed / rate;
-}
-
-/* Return TOD timestamp contained in an trailer entry */
-static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
-{
- /* TOD in STCKE format */
- if (te->header.t)
- return *((unsigned long long *) &te->timestamp[1]);
-
- /* TOD in STCK format */
- return *((unsigned long long *) &te->timestamp[0]);
-}
-
-/* Return pointer to trailer entry of an sample data block */
-static inline unsigned long *trailer_entry_ptr(unsigned long v)
-{
- void *ret;
-
- ret = (void *) v;
- ret += PAGE_SIZE;
- ret -= sizeof(struct hws_trailer_entry);
-
- return (unsigned long *) ret;
-}
-
-/* Return true if the entry in the sample data block table (sdbt)
- * is a link to the next sdbt */
-static inline int is_link_entry(unsigned long *s)
-{
- return *s & 0x1ul ? 1 : 0;
-}
-
-/* Return pointer to the linked sdbt */
-static inline unsigned long *get_next_sdbt(unsigned long *s)
-{
- return (unsigned long *) (*s & ~0x1ul);
-}
#endif /* _ASM_S390_CPU_MF_H */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 1d389847b588..30bb3ec4e5fc 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -11,30 +11,11 @@
#include <linux/types.h>
#include <asm/timex.h>
-#define CPUTIME_PER_USEC 4096ULL
-#define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
-
-/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
-
-#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
-
-/*
- * Convert cputime to microseconds.
- */
-static inline u64 cputime_to_usecs(const u64 cputime)
-{
- return cputime >> 12;
-}
-
/*
* Convert cputime to nanoseconds.
*/
#define cputime_to_nsecs(cputime) tod_to_ns(cputime)
-u64 arch_cpu_idle_time(int cpu);
-
-#define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
-
void account_idle_time_irq(void);
#endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 56e99c286d12..674a939f16ee 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -12,6 +12,7 @@
#include <linux/if_ether.h>
#include <linux/percpu.h>
#include <asm/asm-extable.h>
+#include <asm/cio.h>
enum diag_stat_enum {
DIAG_STAT_X008,
@@ -20,6 +21,7 @@ enum diag_stat_enum {
DIAG_STAT_X014,
DIAG_STAT_X044,
DIAG_STAT_X064,
+ DIAG_STAT_X08C,
DIAG_STAT_X09C,
DIAG_STAT_X0DC,
DIAG_STAT_X204,
@@ -79,10 +81,20 @@ struct diag210 {
u8 vrdccrty; /* real device type (output) */
u8 vrdccrmd; /* real device model (output) */
u8 vrdccrft; /* real device feature (output) */
-} __attribute__((packed, aligned(4)));
+} __packed __aligned(4);
extern int diag210(struct diag210 *addr);
+struct diag8c {
+ u8 flags;
+ u8 num_partitions;
+ u16 width;
+ u16 height;
+ u8 data[0];
+} __packed __aligned(4);
+
+extern int diag8c(struct diag8c *out, struct ccw_dev_id *devno);
+
/* bit is set in flags, when physical cpu info is included in diag 204 data */
#define DIAG204_LPAR_PHYS_FLG 0x80
#define DIAG204_LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */
@@ -318,6 +330,7 @@ struct diag_ops {
int (*diag210)(struct diag210 *addr);
int (*diag26c)(void *req, void *resp, enum diag26c_sc subcode);
int (*diag14)(unsigned long rx, unsigned long ry1, unsigned long subcode);
+ int (*diag8c)(struct diag8c *addr, struct ccw_dev_id *devno, size_t len);
void (*diag0c)(struct hypfs_diag0c_entry *entry);
void (*diag308_reset)(void);
};
@@ -330,5 +343,6 @@ int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode);
int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode);
void _diag0c_amode31(struct hypfs_diag0c_entry *entry);
void _diag308_reset_amode31(void);
+int _diag8c_amode31(struct diag8c *addr, struct ccw_dev_id *devno, size_t len);
#endif /* _ASM_S390_DIAG_H */
diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h
index 4a71dbbf76fb..bbdadb1c9efc 100644
--- a/arch/s390/include/asm/fpu/internal.h
+++ b/arch/s390/include/asm/fpu/internal.h
@@ -27,7 +27,7 @@ static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
int i;
for (i = 0; i < __NUM_FPRS; i++)
- fprs[i] = *(freg_t *)(vxrs + i);
+ fprs[i].ui = vxrs[i].high;
}
static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
@@ -35,7 +35,7 @@ static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
int i;
for (i = 0; i < __NUM_FPRS; i++)
- *(freg_t *)(vxrs + i) = fprs[i];
+ vxrs[i].high = fprs[i].ui;
}
static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index 40eae2c08d61..59fcc3c72edf 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -23,6 +23,9 @@
#define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */
#define IDA_BLOCK_SIZE (1L<<IDA_SIZE_LOG)
+#define IDA_2K_SIZE_LOG 11
+#define IDA_2K_BLOCK_SIZE (1L << IDA_2K_SIZE_LOG)
+
/*
* Test if an address/length pair needs an idal list.
*/
@@ -43,6 +46,15 @@ static inline unsigned int idal_nr_words(void *vaddr, unsigned int length)
}
/*
+ * Return the number of 2K IDA words needed for an address/length pair.
+ */
+static inline unsigned int idal_2k_nr_words(void *vaddr, unsigned int length)
+{
+ return ((__pa(vaddr) & (IDA_2K_BLOCK_SIZE - 1)) + length +
+ (IDA_2K_BLOCK_SIZE - 1)) >> IDA_2K_SIZE_LOG;
+}
+
+/*
* Create the list of idal words for an address/length pair.
*/
static inline unsigned long *idal_create_words(unsigned long *idaws,
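
A quick check of idal_2k_nr_words() above: for a buffer starting 768 bytes into a
2KB block with length 6000, (768 + 6000 + 2047) >> 11 = 4, i.e. the buffer touches
four 2KB IDA blocks, so four IDA words are needed.
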
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index 5cea629c548e..09f763b9eb40 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -10,16 +10,12 @@
#include <linux/types.h>
#include <linux/device.h>
-#include <linux/seqlock.h>
struct s390_idle_data {
- seqcount_t seqcount;
unsigned long idle_count;
unsigned long idle_time;
unsigned long clock_idle_enter;
- unsigned long clock_idle_exit;
unsigned long timer_idle_enter;
- unsigned long timer_idle_exit;
unsigned long mt_cycles_enter[8];
};
@@ -27,6 +23,5 @@ extern struct device_attribute dev_attr_idle_count;
extern struct device_attribute dev_attr_idle_time_us;
void psw_idle(struct s390_idle_data *data, unsigned long psw_mask);
-void psw_idle_exit(void);
#endif /* _S390_IDLE_H */
diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h
index 2768d5db181f..e5cfc81d5b61 100644
--- a/arch/s390/include/asm/kasan.h
+++ b/arch/s390/include/asm/kasan.h
@@ -14,17 +14,15 @@
#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
extern void kasan_early_init(void);
-extern void kasan_copy_shadow_mapping(void);
-extern void kasan_free_early_identity(void);
/*
* Estimate kasan memory requirements, which it will reserve
* at the very end of available physical memory. To estimate
* that, we take into account that kasan would require
* 1/8 of available physical memory (for shadow memory) +
- * creating page tables for the whole memory + shadow memory
- * region (1 + 1/8). To keep page tables estimates simple take
- * the double of combined ptes size.
+ * creating page tables for the shadow memory region.
+ * To keep the page table estimate simple, take double the
+ * combined pte size.
*
* physmem parameter has to be already adjusted if not entire physical memory
* would be used (e.g. due to effect of "mem=" option).
@@ -36,15 +34,13 @@ static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem)
/* for shadow memory */
kasan_needs = round_up(physmem / 8, PAGE_SIZE);
/* for paging structures */
- pages = DIV_ROUND_UP(physmem + kasan_needs, PAGE_SIZE);
+ pages = DIV_ROUND_UP(kasan_needs, PAGE_SIZE);
kasan_needs += DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2;
return kasan_needs;
}
#else
static inline void kasan_early_init(void) { }
-static inline void kasan_copy_shadow_mapping(void) { }
-static inline void kasan_free_early_identity(void) { }
static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) { return 0; }
#endif
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 598095f4b924..83f732ca3af4 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -70,8 +70,6 @@ struct kprobe_ctlblk {
};
void arch_remove_kprobe(struct kprobe *p);
-void __kretprobe_trampoline(void);
-void trampoline_probe_handler(struct pt_regs *regs);
int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
int kprobe_exceptions_notify(struct notifier_block *self,
diff --git a/arch/s390/include/asm/maccess.h b/arch/s390/include/asm/maccess.h
index c7fa838cf6b9..cfec3141fdba 100644
--- a/arch/s390/include/asm/maccess.h
+++ b/arch/s390/include/asm/maccess.h
@@ -7,7 +7,7 @@
struct iov_iter;
extern unsigned long __memcpy_real_area;
-void memcpy_real_init(void);
+extern pte_t *memcpy_real_ptep;
size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count);
int memcpy_real(void *dest, unsigned long src, size_t count);
#ifdef CONFIG_CRASH_DUMP
diff --git a/arch/s390/include/asm/mem_detect.h b/arch/s390/include/asm/mem_detect.h
index a7c922a69050..f9e7354036d2 100644
--- a/arch/s390/include/asm/mem_detect.h
+++ b/arch/s390/include/asm/mem_detect.h
@@ -30,6 +30,7 @@ struct mem_detect_block {
struct mem_detect_info {
u32 count;
u8 info_source;
+ unsigned long usable;
struct mem_detect_block entries[MEM_INLINED_ENTRIES];
struct mem_detect_block *entries_extended;
};
@@ -38,7 +39,7 @@ extern struct mem_detect_info mem_detect;
void add_mem_detect_block(u64 start, u64 end);
static inline int __get_mem_detect_block(u32 n, unsigned long *start,
- unsigned long *end)
+ unsigned long *end, bool respect_usable_limit)
{
if (n >= mem_detect.count) {
*start = 0;
@@ -53,21 +54,41 @@ static inline int __get_mem_detect_block(u32 n, unsigned long *start,
*start = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].start;
*end = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].end;
}
+
+ if (respect_usable_limit && mem_detect.usable) {
+ if (*start >= mem_detect.usable)
+ return -1;
+ if (*end > mem_detect.usable)
+ *end = mem_detect.usable;
+ }
return 0;
}
/**
- * for_each_mem_detect_block - early online memory range iterator
+ * for_each_mem_detect_usable_block - early online memory range iterator
* @i: an integer used as loop variable
* @p_start: ptr to unsigned long for start address of the range
* @p_end: ptr to unsigned long for end address of the range
*
- * Walks over detected online memory ranges.
+ * Walks over detected online memory ranges below usable limit.
*/
-#define for_each_mem_detect_block(i, p_start, p_end) \
- for (i = 0, __get_mem_detect_block(i, p_start, p_end); \
- i < mem_detect.count; \
- i++, __get_mem_detect_block(i, p_start, p_end))
+#define for_each_mem_detect_usable_block(i, p_start, p_end) \
+ for (i = 0; !__get_mem_detect_block(i, p_start, p_end, true); i++)
+
+/* Walks over all detected online memory ranges disregarding usable limit. */
+#define for_each_mem_detect_block(i, p_start, p_end) \
+ for (i = 0; !__get_mem_detect_block(i, p_start, p_end, false); i++)
+
+static inline unsigned long get_mem_detect_usable_total(void)
+{
+ unsigned long start, end, total = 0;
+ int i;
+
+ for_each_mem_detect_usable_block(i, &start, &end)
+ total += end - start;
+
+ return total;
+}
static inline void get_mem_detect_reserved(unsigned long *start,
unsigned long *size)
@@ -84,8 +105,10 @@ static inline unsigned long get_mem_detect_end(void)
unsigned long start;
unsigned long end;
+ if (mem_detect.usable)
+ return mem_detect.usable;
if (mem_detect.count) {
- __get_mem_detect_block(mem_detect.count - 1, &start, &end);
+ __get_mem_detect_block(mem_detect.count - 1, &start, &end, false);
return end;
}
return 0;
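
A minimal sketch, not part of the patch, of how early boot code might use the two iterator flavours side by side; the helper name below is made up for illustration, everything else comes from the header above.

static unsigned long __init get_mem_detect_online_total(void)
{
	unsigned long start, end, total = 0;
	int i;

	/* Unlike get_mem_detect_usable_total(), ignore mem_detect.usable. */
	for_each_mem_detect_block(i, &start, &end)
		total += end - start;

	return total;
}

Subtracting get_mem_detect_usable_total() from this value gives the amount of detected online memory that lies above the usable limit.
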
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index b26cbf1c533c..b87ca864d27d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -23,6 +23,7 @@
#include <asm/uv.h>
extern pgd_t swapper_pg_dir[];
+extern pgd_t invalid_pg_dir[];
extern void paging_init(void);
extern unsigned long s390_invalid_asce;
@@ -181,6 +182,8 @@ static inline int is_module_addr(void *addr)
#define _PAGE_SOFT_DIRTY 0x000
#endif
+#define _PAGE_SW_BITS 0xffUL /* All SW bits */
+
#define _PAGE_SWP_EXCLUSIVE _PAGE_LARGE /* SW pte exclusive swap bit */
/* Set of bits not changed in pte_modify */
@@ -188,6 +191,12 @@ static inline int is_module_addr(void *addr)
_PAGE_YOUNG | _PAGE_SOFT_DIRTY)
/*
+ * Mask of bits that must not be changed with RDP. Allow only _PAGE_PROTECT
+ * HW bit and all SW bits.
+ */
+#define _PAGE_RDP_MASK ~(_PAGE_PROTECT | _PAGE_SW_BITS)
+
+/*
* handle_pte_fault uses pte_present and pte_none to find out the pte type
* WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to
* distinguish present from not-present ptes. It is changed only with the page
@@ -477,6 +486,12 @@ static inline int is_module_addr(void *addr)
_REGION3_ENTRY_YOUNG | \
_REGION_ENTRY_PROTECT | \
_REGION_ENTRY_NOEXEC)
+#define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \
+ _REGION3_ENTRY_LARGE | \
+ _REGION3_ENTRY_READ | \
+ _REGION3_ENTRY_WRITE | \
+ _REGION3_ENTRY_YOUNG | \
+ _REGION3_ENTRY_DIRTY)
static inline bool mm_p4d_folded(struct mm_struct *mm)
{
@@ -1045,6 +1060,19 @@ static inline pte_t pte_mkhuge(pte_t pte)
#define IPTE_NODAT 0x400
#define IPTE_GUEST_ASCE 0x800
+static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep,
+ unsigned long opt, unsigned long asce,
+ int local)
+{
+ unsigned long pto;
+
+ pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
+ asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]"
+ : "+m" (*ptep)
+ : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt),
+ [asce] "a" (asce), [m4] "i" (local));
+}
+
static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
unsigned long opt, unsigned long asce,
int local)
@@ -1195,6 +1223,42 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
}
+/*
+ * Check if PTEs only differ in _PAGE_PROTECT HW bit, but also allow SW PTE
+ * bits in the comparison. Those might change e.g. because of dirty and young
+ * tracking.
+ */
+static inline int pte_allow_rdp(pte_t old, pte_t new)
+{
+ /*
+ * Only allow changes from RO to RW
+ */
+ if (!(pte_val(old) & _PAGE_PROTECT) || pte_val(new) & _PAGE_PROTECT)
+ return 0;
+
+ return (pte_val(old) & _PAGE_RDP_MASK) == (pte_val(new) & _PAGE_RDP_MASK);
+}
+
+static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
+ unsigned long address)
+{
+ /*
+ * RDP might not have propagated the PTE protection reset to all CPUs,
+ * so there could be spurious TLB protection faults.
+ * NOTE: This will also be called when a racing pagetable update on
+ * another thread already installed the correct PTE. Both cases cannot
+ * really be distinguished.
+ * Therefore, only do the local TLB flush when RDP can be used, to avoid
+ * unnecessary overhead.
+ */
+ if (MACHINE_HAS_RDP)
+ asm volatile("ptlb" : : : "memory");
+}
+#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
+
+void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t new);
+
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
@@ -1202,7 +1266,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
{
if (pte_same(*ptep, entry))
return 0;
- ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
+ if (MACHINE_HAS_RDP && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry))
+ ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry);
+ else
+ ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
return 1;
}
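
The net effect of the new ptep_set_access_flags() logic can be condensed into a small sketch (illustrative only, the helper below does not exist in the patch): RDP is used solely to drop the protection bit, that is for read-only to read-write transitions on machines with facility 194, while mappings with pgstes keep the existing IPTE-based path.

static bool can_use_rdp(struct vm_area_struct *vma, pte_t old, pte_t new)
{
	if (!MACHINE_HAS_RDP)		/* facility 194 not installed */
		return false;
	if (mm_has_pgste(vma->vm_mm))	/* KVM guest mappings stay on IPTE */
		return false;
	/* old must be RO, new RW, and only _PAGE_PROTECT plus SW bits differ */
	return pte_allow_rdp(old, new);
}
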
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index c907f747d2a0..e98d9650764b 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -44,29 +44,46 @@
typedef long (*sys_call_ptr_t)(struct pt_regs *regs);
-static inline void set_cpu_flag(int flag)
+static __always_inline void set_cpu_flag(int flag)
{
S390_lowcore.cpu_flags |= (1UL << flag);
}
-static inline void clear_cpu_flag(int flag)
+static __always_inline void clear_cpu_flag(int flag)
{
S390_lowcore.cpu_flags &= ~(1UL << flag);
}
-static inline int test_cpu_flag(int flag)
+static __always_inline bool test_cpu_flag(int flag)
{
- return !!(S390_lowcore.cpu_flags & (1UL << flag));
+ return S390_lowcore.cpu_flags & (1UL << flag);
+}
+
+static __always_inline bool test_and_set_cpu_flag(int flag)
+{
+ if (test_cpu_flag(flag))
+ return true;
+ set_cpu_flag(flag);
+ return false;
+}
+
+static __always_inline bool test_and_clear_cpu_flag(int flag)
+{
+ if (!test_cpu_flag(flag))
+ return false;
+ clear_cpu_flag(flag);
+ return true;
}
/*
* Test CIF flag of another CPU. The caller needs to ensure that
* CPU hotplug can not happen, e.g. by disabling preemption.
*/
-static inline int test_cpu_flag_of(int flag, int cpu)
+static __always_inline bool test_cpu_flag_of(int flag, int cpu)
{
struct lowcore *lc = lowcore_ptr[cpu];
- return !!(lc->cpu_flags & (1UL << flag));
+
+ return lc->cpu_flags & (1UL << flag);
}
#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
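
The intended usage pattern for the new test-and-modify helpers is the one adopted by do_io_irq() and do_ext_irq() later in this diff; a condensed sketch (the wrapper function itself is illustrative, the names inside it are real):

static void irq_account_idle_wakeup(void)
{
	/* Consume CIF_ENABLED_WAIT exactly once per wakeup from idle. */
	bool from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);

	if (from_idle)
		account_idle_time_irq();
}
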
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 8bae33ab320a..bfb8c3cb8aee 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -26,7 +26,7 @@
#ifndef __ASSEMBLY__
#define PSW_KERNEL_BITS (PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \
- PSW_MASK_EA | PSW_MASK_BA)
+ PSW_MASK_EA | PSW_MASK_BA | PSW_MASK_DAT)
#define PSW_USER_BITS (PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \
PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 77e6506898f5..3a1f8825bc7d 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -34,6 +34,7 @@
#define MACHINE_FLAG_GS BIT(16)
#define MACHINE_FLAG_SCC BIT(17)
#define MACHINE_FLAG_PCI_MIO BIT(18)
+#define MACHINE_FLAG_RDP BIT(19)
#define LPP_MAGIC BIT(31)
#define LPP_PID_MASK _AC(0xffffffff, UL)
@@ -73,6 +74,10 @@ extern unsigned int zlib_dfltcc_support;
extern int noexec_disabled;
extern unsigned long ident_map_size;
+extern unsigned long pgalloc_pos;
+extern unsigned long pgalloc_end;
+extern unsigned long pgalloc_low;
+extern unsigned long __amode31_base;
/* The Write Back bit position in the physaddr is given by the SLPC PCI */
extern unsigned long mio_wb_bit_mask;
@@ -95,6 +100,7 @@ extern unsigned long mio_wb_bit_mask;
#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
#define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
#define MACHINE_HAS_PCI_MIO (S390_lowcore.machine_flags & MACHINE_FLAG_PCI_MIO)
+#define MACHINE_HAS_RDP (S390_lowcore.machine_flags & MACHINE_FLAG_RDP)
/*
* Console mode. Override with conmode=
diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h
index fde7e6b1df48..9286430fe729 100644
--- a/arch/s390/include/asm/syscall_wrapper.h
+++ b/arch/s390/include/asm/syscall_wrapper.h
@@ -7,36 +7,13 @@
#ifndef _ASM_S390_SYSCALL_WRAPPER_H
#define _ASM_S390_SYSCALL_WRAPPER_H
-#define __SC_TYPE(t, a) t
-
-#define SYSCALL_PT_ARG6(regs, m, t1, t2, t3, t4, t5, t6)\
- SYSCALL_PT_ARG5(regs, m, t1, t2, t3, t4, t5), \
- m(t6, (regs->gprs[7]))
-
-#define SYSCALL_PT_ARG5(regs, m, t1, t2, t3, t4, t5) \
- SYSCALL_PT_ARG4(regs, m, t1, t2, t3, t4), \
- m(t5, (regs->gprs[6]))
-
-#define SYSCALL_PT_ARG4(regs, m, t1, t2, t3, t4) \
- SYSCALL_PT_ARG3(regs, m, t1, t2, t3), \
- m(t4, (regs->gprs[5]))
-
-#define SYSCALL_PT_ARG3(regs, m, t1, t2, t3) \
- SYSCALL_PT_ARG2(regs, m, t1, t2), \
- m(t3, (regs->gprs[4]))
-
-#define SYSCALL_PT_ARG2(regs, m, t1, t2) \
- SYSCALL_PT_ARG1(regs, m, t1), \
- m(t2, (regs->gprs[3]))
-
-#define SYSCALL_PT_ARG1(regs, m, t1) \
- m(t1, (regs->orig_gpr2))
-
-#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__)
+/* Mapping of registers to parameters for syscalls */
+#define SC_S390_REGS_TO_ARGS(x, ...) \
+ __MAP(x, __SC_ARGS \
+ ,, regs->orig_gpr2,, regs->gprs[3],, regs->gprs[4] \
+ ,, regs->gprs[5],, regs->gprs[6],, regs->gprs[7])
#ifdef CONFIG_COMPAT
-#define __SC_COMPAT_TYPE(t, a) \
- __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a
#define __SC_COMPAT_CAST(t, a) \
({ \
@@ -56,34 +33,31 @@
(t)__ReS; \
})
-#define __S390_SYS_STUBx(x, name, ...) \
- long __s390_sys##name(struct pt_regs *regs); \
- ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \
- long __s390_sys##name(struct pt_regs *regs) \
- { \
- long ret = __do_sys##name(SYSCALL_PT_ARGS(x, regs, \
- __SC_COMPAT_CAST, __MAP(x, __SC_TYPE, __VA_ARGS__))); \
- __MAP(x,__SC_TEST,__VA_ARGS__); \
- return ret; \
- }
-
/*
* To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias
* named __s390x_sys_*()
*/
#define COMPAT_SYSCALL_DEFINE0(sname) \
- SYSCALL_METADATA(_##sname, 0); \
long __s390_compat_sys_##sname(void); \
ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO); \
long __s390_compat_sys_##sname(void)
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
+ long __s390_sys_##sname(void); \
+ ALLOW_ERROR_INJECTION(__s390_sys_##sname, ERRNO); \
long __s390x_sys_##sname(void); \
ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO); \
+ static inline long __do_sys_##sname(void); \
long __s390_sys_##sname(void) \
- __attribute__((alias(__stringify(__s390x_sys_##sname)))); \
- long __s390x_sys_##sname(void)
+ { \
+ return __do_sys_##sname(); \
+ } \
+ long __s390x_sys_##sname(void) \
+ { \
+ return __do_sys_##sname(); \
+ } \
+ static inline long __do_sys_##sname(void)
#define COND_SYSCALL(name) \
cond_syscall(__s390x_sys_##name); \
@@ -94,24 +68,20 @@
SYSCALL_ALIAS(__s390_sys_##name, sys_ni_posix_timers)
#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
- __diag_push(); \
- __diag_ignore(GCC, 8, "-Wattribute-alias", \
- "Type aliasing is used to sanitize syscall arguments"); \
long __s390_compat_sys##name(struct pt_regs *regs); \
- long __s390_compat_sys##name(struct pt_regs *regs) \
- __attribute__((alias(__stringify(__se_compat_sys##name)))); \
ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \
- static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
- long __se_compat_sys##name(struct pt_regs *regs); \
- long __se_compat_sys##name(struct pt_regs *regs) \
+ static inline long __se_compat_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)); \
+ static inline long __do_compat_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__)); \
+ long __s390_compat_sys##name(struct pt_regs *regs) \
{ \
- long ret = __do_compat_sys##name(SYSCALL_PT_ARGS(x, regs, __SC_DELOUSE, \
- __MAP(x, __SC_TYPE, __VA_ARGS__))); \
- __MAP(x,__SC_TEST,__VA_ARGS__); \
- return ret; \
+ return __se_compat_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__)); \
} \
- __diag_pop(); \
- static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+ static inline long __se_compat_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)) \
+ { \
+ __MAP(x, __SC_TEST, __VA_ARGS__); \
+ return __do_compat_sys##name(__MAP(x, __SC_DELOUSE, __VA_ARGS__)); \
+ } \
+ static inline long __do_compat_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__))
/*
* As some compat syscalls may not be implemented, we need to expand
@@ -124,42 +94,58 @@
#define COMPAT_SYS_NI(name) \
SYSCALL_ALIAS(__s390_compat_sys_##name, sys_ni_posix_timers)
-#else /* CONFIG_COMPAT */
+#define __S390_SYS_STUBx(x, name, ...) \
+ long __s390_sys##name(struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \
+ static inline long ___se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)); \
+ long __s390_sys##name(struct pt_regs *regs) \
+ { \
+ return ___se_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__)); \
+ } \
+ static inline long ___se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)) \
+ { \
+ __MAP(x, __SC_TEST, __VA_ARGS__); \
+ return __do_sys##name(__MAP(x, __SC_COMPAT_CAST, __VA_ARGS__)); \
+ }
-#define __S390_SYS_STUBx(x, fullname, name, ...)
+#else /* CONFIG_COMPAT */
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
long __s390x_sys_##sname(void); \
ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO); \
- long __s390x_sys_##sname(void)
+ static inline long __do_sys_##sname(void); \
+ long __s390x_sys_##sname(void) \
+ { \
+ return __do_sys_##sname(); \
+ } \
+ static inline long __do_sys_##sname(void)
#define COND_SYSCALL(name) \
cond_syscall(__s390x_sys_##name)
#define SYS_NI(name) \
- SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers);
+ SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers)
+
+#define __S390_SYS_STUBx(x, fullname, name, ...)
#endif /* CONFIG_COMPAT */
-#define __SYSCALL_DEFINEx(x, name, ...) \
- __diag_push(); \
- __diag_ignore(GCC, 8, "-Wattribute-alias", \
- "Type aliasing is used to sanitize syscall arguments"); \
- long __s390x_sys##name(struct pt_regs *regs) \
- __attribute__((alias(__stringify(__se_sys##name)))); \
- ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \
- static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
- long __se_sys##name(struct pt_regs *regs); \
- __S390_SYS_STUBx(x, name, __VA_ARGS__) \
- long __se_sys##name(struct pt_regs *regs) \
- { \
- long ret = __do_sys##name(SYSCALL_PT_ARGS(x, regs, \
- __SC_CAST, __MAP(x, __SC_TYPE, __VA_ARGS__))); \
- __MAP(x,__SC_TEST,__VA_ARGS__); \
- return ret; \
- } \
- __diag_pop(); \
- static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+#define __SYSCALL_DEFINEx(x, name, ...) \
+ long __s390x_sys##name(struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \
+ static inline long __se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)); \
+ static inline long __do_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__)); \
+ __S390_SYS_STUBx(x, name, __VA_ARGS__); \
+ long __s390x_sys##name(struct pt_regs *regs) \
+ { \
+ return __se_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__)); \
+ } \
+ static inline long __se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)) \
+ { \
+ __MAP(x, __SC_TEST, __VA_ARGS__); \
+ return __do_sys##name(__MAP(x, __SC_CAST, __VA_ARGS__)); \
+ } \
+ static inline long __do_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__))
#endif /* _ASM_S390_SYSCALL_WRAPPER_H */
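
To see what the reworked macros generate, consider a simplified expansion for SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd); SYSCALL_METADATA, error injection and the compat stub are left out, so this is an illustrative sketch rather than a literal preprocessor dump.

static inline long __se_sys_dup2(long oldfd, long newfd);
static inline long __do_sys_dup2(unsigned int oldfd, unsigned int newfd);

long __s390x_sys_dup2(struct pt_regs *regs)
{
	/* SC_S390_REGS_TO_ARGS(2, ...): arg1 <- orig_gpr2, arg2 <- gprs[3] */
	return __se_sys_dup2(regs->orig_gpr2, regs->gprs[3]);
}

static inline long __se_sys_dup2(long oldfd, long newfd)
{
	/* __SC_TEST checks the declared types, __SC_CAST narrows back */
	return __do_sys_dup2((unsigned int)oldfd, (unsigned int)newfd);
}

static inline long __do_sys_dup2(unsigned int oldfd, unsigned int newfd)
{
	/* ... body as written inside the SYSCALL_DEFINE2(dup2, ...) block ... */
	return 0;
}

Compared with the old scheme there is no __attribute__((alias(...))) and no -Wattribute-alias suppression any more; each entry point is a real function that forwards the register contents.
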
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index f7038b800cc3..8a8c64a678c4 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -390,4 +390,212 @@ do { \
goto err_label; \
} while (0)
+void __cmpxchg_user_key_called_with_bad_pointer(void);
+
+#define CMPXCHG_USER_KEY_MAX_LOOPS 128
+
+static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
+ __uint128_t old, __uint128_t new,
+ unsigned long key, int size)
+{
+ int rc = 0;
+
+ switch (size) {
+ case 1: {
+ unsigned int prev, shift, mask, _old, _new;
+ unsigned long count;
+
+ shift = (3 ^ (address & 3)) << 3;
+ address ^= address & 3;
+ _old = ((unsigned int)old & 0xff) << shift;
+ _new = ((unsigned int)new & 0xff) << shift;
+ mask = ~(0xff << shift);
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ " llill %[count],%[max_loops]\n"
+ "0: l %[prev],%[address]\n"
+ "1: nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "2: lr %[tmp],%[prev]\n"
+ "3: cs %[prev],%[new],%[address]\n"
+ "4: jnl 5f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jnz 5f\n"
+ " brct %[count],2b\n"
+ "5: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (_old),
+ [new] "+&d" (_new),
+ [mask] "+&d" (mask),
+ [count] "=a" (count)
+ : [key] "%[count]" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY),
+ [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+ : "memory", "cc");
+ *(unsigned char *)uval = prev >> shift;
+ if (!count)
+ rc = -EAGAIN;
+ return rc;
+ }
+ case 2: {
+ unsigned int prev, shift, mask, _old, _new;
+ unsigned long count;
+
+ shift = (2 ^ (address & 2)) << 3;
+ address ^= address & 2;
+ _old = ((unsigned int)old & 0xffff) << shift;
+ _new = ((unsigned int)new & 0xffff) << shift;
+ mask = ~(0xffff << shift);
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ " llill %[count],%[max_loops]\n"
+ "0: l %[prev],%[address]\n"
+ "1: nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "2: lr %[tmp],%[prev]\n"
+ "3: cs %[prev],%[new],%[address]\n"
+ "4: jnl 5f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jnz 5f\n"
+ " brct %[count],2b\n"
+ "5: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (_old),
+ [new] "+&d" (_new),
+ [mask] "+&d" (mask),
+ [count] "=a" (count)
+ : [key] "%[count]" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY),
+ [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+ : "memory", "cc");
+ *(unsigned short *)uval = prev >> shift;
+ if (!count)
+ rc = -EAGAIN;
+ return rc;
+ }
+ case 4: {
+ unsigned int prev = old;
+
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: cs %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+Q" (*(int *)address)
+ : [new] "d" ((unsigned int)new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ *(unsigned int *)uval = prev;
+ return rc;
+ }
+ case 8: {
+ unsigned long prev = old;
+
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: csg %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+QS" (*(long *)address)
+ : [new] "d" ((unsigned long)new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ *(unsigned long *)uval = prev;
+ return rc;
+ }
+ case 16: {
+ __uint128_t prev = old;
+
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: cdsg %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+QS" (*(__int128_t *)address)
+ : [new] "d" (new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ *(__uint128_t *)uval = prev;
+ return rc;
+ }
+ }
+ __cmpxchg_user_key_called_with_bad_pointer();
+ return rc;
+}
+
+/**
+ * cmpxchg_user_key() - cmpxchg with user space target, honoring storage keys
+ * @ptr: User space address of value to compare to @old and exchange with
+ * @new. Must be aligned to sizeof(*@ptr).
+ * @uval: Address where the old value of *@ptr is written to.
+ * @old: Old value. Compared to the content pointed to by @ptr in order to
+ * determine if the exchange occurs. The old value read from *@ptr is
+ * written to *@uval.
+ * @new: New value to place at *@ptr.
+ * @key: Access key to use for checking storage key protection.
+ *
+ * Perform a cmpxchg on a user space target, honoring storage key protection.
+ * @key alone determines how key checking is performed, neither
+ * storage-protection-override nor fetch-protection-override apply.
+ * The caller must compare *@uval and @old to determine if values have been
+ * exchanged. In case of an exception *@uval is set to zero.
+ *
+ * Return: 0: cmpxchg executed
+ * -EFAULT: an exception happened when trying to access *@ptr
+ * -EAGAIN: maxed out number of retries (byte and short only)
+ */
+#define cmpxchg_user_key(ptr, uval, old, new, key) \
+({ \
+ __typeof__(ptr) __ptr = (ptr); \
+ __typeof__(uval) __uval = (uval); \
+ \
+ BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval))); \
+ might_fault(); \
+ __chk_user_ptr(__ptr); \
+ __cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \
+ (old), (new), (key), sizeof(*(__ptr))); \
+})
+
#endif /* __S390_UACCESS_H */
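
A minimal usage sketch for the new primitive, assuming only what the kernel-doc above states; the wrapper function and its caller-side policy are illustrative, not part of the patch.

static int set_user_flag(unsigned int __user *ptr, unsigned int old,
			 unsigned int new, unsigned long key)
{
	unsigned int uval;
	int rc;

	rc = cmpxchg_user_key(ptr, &uval, old, new, key);
	if (rc == -EFAULT)
		return rc;	/* exception while accessing *ptr, uval is zero */
	if (rc == -EAGAIN)
		return rc;	/* retry limit hit; only for 1- and 2-byte targets */
	/* rc == 0: the exchange was attempted, compare to see if we won the race */
	return uval == old ? 0 : -EBUSY;
}

As documented, the helper itself never reports whether the exchange happened; distinguishing success from a lost race is left to the caller by comparing *uval with old.
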
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index 02462e7100c1..b8ecf04e3468 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -4,7 +4,7 @@
#include <linux/sched.h>
#include <linux/ftrace.h>
-#include <linux/kprobes.h>
+#include <linux/rethook.h>
#include <linux/llist.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
@@ -43,13 +43,15 @@ struct unwind_state {
bool error;
};
-/* Recover the return address modified by kretprobe and ftrace_graph. */
+/* Recover the return address modified by rethook and ftrace_graph. */
static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state,
unsigned long ip)
{
ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *)state->sp);
- if (is_kretprobe_trampoline(ip))
- ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur);
+#ifdef CONFIG_RETHOOK
+ if (is_rethook_trampoline(ip))
+ ip = rethook_find_ret_addr(state->task, state->sp, &state->kr_cur);
+#endif
return ip;
}
diff --git a/arch/s390/include/uapi/asm/fs3270.h b/arch/s390/include/uapi/asm/fs3270.h
new file mode 100644
index 000000000000..c4bc1108af6a
--- /dev/null
+++ b/arch/s390/include/uapi/asm/fs3270.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_S390_UAPI_FS3270_H
+#define __ASM_S390_UAPI_FS3270_H
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+/* ioctls for fullscreen 3270 */
+#define TUBICMD _IO('3', 3) /* set ccw command for fs reads. */
+#define TUBOCMD _IO('3', 4) /* set ccw command for fs writes. */
+#define TUBGETI _IO('3', 7) /* get ccw command for fs reads. */
+#define TUBGETO _IO('3', 8) /* get ccw command for fs writes. */
+#define TUBGETMOD _IO('3', 13) /* get characteristics like model, cols, rows */
+
+/* For TUBGETMOD */
+struct raw3270_iocb {
+ __u16 model;
+ __u16 line_cnt;
+ __u16 col_cnt;
+ __u16 pf_cnt;
+ __u16 re_cnt;
+ __u16 map;
+};
+
+#endif /* __ASM_S390_UAPI_FS3270_H */
diff --git a/arch/s390/include/uapi/asm/raw3270.h b/arch/s390/include/uapi/asm/raw3270.h
new file mode 100644
index 000000000000..6676f102bd50
--- /dev/null
+++ b/arch/s390/include/uapi/asm/raw3270.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_S390_UAPI_RAW3270_H
+#define __ASM_S390_UAPI_RAW3270_H
+
+/* Local Channel Commands */
+#define TC_WRITE 0x01 /* Write */
+#define TC_RDBUF 0x02 /* Read Buffer */
+#define TC_EWRITE 0x05 /* Erase write */
+#define TC_READMOD 0x06 /* Read modified */
+#define TC_EWRITEA 0x0d /* Erase write alternate */
+#define TC_WRITESF 0x11 /* Write structured field */
+
+/* Buffer Control Orders */
+#define TO_GE 0x08 /* Graphics Escape */
+#define TO_SF 0x1d /* Start field */
+#define TO_SBA 0x11 /* Set buffer address */
+#define TO_IC 0x13 /* Insert cursor */
+#define TO_PT 0x05 /* Program tab */
+#define TO_RA 0x3c /* Repeat to address */
+#define TO_SFE 0x29 /* Start field extended */
+#define TO_EUA 0x12 /* Erase unprotected to address */
+#define TO_MF 0x2c /* Modify field */
+#define TO_SA 0x28 /* Set attribute */
+
+/* Field Attribute Bytes */
+#define TF_INPUT 0x40 /* Visible input */
+#define TF_INPUTN 0x4c /* Invisible input */
+#define TF_INMDT 0xc1 /* Visible, Set-MDT */
+#define TF_LOG 0x60
+
+/* Character Attribute Bytes */
+#define TAT_RESET 0x00
+#define TAT_FIELD 0xc0
+#define TAT_EXTHI 0x41
+#define TAT_FGCOLOR 0x42
+#define TAT_CHARS 0x43
+#define TAT_BGCOLOR 0x45
+#define TAT_TRANS 0x46
+
+/* Extended-Highlighting Bytes */
+#define TAX_RESET 0x00
+#define TAX_BLINK 0xf1
+#define TAX_REVER 0xf2
+#define TAX_UNDER 0xf4
+
+/* Reset value */
+#define TAR_RESET 0x00
+
+/* Color values */
+#define TAC_RESET 0x00
+#define TAC_BLUE 0xf1
+#define TAC_RED 0xf2
+#define TAC_PINK 0xf3
+#define TAC_GREEN 0xf4
+#define TAC_TURQ 0xf5
+#define TAC_YELLOW 0xf6
+#define TAC_WHITE 0xf7
+#define TAC_DEFAULT 0x00
+
+/* Write Control Characters */
+#define TW_NONE 0x40 /* No particular action */
+#define TW_KR 0xc2 /* Keyboard restore */
+#define TW_PLUSALARM 0x04 /* Add this bit for alarm */
+
+#define RAW3270_FIRSTMINOR 1 /* First minor number */
+#define RAW3270_MAXDEVS 255 /* Max number of 3270 devices */
+
+#define AID_CLEAR 0x6d
+#define AID_ENTER 0x7d
+#define AID_PF3 0xf3
+#define AID_PF7 0xf7
+#define AID_PF8 0xf8
+#define AID_READ_PARTITION 0x88
+
+#endif /* __ASM_S390_UAPI_RAW3270_H */
diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h
index da034c606314..84457dbb26b4 100644
--- a/arch/s390/include/uapi/asm/types.h
+++ b/arch/s390/include/uapi/asm/types.h
@@ -12,15 +12,18 @@
#ifndef __ASSEMBLY__
-/* A address type so that arithmetic can be done on it & it can be upgraded to
- 64 bit when necessary
-*/
-typedef unsigned long addr_t;
+typedef unsigned long addr_t;
typedef __signed__ long saddr_t;
typedef struct {
- __u32 u[4];
-} __vector128;
+ union {
+ struct {
+ __u64 high;
+ __u64 low;
+ };
+ __u32 u[4];
+ };
+} __attribute__((packed, aligned(4))) __vector128;
#endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index d83713f67530..f4785abe1b9f 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -85,7 +85,8 @@ struct ica_rsa_modexpo_crt {
struct CPRBX {
__u16 cprb_len; /* CPRB length 220 */
__u8 cprb_ver_id; /* CPRB version id. 0x02 */
- __u8 _pad_000[3]; /* Alignment pad bytes */
+ __u8 ctfm; /* Command Type Filtering Mask */
+ __u8 pad_000[2]; /* Alignment pad bytes */
__u8 func_id[2]; /* function id 0x5432 */
__u8 cprb_flags[4]; /* Flags */
__u32 req_parml; /* request parameter buffer len */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 5e6a23299790..8983837b3565 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -58,6 +58,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KPROBES) += kprobes_insn_page.o
obj-$(CONFIG_KPROBES) += mcount.o
+obj-$(CONFIG_RETHOOK) += rethook.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
@@ -69,7 +70,7 @@ obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o
obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
-obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
index fb92e8ed0525..f9efc54ec4b7 100644
--- a/arch/s390/kernel/abs_lowcore.c
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -3,12 +3,7 @@
#include <linux/pgtable.h>
#include <asm/abs_lowcore.h>
-#define ABS_LOWCORE_UNMAPPED 1
-#define ABS_LOWCORE_LAP_ON 2
-#define ABS_LOWCORE_IRQS_ON 4
-
unsigned long __bootdata_preserved(__abs_lowcore);
-bool __ro_after_init abs_lowcore_mapped;
int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
{
@@ -49,47 +44,3 @@ void abs_lowcore_unmap(int cpu)
addr += PAGE_SIZE;
}
}
-
-struct lowcore *get_abs_lowcore(unsigned long *flags)
-{
- unsigned long irq_flags;
- union ctlreg0 cr0;
- int cpu;
-
- *flags = 0;
- cpu = get_cpu();
- if (abs_lowcore_mapped) {
- return ((struct lowcore *)__abs_lowcore) + cpu;
- } else {
- if (cpu != 0)
- panic("Invalid unmapped absolute lowcore access\n");
- local_irq_save(irq_flags);
- if (!irqs_disabled_flags(irq_flags))
- *flags |= ABS_LOWCORE_IRQS_ON;
- __ctl_store(cr0.val, 0, 0);
- if (cr0.lap) {
- *flags |= ABS_LOWCORE_LAP_ON;
- __ctl_clear_bit(0, 28);
- }
- *flags |= ABS_LOWCORE_UNMAPPED;
- return lowcore_ptr[0];
- }
-}
-
-void put_abs_lowcore(struct lowcore *lc, unsigned long flags)
-{
- if (abs_lowcore_mapped) {
- if (flags)
- panic("Invalid mapped absolute lowcore release\n");
- } else {
- if (smp_processor_id() != 0)
- panic("Invalid mapped absolute lowcore access\n");
- if (!(flags & ABS_LOWCORE_UNMAPPED))
- panic("Invalid unmapped absolute lowcore release\n");
- if (flags & ABS_LOWCORE_LAP_ON)
- __ctl_set_bit(0, 28);
- if (flags & ABS_LOWCORE_IRQS_ON)
- local_irq_enable();
- }
- put_cpu();
-}
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 7ee3651d00ab..56254fa06f99 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -46,7 +46,7 @@ struct cache_info {
#define CACHE_MAX_LEVEL 8
union cache_topology {
struct cache_info ci[CACHE_MAX_LEVEL];
- unsigned long long raw;
+ unsigned long raw;
};
static const char * const cache_type_string[] = {
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index eee1ad3e1b29..cecedd01d4ec 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -139,7 +139,7 @@ static int save_sigregs_ext32(struct pt_regs *regs,
/* Save vector registers to signal stack */
if (MACHINE_HAS_VX) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
+ vxrs[i] = current->thread.fpu.vxrs[i].low;
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
sizeof(sregs_ext->vxrs_low)) ||
__copy_to_user(&sregs_ext->vxrs_high,
@@ -173,7 +173,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
+ current->thread.fpu.vxrs[i].low = vxrs[i];
}
return 0;
}
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index c13b1455ec8c..8a617be28bb4 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -110,7 +110,7 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
/* Copy lower halves of vector registers 0-15 */
for (i = 0; i < 16; i++)
- memcpy(&sa->vxrs_low[i], &vxrs[i].u[2], 8);
+ sa->vxrs_low[i] = vxrs[i].low;
/* Copy vector registers 16-31 */
memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
}
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index a778714e4d8b..82079f2d8583 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -35,6 +35,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
[DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" },
[DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" },
[DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" },
+ [DIAG_STAT_X08C] = { .code = 0x08c, .name = "Access 3270 Display Device Information" },
[DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" },
[DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" },
[DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" },
@@ -57,12 +58,16 @@ struct diag_ops __amode31_ref diag_amode31_ops = {
.diag26c = _diag26c_amode31,
.diag14 = _diag14_amode31,
.diag0c = _diag0c_amode31,
+ .diag8c = _diag8c_amode31,
.diag308_reset = _diag308_reset_amode31
};
static struct diag210 _diag210_tmp_amode31 __section(".amode31.data");
struct diag210 __amode31_ref *__diag210_tmp_amode31 = &_diag210_tmp_amode31;
+static struct diag8c _diag8c_tmp_amode31 __section(".amode31.data");
+static struct diag8c __amode31_ref *__diag8c_tmp_amode31 = &_diag8c_tmp_amode31;
+
static int show_diag_stat(struct seq_file *m, void *v)
{
struct diag_stat *stat;
@@ -194,6 +199,27 @@ int diag210(struct diag210 *addr)
}
EXPORT_SYMBOL(diag210);
+/*
+ * Diagnose 8c: Access 3270 Display Device Information
+ */
+int diag8c(struct diag8c *addr, struct ccw_dev_id *devno)
+{
+ static DEFINE_SPINLOCK(diag8c_lock);
+ unsigned long flags;
+ int ccode;
+
+ spin_lock_irqsave(&diag8c_lock, flags);
+
+ diag_stat_inc(DIAG_STAT_X08C);
+ ccode = diag_amode31_ops.diag8c(__diag8c_tmp_amode31, devno, sizeof(*addr));
+
+ *addr = *__diag8c_tmp_amode31;
+ spin_unlock_irqrestore(&diag8c_lock, flags);
+
+ return ccode;
+}
+EXPORT_SYMBOL(diag8c);
+
int diag224(void *ptr)
{
int rc = -EOPNOTSUPP;
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 6030fdd6997b..59eba19ae0f2 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -18,6 +18,7 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <asm/asm-extable.h>
+#include <linux/memblock.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/ipl.h>
@@ -160,9 +161,7 @@ static noinline __init void setup_lowcore_early(void)
psw_t psw;
psw.addr = (unsigned long)early_pgm_check_handler;
- psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
- if (IS_ENABLED(CONFIG_KASAN))
- psw.mask |= PSW_MASK_DAT;
+ psw.mask = PSW_KERNEL_BITS;
S390_lowcore.program_new_psw = psw;
S390_lowcore.preempt_count = INIT_PREEMPT_COUNT;
}
@@ -227,6 +226,8 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO;
/* the control bit is set during PCI initialization */
}
+ if (test_facility(194))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_RDP;
}
static inline void save_vector_registers(void)
@@ -288,7 +289,6 @@ static void __init sort_amode31_extable(void)
void __init startup_init(void)
{
- sclp_early_adjust_va();
reset_tod_clock();
check_image_bootable();
time_early_init();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 0f423e9df095..c8d8c9960936 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -137,19 +137,13 @@ _LPP_OFFSET = __LC_LPP
lgr %r14,\reg
larl %r13,\start
slgr %r14,%r13
-#ifdef CONFIG_AS_IS_LLVM
clgfrl %r14,.Lrange_size\@
-#else
- clgfi %r14,\end - \start
-#endif
jhe \outside_label
-#ifdef CONFIG_AS_IS_LLVM
.section .rodata, "a"
.align 4
.Lrange_size\@:
.long \end - \start
.previous
-#endif
.endm
.macro SIEEXIT
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 995ec7449feb..34674e38826b 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -73,6 +73,5 @@ extern struct exception_table_entry _stop_amode31_ex_table[];
#define __amode31_data __section(".amode31.data")
#define __amode31_ref __section(".amode31.refs")
extern long _start_amode31_refs[], _end_amode31_refs[];
-extern unsigned long __amode31_base;
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index d7b8b6ad574d..3b3bf8329e6c 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -25,6 +25,7 @@ ENTRY(startup_continue)
larl %r14,init_task
stg %r14,__LC_CURRENT
larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE
+ brasl %r14,sclp_early_adjust_va # allow sclp_early_printk
#ifdef CONFIG_KASAN
brasl %r14,kasan_early_init
#endif
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index b04fb418307c..38e267c7bff7 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -24,116 +24,61 @@ static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
void account_idle_time_irq(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ unsigned long idle_time;
u64 cycles_new[8];
int i;
- clear_cpu_flag(CIF_ENABLED_WAIT);
if (smp_cpu_mtid) {
stcctm(MT_DIAG, smp_cpu_mtid, cycles_new);
for (i = 0; i < smp_cpu_mtid; i++)
this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
}
- idle->clock_idle_exit = S390_lowcore.int_clock;
- idle->timer_idle_exit = S390_lowcore.sys_enter_timer;
+ idle_time = S390_lowcore.int_clock - idle->clock_idle_enter;
S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock;
- S390_lowcore.last_update_clock = idle->clock_idle_exit;
+ S390_lowcore.last_update_clock = S390_lowcore.int_clock;
S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter;
- S390_lowcore.last_update_timer = idle->timer_idle_exit;
+ S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
+
+ /* Account time spent with enabled wait psw loaded as idle time. */
+ WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
+ WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
+ account_idle_time(cputime_to_nsecs(idle_time));
}
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
- unsigned long idle_time;
unsigned long psw_mask;
/* Wait for external, I/O or machine check interrupt. */
- psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
clear_cpu_flag(CIF_NOHZ_DELAY);
/* psw_idle() returns with interrupts disabled. */
psw_idle(idle, psw_mask);
-
- /* Account time spent with enabled wait psw loaded as idle time. */
- raw_write_seqcount_begin(&idle->seqcount);
- idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
- idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
- idle->idle_time += idle_time;
- idle->idle_count++;
- account_idle_time(cputime_to_nsecs(idle_time));
- raw_write_seqcount_end(&idle->seqcount);
}
static ssize_t show_idle_count(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned long idle_count;
- unsigned int seq;
-
- do {
- seq = read_seqcount_begin(&idle->seqcount);
- idle_count = READ_ONCE(idle->idle_count);
- if (READ_ONCE(idle->clock_idle_enter))
- idle_count++;
- } while (read_seqcount_retry(&idle->seqcount, seq));
- return sprintf(buf, "%lu\n", idle_count);
+
+ return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_count));
}
DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
static ssize_t show_idle_time(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
- unsigned long now, idle_time, idle_enter, idle_exit, in_idle;
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned int seq;
-
- do {
- seq = read_seqcount_begin(&idle->seqcount);
- idle_time = READ_ONCE(idle->idle_time);
- idle_enter = READ_ONCE(idle->clock_idle_enter);
- idle_exit = READ_ONCE(idle->clock_idle_exit);
- } while (read_seqcount_retry(&idle->seqcount, seq));
- in_idle = 0;
- now = get_tod_clock();
- if (idle_enter) {
- if (idle_exit) {
- in_idle = idle_exit - idle_enter;
- } else if (now > idle_enter) {
- in_idle = now - idle_enter;
- }
- }
- idle_time += in_idle;
- return sprintf(buf, "%lu\n", idle_time >> 12);
-}
-DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
-u64 arch_cpu_idle_time(int cpu)
-{
- struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
- unsigned long now, idle_enter, idle_exit, in_idle;
- unsigned int seq;
-
- do {
- seq = read_seqcount_begin(&idle->seqcount);
- idle_enter = READ_ONCE(idle->clock_idle_enter);
- idle_exit = READ_ONCE(idle->clock_idle_exit);
- } while (read_seqcount_retry(&idle->seqcount, seq));
- in_idle = 0;
- now = get_tod_clock();
- if (idle_enter) {
- if (idle_exit) {
- in_idle = idle_exit - idle_enter;
- } else if (now > idle_enter) {
- in_idle = now - idle_enter;
- }
- }
- return cputime_to_nsecs(in_idle);
+ return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_time) >> 12);
}
+DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
void arch_cpu_idle_enter(void)
{
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index fbd646dbf440..5f0f5c86963a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -593,6 +593,7 @@ static struct attribute *ipl_eckd_attrs[] = {
&sys_ipl_type_attr.attr,
&sys_ipl_eckd_bootprog_attr.attr,
&sys_ipl_eckd_br_chr_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
&sys_ipl_device_attr.attr,
&sys_ipl_secure_attr.attr,
&sys_ipl_has_secure_attr.attr,
@@ -888,23 +889,27 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
return len;
}
-/* FCP wrapper */
-static ssize_t reipl_fcp_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
-{
- return reipl_generic_loadparm_show(reipl_block_fcp, page);
-}
-
-static ssize_t reipl_fcp_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
-{
- return reipl_generic_loadparm_store(reipl_block_fcp, buf, len);
-}
-
-static struct kobj_attribute sys_reipl_fcp_loadparm_attr =
- __ATTR(loadparm, 0644, reipl_fcp_loadparm_show,
- reipl_fcp_loadparm_store);
+#define DEFINE_GENERIC_LOADPARM(name) \
+static ssize_t reipl_##name##_loadparm_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *page) \
+{ \
+ return reipl_generic_loadparm_show(reipl_block_##name, page); \
+} \
+static ssize_t reipl_##name##_loadparm_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ return reipl_generic_loadparm_store(reipl_block_##name, buf, len); \
+} \
+static struct kobj_attribute sys_reipl_##name##_loadparm_attr = \
+ __ATTR(loadparm, 0644, reipl_##name##_loadparm_show, \
+ reipl_##name##_loadparm_store)
+
+DEFINE_GENERIC_LOADPARM(fcp);
+DEFINE_GENERIC_LOADPARM(nvme);
+DEFINE_GENERIC_LOADPARM(ccw);
+DEFINE_GENERIC_LOADPARM(nss);
+DEFINE_GENERIC_LOADPARM(eckd);
static ssize_t reipl_fcp_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
@@ -994,24 +999,6 @@ DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
reipl_block_nvme->nvme.br_lba);
-/* nvme wrapper */
-static ssize_t reipl_nvme_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
-{
- return reipl_generic_loadparm_show(reipl_block_nvme, page);
-}
-
-static ssize_t reipl_nvme_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
-{
- return reipl_generic_loadparm_store(reipl_block_nvme, buf, len);
-}
-
-static struct kobj_attribute sys_reipl_nvme_loadparm_attr =
- __ATTR(loadparm, 0644, reipl_nvme_loadparm_show,
- reipl_nvme_loadparm_store);
-
static struct attribute *reipl_nvme_attrs[] = {
&sys_reipl_nvme_fid_attr.attr,
&sys_reipl_nvme_nsid_attr.attr,
@@ -1047,38 +1034,6 @@ static struct kobj_attribute sys_reipl_nvme_clear_attr =
/* CCW reipl device attributes */
DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
-/* NSS wrapper */
-static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
-{
- return reipl_generic_loadparm_show(reipl_block_nss, page);
-}
-
-static ssize_t reipl_nss_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
-{
- return reipl_generic_loadparm_store(reipl_block_nss, buf, len);
-}
-
-/* CCW wrapper */
-static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
-{
- return reipl_generic_loadparm_show(reipl_block_ccw, page);
-}
-
-static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
-{
- return reipl_generic_loadparm_store(reipl_block_ccw, buf, len);
-}
-
-static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
- __ATTR(loadparm, 0644, reipl_ccw_loadparm_show,
- reipl_ccw_loadparm_store);
-
static ssize_t reipl_ccw_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
@@ -1176,6 +1131,7 @@ static struct attribute *reipl_eckd_attrs[] = {
&sys_reipl_eckd_device_attr.attr,
&sys_reipl_eckd_bootprog_attr.attr,
&sys_reipl_eckd_br_chr_attr.attr,
+ &sys_reipl_eckd_loadparm_attr.attr,
NULL,
};
@@ -1194,7 +1150,7 @@ static ssize_t reipl_eckd_clear_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t len)
{
- if (strtobool(buf, &reipl_eckd_clear) < 0)
+ if (kstrtobool(buf, &reipl_eckd_clear) < 0)
return -EINVAL;
return len;
}
@@ -1251,10 +1207,6 @@ static struct kobj_attribute sys_reipl_nss_name_attr =
__ATTR(name, 0644, reipl_nss_name_show,
reipl_nss_name_store);
-static struct kobj_attribute sys_reipl_nss_loadparm_attr =
- __ATTR(loadparm, 0644, reipl_nss_loadparm_show,
- reipl_nss_loadparm_store);
-
static struct attribute *reipl_nss_attrs[] = {
&sys_reipl_nss_name_attr.attr,
&sys_reipl_nss_loadparm_attr.attr,
@@ -1986,15 +1938,14 @@ static void dump_reipl_run(struct shutdown_trigger *trigger)
{
unsigned long ipib = (unsigned long) reipl_block_actual;
struct lowcore *abs_lc;
- unsigned long flags;
unsigned int csum;
csum = (__force unsigned int)
csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
- abs_lc = get_abs_lowcore(&flags);
+ abs_lc = get_abs_lowcore();
abs_lc->ipib = ipib;
abs_lc->ipib_checksum = csum;
- put_abs_lowcore(abs_lc, flags);
+ put_abs_lowcore(abs_lc);
dump_run(trigger);
}
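
For reference, DEFINE_GENERIC_LOADPARM(ccw) in the hunk above expands to the same wrapper pair that the patch deletes as open-coded variants for fcp, nvme, ccw and nss:

static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
				       struct kobj_attribute *attr, char *page)
{
	return reipl_generic_loadparm_show(reipl_block_ccw, page);
}
static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
					struct kobj_attribute *attr,
					const char *buf, size_t len)
{
	return reipl_generic_loadparm_store(reipl_block_ccw, buf, len);
}
static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
	__ATTR(loadparm, 0644, reipl_ccw_loadparm_show, reipl_ccw_loadparm_store);
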
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 45393919fe61..b020ff17d206 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -136,7 +136,7 @@ void noinstr do_io_irq(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
struct pt_regs *old_regs = set_irq_regs(regs);
- int from_idle;
+ bool from_idle;
irq_enter_rcu();
@@ -146,7 +146,7 @@ void noinstr do_io_irq(struct pt_regs *regs)
current->thread.last_break = regs->last_break;
}
- from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit;
+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
account_idle_time_irq();
@@ -171,7 +171,7 @@ void noinstr do_ext_irq(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
struct pt_regs *old_regs = set_irq_regs(regs);
- int from_idle;
+ bool from_idle;
irq_enter_rcu();
@@ -185,7 +185,7 @@ void noinstr do_ext_irq(struct pt_regs *regs)
regs->int_parm = S390_lowcore.ext_params;
regs->int_parm_long = S390_lowcore.ext_params2;
- from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit;
+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
account_idle_time_irq();
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 401f9c933ff9..5e713f318de3 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -281,16 +281,6 @@ static void pop_kprobe(struct kprobe_ctlblk *kcb)
}
NOKPROBE_SYMBOL(pop_kprobe);
-void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
-{
- ri->ret_addr = (kprobe_opcode_t *)regs->gprs[14];
- ri->fp = (void *)regs->gprs[15];
-
- /* Replace the return addr with trampoline addr */
- regs->gprs[14] = (unsigned long)&__kretprobe_trampoline;
-}
-NOKPROBE_SYMBOL(arch_prepare_kretprobe);
-
static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
{
switch (kcb->kprobe_status) {
@@ -371,26 +361,6 @@ static int kprobe_handler(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(kprobe_handler);
-void arch_kretprobe_fixup_return(struct pt_regs *regs,
- kprobe_opcode_t *correct_ret_addr)
-{
- /* Replace fake return address with real one. */
- regs->gprs[14] = (unsigned long)correct_ret_addr;
-}
-NOKPROBE_SYMBOL(arch_kretprobe_fixup_return);
-
-/*
- * Called from __kretprobe_trampoline
- */
-void trampoline_probe_handler(struct pt_regs *regs)
-{
- kretprobe_trampoline_handler(regs, (void *)regs->gprs[15]);
-}
-NOKPROBE_SYMBOL(trampoline_probe_handler);
-
-/* assembler function that handles the kretprobes must not be probed itself */
-NOKPROBE_SYMBOL(__kretprobe_trampoline);
-
/*
* Called after single-stepping. p->addr is the address of the
* instruction whose first byte has been replaced by the "breakpoint"
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 4579b42286d5..2a8e73266428 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -224,7 +224,6 @@ void machine_kexec_cleanup(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
struct lowcore *abs_lc;
- unsigned long flags;
VMCOREINFO_SYMBOL(lowcore_ptr);
VMCOREINFO_SYMBOL(high_memory);
@@ -232,9 +231,9 @@ void arch_crash_save_vmcoreinfo(void)
vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
- abs_lc = get_abs_lowcore(&flags);
+ abs_lc = get_abs_lowcore();
abs_lc->vmcore_info = paddr_vmcoreinfo_note();
- put_abs_lowcore(abs_lc, flags);
+ put_abs_lowcore(abs_lc);
}
void machine_shutdown(void)
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 4786bfe02144..43ff91073d2a 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -135,9 +135,9 @@ SYM_FUNC_END(return_to_handler)
#endif
#endif /* CONFIG_FUNCTION_TRACER */
-#ifdef CONFIG_KPROBES
+#ifdef CONFIG_RETHOOK
-SYM_FUNC_START(__kretprobe_trampoline)
+SYM_FUNC_START(arch_rethook_trampoline)
stg %r14,(__SF_GPRS+8*8)(%r15)
lay %r15,-STACK_FRAME_SIZE(%r15)
@@ -152,16 +152,16 @@ SYM_FUNC_START(__kretprobe_trampoline)
epsw %r2,%r3
risbg %r3,%r2,0,31,32
stg %r3,STACK_PTREGS_PSW(%r15)
- larl %r1,__kretprobe_trampoline
+ larl %r1,arch_rethook_trampoline
stg %r1,STACK_PTREGS_PSW+8(%r15)
lay %r2,STACK_PTREGS(%r15)
- brasl %r14,trampoline_probe_handler
+ brasl %r14,arch_rethook_trampoline_callback
mvc __SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15)
lmg %r0,%r15,STACK_PTREGS_GPRS(%r15)
lpswe __SF_EMPTY(%r15)
-SYM_FUNC_END(__kretprobe_trampoline)
+SYM_FUNC_END(arch_rethook_trampoline)
-#endif /* CONFIG_KPROBES */
+#endif /* CONFIG_RETHOOK */
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index ec0bd9457e90..6e1824141b29 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -59,15 +59,14 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
void __init os_info_init(void)
{
struct lowcore *abs_lc;
- unsigned long flags;
os_info.version_major = OS_INFO_VERSION_MAJOR;
os_info.version_minor = OS_INFO_VERSION_MINOR;
os_info.magic = OS_INFO_MAGIC;
os_info.csum = os_info_csum(&os_info);
- abs_lc = get_abs_lowcore(&flags);
+ abs_lc = get_abs_lowcore();
abs_lc->os_info = __pa(&os_info);
- put_abs_lowcore(abs_lc, flags);
+ put_abs_lowcore(abs_lc);
}
#ifdef CONFIG_CRASH_DUMP
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index aa38649c7c27..c9ab971498d6 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -2,7 +2,7 @@
/*
* Performance event support for s390x - CPU-measurement Counter Facility
*
- * Copyright IBM Corp. 2012, 2021
+ * Copyright IBM Corp. 2012, 2023
* Author(s): Hendrik Brueckner <[email protected]>
* Thomas Richter <[email protected]>
*/
@@ -16,11 +16,82 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/miscdevice.h>
+#include <linux/perf_event.h>
-#include <asm/cpu_mcf.h>
+#include <asm/cpu_mf.h>
#include <asm/hwctrset.h>
#include <asm/debug.h>
+enum cpumf_ctr_set {
+ CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
+ CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
+ CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */
+ CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */
+ CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */
+
+ /* Maximum number of counter sets */
+ CPUMF_CTR_SET_MAX,
+};
+
+#define CPUMF_LCCTL_ENABLE_SHIFT 16
+#define CPUMF_LCCTL_ACTCTL_SHIFT 0
+
+static inline void ctr_set_enable(u64 *state, u64 ctrsets)
+{
+ *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
+}
+
+static inline void ctr_set_disable(u64 *state, u64 ctrsets)
+{
+ *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
+}
+
+static inline void ctr_set_start(u64 *state, u64 ctrsets)
+{
+ *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
+}
+
+static inline void ctr_set_stop(u64 *state, u64 ctrsets)
+{
+ *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
+}
+
+static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
+{
+ switch (set) {
+ case CPUMF_CTR_SET_BASIC:
+ return stcctm(BASIC, range, dest);
+ case CPUMF_CTR_SET_USER:
+ return stcctm(PROBLEM_STATE, range, dest);
+ case CPUMF_CTR_SET_CRYPTO:
+ return stcctm(CRYPTO_ACTIVITY, range, dest);
+ case CPUMF_CTR_SET_EXT:
+ return stcctm(EXTENDED, range, dest);
+ case CPUMF_CTR_SET_MT_DIAG:
+ return stcctm(MT_DIAG_CLEARING, range, dest);
+ case CPUMF_CTR_SET_MAX:
+ return 3;
+ }
+ return 3;
+}
+
+struct cpu_cf_events {
+ struct cpumf_ctr_info info;
+ atomic_t ctr_set[CPUMF_CTR_SET_MAX];
+ u64 state; /* For perf_event_open SVC */
+ u64 dev_state; /* For /dev/hwctr */
+ unsigned int flags;
+ size_t used; /* Bytes used in data */
+ size_t usedss; /* Bytes used in start/stop */
+ unsigned char start[PAGE_SIZE]; /* Counter set at event add */
+ unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */
+ unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */
+ unsigned int sets; /* # Counter set saved in memory */
+};
+
+/* Per-CPU event structure for the counter facility */
+static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events);
+
static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */
static debug_info_t *cf_dbg;
@@ -112,6 +183,53 @@ static void cfdiag_trailer(struct cf_trailer_entry *te)
te->timestamp = get_tod_clock_fast();
}
+/*
+ * Return the maximum possible counter set size (in number of 8 byte counters)
+ * depending on type and model number.
+ */
+static size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
+ struct cpumf_ctr_info *info)
+{
+ size_t ctrset_size = 0;
+
+ switch (ctrset) {
+ case CPUMF_CTR_SET_BASIC:
+ if (info->cfvn >= 1)
+ ctrset_size = 6;
+ break;
+ case CPUMF_CTR_SET_USER:
+ if (info->cfvn == 1)
+ ctrset_size = 6;
+ else if (info->cfvn >= 3)
+ ctrset_size = 2;
+ break;
+ case CPUMF_CTR_SET_CRYPTO:
+ if (info->csvn >= 1 && info->csvn <= 5)
+ ctrset_size = 16;
+ else if (info->csvn == 6 || info->csvn == 7)
+ ctrset_size = 20;
+ break;
+ case CPUMF_CTR_SET_EXT:
+ if (info->csvn == 1)
+ ctrset_size = 32;
+ else if (info->csvn == 2)
+ ctrset_size = 48;
+ else if (info->csvn >= 3 && info->csvn <= 5)
+ ctrset_size = 128;
+ else if (info->csvn == 6 || info->csvn == 7)
+ ctrset_size = 160;
+ break;
+ case CPUMF_CTR_SET_MT_DIAG:
+ if (info->csvn > 3)
+ ctrset_size = 48;
+ break;
+ case CPUMF_CTR_SET_MAX:
+ break;
+ }
+
+ return ctrset_size;
+}
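Since the sizes above are expressed in counters, a small hedged helper (hypothetical, not in the patch) makes the byte arithmetic explicit; the 8-byte counter width is taken from the comment above cpum_cf_ctrset_size().

/* Bytes needed to store one counter set read via stcctm()/ctr_stcctm(). */
static size_t cpum_cf_ctrset_bytes(enum cpumf_ctr_set ctrset,
				   struct cpumf_ctr_info *info)
{
	return cpum_cf_ctrset_size(ctrset, info) * sizeof(u64);
}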
+
/* Read a counter set. The counter set number determines the counter set and
* the CPUM-CF first and second version number determine the number of
* available counters in each counter set.
@@ -388,6 +506,47 @@ static void cpumf_pmu_disable(struct pmu *pmu)
cpuhw->flags &= ~PMU_F_ENABLED;
}
+#define PMC_INIT 0UL
+#define PMC_RELEASE 1UL
+
+static void cpum_cf_setup_cpu(void *flags)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+
+ switch ((unsigned long)flags) {
+ case PMC_INIT:
+ memset(&cpuhw->info, 0, sizeof(cpuhw->info));
+ qctri(&cpuhw->info);
+ cpuhw->flags |= PMU_F_RESERVED;
+ break;
+
+ case PMC_RELEASE:
+ cpuhw->flags &= ~PMU_F_RESERVED;
+ break;
+ }
+
+ /* Disable CPU counter sets */
+ lcctl(0);
+ debug_sprintf_event(cf_dbg, 5, "%s flags %#x flags %#x state %#llx\n",
+ __func__, (unsigned int)(unsigned long)flags, cpuhw->flags,
+ cpuhw->state);
+}
+
+/* Initialize the CPU-measurement counter facility */
+static int __kernel_cpumcf_begin(void)
+{
+ on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_INIT, 1);
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+ return 0;
+}
+
+/* Release the CPU-measurement counter facility */
+static void __kernel_cpumcf_end(void)
+{
+ on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_RELEASE, 1);
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+}
/* Number of perf events counting hardware events */
static atomic_t num_events = ATOMIC_INIT(0);
@@ -397,12 +556,10 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
/* Release the PMU if event is the last perf event */
static void hw_perf_event_destroy(struct perf_event *event)
{
- if (!atomic_add_unless(&num_events, -1, 1)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_dec_return(&num_events) == 0)
- __kernel_cpumcf_end();
- mutex_unlock(&pmc_reserve_mutex);
- }
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_events) == 0)
+ __kernel_cpumcf_end();
+ mutex_unlock(&pmc_reserve_mutex);
}
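The simplification relies on the acquire side taking the same mutex. A hedged sketch of that counterpart, cpumf_hw_inuse(), whose tail is visible in the next hunk, under the stated assumption that it reserves the facility on the first reference:

static void cpumf_hw_inuse(void)
{
	mutex_lock(&pmc_reserve_mutex);
	if (atomic_inc_return(&num_events) == 1)
		__kernel_cpumcf_begin();	/* first event reserves the facility */
	mutex_unlock(&pmc_reserve_mutex);
}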
/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
@@ -434,6 +591,12 @@ static void cpumf_hw_inuse(void)
mutex_unlock(&pmc_reserve_mutex);
}
+static int is_userspace_event(u64 ev)
+{
+ return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+ cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev;
+}
+
static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
{
struct perf_event_attr *attr = &event->attr;
@@ -456,19 +619,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
if (is_sampling_event(event)) /* No sampling support */
return -ENOENT;
ev = attr->config;
- /* Count user space (problem-state) only */
if (!attr->exclude_user && attr->exclude_kernel) {
- if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
- return -EOPNOTSUPP;
- ev = cpumf_generic_events_user[ev];
-
- /* No support for kernel space counters only */
+ /*
+ * Count user space (problem-state) only
+ * Handle events 32 and 33 as 0:u and 1:u
+ */
+ if (!is_userspace_event(ev)) {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_user[ev];
+ }
} else if (!attr->exclude_kernel && attr->exclude_user) {
+ /* No support for kernel space counters only */
return -EOPNOTSUPP;
- } else { /* Count user and kernel space */
- if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
- return -EOPNOTSUPP;
- ev = cpumf_generic_events_basic[ev];
+ } else {
+ /* Count user and kernel space, incl. events 32 + 33 */
+ if (!is_userspace_event(ev)) {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_basic[ev];
+ }
}
break;
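A hedged reading of the mapping above: the concrete numbers come only from the "events 32 and 33" comment in this hunk, the table contents themselves are an assumption.

/*
 * With attr.exclude_kernel set, both spellings select the same
 * problem-state counter:
 *   attr.config == PERF_COUNT_HW_CPU_CYCLES -> cpumf_generic_events_user[0], i.e. 32
 *   attr.config == 32                       -> accepted as-is by is_userspace_event()
 * and analogously PERF_COUNT_HW_INSTRUCTIONS / 33.
 */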
@@ -761,31 +931,120 @@ static struct pmu cpumf_pmu = {
.read = cpumf_pmu_read,
};
+static int cpum_cf_setup(unsigned int cpu, unsigned long flags)
+{
+ local_irq_disable();
+ cpum_cf_setup_cpu((void *)flags);
+ local_irq_enable();
+ return 0;
+}
+
+static int cfset_online_cpu(unsigned int cpu);
+static int cpum_cf_online_cpu(unsigned int cpu)
+{
+ debug_sprintf_event(cf_dbg, 4, "%s cpu %d in_irq %ld\n", __func__,
+ cpu, in_interrupt());
+ cpum_cf_setup(cpu, PMC_INIT);
+ return cfset_online_cpu(cpu);
+}
+
+static int cfset_offline_cpu(unsigned int cpu);
+static int cpum_cf_offline_cpu(unsigned int cpu)
+{
+ debug_sprintf_event(cf_dbg, 4, "%s cpu %d\n", __func__, cpu);
+ cfset_offline_cpu(cpu);
+ return cpum_cf_setup(cpu, PMC_RELEASE);
+}
+
+/* Return true if store counter set multiple instruction is available */
+static inline int stccm_avail(void)
+{
+ return test_facility(142);
+}
+
+/* CPU-measurement alerts for the counter facility */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_cf_events *cpuhw;
+
+ if (!(alert & CPU_MF_INT_CF_MASK))
+ return;
+
+ inc_irq_stat(IRQEXT_CMC);
+ cpuhw = this_cpu_ptr(&cpu_cf_events);
+
+ /*
+ * Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case.
+ */
+ if (!(cpuhw->flags & PMU_F_RESERVED))
+ return;
+
+ /* counter authorization change alert */
+ if (alert & CPU_MF_INT_CF_CACA)
+ qctri(&cpuhw->info);
+
+ /* loss of counter data alert */
+ if (alert & CPU_MF_INT_CF_LCDA)
+ pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+
+ /* loss of MT counter data alert */
+ if (alert & CPU_MF_INT_CF_MTDA)
+ pr_warn("CPU[%i] MT counter data was lost\n",
+ smp_processor_id());
+}
+
static int cfset_init(void);
static int __init cpumf_pmu_init(void)
{
int rc;
- if (!kernel_cpumcf_avail())
+ if (!cpum_cf_avail())
return -ENODEV;
+ /*
+ * Clear bit 15 of cr0 to unauthorize problem-state to
+ * extract measurement counters
+ */
+ ctl_clear_bit(0, 48);
+
+ /* register handler for measurement-alert interruptions */
+ rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ if (rc) {
+ pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc);
+ return rc;
+ }
+
/* Setup s390dbf facility */
cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
if (!cf_dbg) {
pr_err("Registration of s390dbf(cpum_cf) failed\n");
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto out1;
}
debug_register_view(cf_dbg, &debug_sprintf_view);
cpumf_pmu.attr_groups = cpumf_cf_event_group();
rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
if (rc) {
- debug_unregister_view(cf_dbg, &debug_sprintf_view);
- debug_unregister(cf_dbg);
pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
+ goto out2;
} else if (stccm_avail()) { /* Setup counter set device */
cfset_init();
}
+
+ rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
+ "perf/s390/cf:online",
+ cpum_cf_online_cpu, cpum_cf_offline_cpu);
+ return rc;
+
+out2:
+ debug_unregister_view(cf_dbg, &debug_sprintf_view);
+ debug_unregister(cf_dbg);
+out1:
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert);
return rc;
}
@@ -1003,7 +1262,6 @@ static int cfset_all_start(struct cfset_request *req)
return rc;
}
-
/* Return the maximum required space for all possible CPUs in case one
* CPU will be onlined during the START, READ, STOP cycles.
* To find out the size of the counter sets, any one CPU will do. They
@@ -1266,7 +1524,7 @@ static struct miscdevice cfset_dev = {
/* Hotplug add of a CPU. Scan through all active processes and add
* that CPU to the list of CPUs supplied with ioctl(..., START, ...).
*/
-int cfset_online_cpu(unsigned int cpu)
+static int cfset_online_cpu(unsigned int cpu)
{
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;
@@ -1286,7 +1544,7 @@ int cfset_online_cpu(unsigned int cpu)
/* Hotplug remove of a CPU. Scan through all active processes and clear
* that CPU from the list of CPUs supplied with ioctl(..., START, ...).
*/
-int cfset_offline_cpu(unsigned int cpu)
+static int cfset_offline_cpu(unsigned int cpu)
{
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;
diff --git a/arch/s390/kernel/perf_cpum_cf_common.c b/arch/s390/kernel/perf_cpum_cf_common.c
deleted file mode 100644
index 8ee48672233f..000000000000
--- a/arch/s390/kernel/perf_cpum_cf_common.c
+++ /dev/null
@@ -1,233 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * CPU-Measurement Counter Facility Support - Common Layer
- *
- * Copyright IBM Corp. 2019
- * Author(s): Hendrik Brueckner <[email protected]>
- */
-#define KMSG_COMPONENT "cpum_cf_common"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/kernel_stat.h>
-#include <linux/percpu.h>
-#include <linux/notifier.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <asm/ctl_reg.h>
-#include <asm/irq.h>
-#include <asm/cpu_mcf.h>
-
-/* Per-CPU event structure for the counter facility */
-DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = {
- .ctr_set = {
- [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0),
- },
- .alert = ATOMIC64_INIT(0),
- .state = 0,
- .dev_state = 0,
- .flags = 0,
- .used = 0,
- .usedss = 0,
- .sets = 0
-};
-/* Indicator whether the CPU-Measurement Counter Facility Support is ready */
-static bool cpum_cf_initalized;
-
-/* CPU-measurement alerts for the counter facility */
-static void cpumf_measurement_alert(struct ext_code ext_code,
- unsigned int alert, unsigned long unused)
-{
- struct cpu_cf_events *cpuhw;
-
- if (!(alert & CPU_MF_INT_CF_MASK))
- return;
-
- inc_irq_stat(IRQEXT_CMC);
- cpuhw = this_cpu_ptr(&cpu_cf_events);
-
- /* Measurement alerts are shared and might happen when the PMU
- * is not reserved. Ignore these alerts in this case. */
- if (!(cpuhw->flags & PMU_F_RESERVED))
- return;
-
- /* counter authorization change alert */
- if (alert & CPU_MF_INT_CF_CACA)
- qctri(&cpuhw->info);
-
- /* loss of counter data alert */
- if (alert & CPU_MF_INT_CF_LCDA)
- pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
-
- /* loss of MT counter data alert */
- if (alert & CPU_MF_INT_CF_MTDA)
- pr_warn("CPU[%i] MT counter data was lost\n",
- smp_processor_id());
-
- /* store alert for special handling by in-kernel users */
- atomic64_or(alert, &cpuhw->alert);
-}
-
-#define PMC_INIT 0
-#define PMC_RELEASE 1
-static void cpum_cf_setup_cpu(void *flags)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
-
- switch (*((int *) flags)) {
- case PMC_INIT:
- memset(&cpuhw->info, 0, sizeof(cpuhw->info));
- qctri(&cpuhw->info);
- cpuhw->flags |= PMU_F_RESERVED;
- break;
-
- case PMC_RELEASE:
- cpuhw->flags &= ~PMU_F_RESERVED;
- break;
- }
-
- /* Disable CPU counter sets */
- lcctl(0);
-}
-
-bool kernel_cpumcf_avail(void)
-{
- return cpum_cf_initalized;
-}
-EXPORT_SYMBOL(kernel_cpumcf_avail);
-
-/* Initialize the CPU-measurement counter facility */
-int __kernel_cpumcf_begin(void)
-{
- int flags = PMC_INIT;
-
- on_each_cpu(cpum_cf_setup_cpu, &flags, 1);
- irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
- return 0;
-}
-EXPORT_SYMBOL(__kernel_cpumcf_begin);
-
-/* Obtain the CPU-measurement alerts for the counter facility */
-unsigned long kernel_cpumcf_alert(int clear)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- unsigned long alert;
-
- alert = atomic64_read(&cpuhw->alert);
- if (clear)
- atomic64_set(&cpuhw->alert, 0);
-
- return alert;
-}
-EXPORT_SYMBOL(kernel_cpumcf_alert);
-
-/* Release the CPU-measurement counter facility */
-void __kernel_cpumcf_end(void)
-{
- int flags = PMC_RELEASE;
-
- on_each_cpu(cpum_cf_setup_cpu, &flags, 1);
- irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-}
-EXPORT_SYMBOL(__kernel_cpumcf_end);
-
-static int cpum_cf_setup(unsigned int cpu, int flags)
-{
- local_irq_disable();
- cpum_cf_setup_cpu(&flags);
- local_irq_enable();
- return 0;
-}
-
-static int cpum_cf_online_cpu(unsigned int cpu)
-{
- cpum_cf_setup(cpu, PMC_INIT);
- return cfset_online_cpu(cpu);
-}
-
-static int cpum_cf_offline_cpu(unsigned int cpu)
-{
- cfset_offline_cpu(cpu);
- return cpum_cf_setup(cpu, PMC_RELEASE);
-}
-
-/* Return the maximum possible counter set size (in number of 8 byte counters)
- * depending on type and model number.
- */
-size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
- struct cpumf_ctr_info *info)
-{
- size_t ctrset_size = 0;
-
- switch (ctrset) {
- case CPUMF_CTR_SET_BASIC:
- if (info->cfvn >= 1)
- ctrset_size = 6;
- break;
- case CPUMF_CTR_SET_USER:
- if (info->cfvn == 1)
- ctrset_size = 6;
- else if (info->cfvn >= 3)
- ctrset_size = 2;
- break;
- case CPUMF_CTR_SET_CRYPTO:
- if (info->csvn >= 1 && info->csvn <= 5)
- ctrset_size = 16;
- else if (info->csvn == 6 || info->csvn == 7)
- ctrset_size = 20;
- break;
- case CPUMF_CTR_SET_EXT:
- if (info->csvn == 1)
- ctrset_size = 32;
- else if (info->csvn == 2)
- ctrset_size = 48;
- else if (info->csvn >= 3 && info->csvn <= 5)
- ctrset_size = 128;
- else if (info->csvn == 6 || info->csvn == 7)
- ctrset_size = 160;
- break;
- case CPUMF_CTR_SET_MT_DIAG:
- if (info->csvn > 3)
- ctrset_size = 48;
- break;
- case CPUMF_CTR_SET_MAX:
- break;
- }
-
- return ctrset_size;
-}
-
-static int __init cpum_cf_init(void)
-{
- int rc;
-
- if (!cpum_cf_avail())
- return -ENODEV;
-
- /* clear bit 15 of cr0 to unauthorize problem-state to
- * extract measurement counters */
- ctl_clear_bit(0, 48);
-
- /* register handler for measurement-alert interruptions */
- rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
- cpumf_measurement_alert);
- if (rc) {
- pr_err("Registering for CPU-measurement alerts "
- "failed with rc=%i\n", rc);
- return rc;
- }
-
- rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
- "perf/s390/cf:online",
- cpum_cf_online_cpu, cpum_cf_offline_cpu);
- if (!rc)
- cpum_cf_initalized = true;
-
- return rc;
-}
-early_initcall(cpum_cf_init);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index ead6eea48be8..79904a839fb9 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -22,6 +22,7 @@
#include <asm/irq.h>
#include <asm/debug.h>
#include <asm/timex.h>
+#include <asm-generic/io.h>
/* Minimum number of sample-data-block-tables:
* At least one table is required for the sampling buffer structure.
@@ -99,6 +100,57 @@ static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
/* Debug feature */
static debug_info_t *sfdbg;
+/* Sampling control helper functions */
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+ unsigned long freq)
+{
+ return (USEC_PER_SEC / freq) * qsi->cpu_speed;
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+ unsigned long rate)
+{
+ return USEC_PER_SEC * qsi->cpu_speed / rate;
+}
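A hedged numeric check of the two conversions above, assuming a qsi->cpu_speed value of 1000 purely for illustration:

/*
 *	freq_to_sample_rate(&qsi, 4000)   == (1000000 / 4000) * 1000 == 250000
 *	sample_rate_to_freq(&qsi, 250000) == 1000000 * 1000 / 250000 == 4000
 * i.e. the helpers are inverses of each other up to integer truncation.
 */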
+
+/* Return TOD timestamp contained in a trailer entry */
+static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+{
+ /* TOD in STCKE format */
+ if (te->header.t)
+ return *((unsigned long long *)&te->timestamp[1]);
+
+ /* TOD in STCK format */
+ return *((unsigned long long *)&te->timestamp[0]);
+}
+
+/* Return pointer to the trailer entry of a sample data block */
+static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v)
+{
+ void *ret;
+
+ ret = (void *)v;
+ ret += PAGE_SIZE;
+ ret -= sizeof(struct hws_trailer_entry);
+
+ return ret;
+}
+
+/*
+ * Return true if the entry in the sample data block table (sdbt)
+ * is a link to the next sdbt
+ */
+static inline int is_link_entry(unsigned long *s)
+{
+ return *s & 0x1UL ? 1 : 0;
+}
+
+/* Return pointer to the linked sdbt */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+ return phys_to_virt(*s & ~0x1UL);
+}
+
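A minimal sketch (hypothetical helper, not in the patch) of how these accessors are meant to be combined when walking a circular SDBT chain; it assumes the last table entry links back to the first table, as the buffer setup code later in this file establishes.

static void walk_sdbt(unsigned long *sdbt)
{
	unsigned long *curr = sdbt;

	while (*curr) {
		if (is_link_entry(curr)) {
			/* Bit 0 set: entry links to the next table */
			curr = get_next_sdbt(curr);
			if (curr == sdbt)
				break;	/* wrapped around to the origin table */
		} else {
			/* Entry holds the physical address of one SDB */
			curr++;
		}
	}
}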
/*
* sf_disable() - Switch off sampling facility
*/
@@ -150,7 +202,7 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
} else {
/* Process SDB pointer */
if (*curr) {
- free_page(*curr);
+ free_page((unsigned long)phys_to_virt(*curr));
curr++;
}
}
@@ -170,11 +222,11 @@ static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
sdb = get_zeroed_page(gfp_flags);
if (!sdb)
return -ENOMEM;
- te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+ te = trailer_entry_ptr(sdb);
te->header.a = 1;
/* Link SDB into the sample-data-block-table */
- *sdbt = sdb;
+ *sdbt = virt_to_phys((void *)sdb);
return 0;
}
@@ -233,7 +285,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
}
sfb->num_sdbt++;
/* Link current page to tail of chain */
- *tail = (unsigned long)(void *) new + 1;
+ *tail = virt_to_phys((void *)new) + 1;
tail_prev = tail;
tail = new;
}
@@ -263,7 +315,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
}
/* Link sampling buffer to its origin */
- *tail = (unsigned long) sfb->sdbt + 1;
+ *tail = virt_to_phys(sfb->sdbt) + 1;
sfb->tail = tail;
debug_sprintf_event(sfdbg, 4, "%s: new buffer"
@@ -301,7 +353,7 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
* realloc_sampling_buffer() invocation.
*/
sfb->tail = sfb->sdbt;
- *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
+ *sfb->tail = virt_to_phys((void *)sfb->sdbt) + 1;
/* Allocate requested number of sample-data-blocks */
rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
@@ -557,9 +609,6 @@ static void setup_pmc_cpu(void *flags)
if (err)
pr_err("Switching off the sampling facility failed "
"with rc %i\n", err);
- debug_sprintf_event(sfdbg, 5,
- "%s: initialized: cpuhw %p\n", __func__,
- cpusf);
break;
case PMC_RELEASE:
cpusf->flags &= ~PMU_F_RESERVED;
@@ -569,9 +618,6 @@ static void setup_pmc_cpu(void *flags)
"with rc %i\n", err);
} else
deallocate_buffers(cpusf);
- debug_sprintf_event(sfdbg, 5,
- "%s: released: cpuhw %p\n", __func__,
- cpusf);
break;
}
if (err)
@@ -1177,8 +1223,8 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
struct hws_trailer_entry *te;
struct hws_basic_entry *sample;
- te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
- sample = (struct hws_basic_entry *) *sdbt;
+ te = trailer_entry_ptr((unsigned long)sdbt);
+ sample = (struct hws_basic_entry *)sdbt;
while ((unsigned long *) sample < (unsigned long *) te) {
/* Check for an empty sample */
if (!sample->def || sample->LS)
@@ -1259,7 +1305,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
union hws_trailer_header old, prev, new;
struct hw_perf_event *hwc = &event->hw;
struct hws_trailer_entry *te;
- unsigned long *sdbt;
+ unsigned long *sdbt, sdb;
int done;
/*
@@ -1276,7 +1322,8 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
done = event_overflow = sampl_overflow = num_sdb = 0;
while (!done) {
/* Get the trailer entry of the sample-data-block */
- te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+ sdb = (unsigned long)phys_to_virt(*sdbt);
+ te = trailer_entry_ptr(sdb);
/* Leave loop if no more work to do (block full indicator) */
if (!te->header.f) {
@@ -1294,16 +1341,17 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
sampl_overflow += te->header.overflow;
/* Timestamps are valid for full sample-data-blocks only */
- debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
+ debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx/%#lx "
"overflow %llu timestamp %#llx\n",
- __func__, (unsigned long)sdbt, te->header.overflow,
+ __func__, sdb, (unsigned long)sdbt,
+ te->header.overflow,
(te->header.f) ? trailer_timestamp(te) : 0ULL);
/* Collect all samples from a single sample-data-block and
* flag if an (perf) event overflow happened. If so, the PMU
* is stopped and remaining samples will be discarded.
*/
- hw_collect_samples(event, sdbt, &event_overflow);
+ hw_collect_samples(event, (unsigned long *)sdb, &event_overflow);
num_sdb++;
/* Reset trailer (using compare-double-and-swap) */
@@ -1361,10 +1409,26 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
OVERFLOW_REG(hwc), num_sdb);
}
-#define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb)
-#define AUX_SDB_NUM(aux, start, end) (end >= start ? end - start + 1 : 0)
-#define AUX_SDB_NUM_ALERT(aux) AUX_SDB_NUM(aux, aux->head, aux->alert_mark)
-#define AUX_SDB_NUM_EMPTY(aux) AUX_SDB_NUM(aux, aux->head, aux->empty_mark)
+static inline unsigned long aux_sdb_index(struct aux_buffer *aux,
+ unsigned long i)
+{
+ return i % aux->sfb.num_sdb;
+}
+
+static inline unsigned long aux_sdb_num(unsigned long start, unsigned long end)
+{
+ return end >= start ? end - start + 1 : 0;
+}
+
+static inline unsigned long aux_sdb_num_alert(struct aux_buffer *aux)
+{
+ return aux_sdb_num(aux->head, aux->alert_mark);
+}
+
+static inline unsigned long aux_sdb_num_empty(struct aux_buffer *aux)
+{
+ return aux_sdb_num(aux->head, aux->empty_mark);
+}
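A hedged example of the ring arithmetic above, with an assumed aux->sfb.num_sdb of 8:

/*
 *	aux_sdb_index(aux, 9) == 9 % 8 == 1	(index wraps inside the SDB ring)
 *	aux_sdb_num(6, 9)     == 9 - 6 + 1 == 4	(SDBs 6..9 inclusive)
 * so with head == 6 and alert_mark == 9, aux_sdb_num_alert(aux) == 4.
 */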
/*
* Get trailer entry by index of SDB.
@@ -1374,9 +1438,9 @@ static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux,
{
unsigned long sdb;
- index = AUX_SDB_INDEX(aux, index);
+ index = aux_sdb_index(aux, index);
sdb = aux->sdb_index[index];
- return (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+ return trailer_entry_ptr(sdb);
}
/*
@@ -1398,7 +1462,7 @@ static void aux_output_end(struct perf_output_handle *handle)
if (!aux)
return;
- range_scan = AUX_SDB_NUM_ALERT(aux);
+ range_scan = aux_sdb_num_alert(aux);
for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
if (!te->header.f)
@@ -1428,9 +1492,7 @@ static int aux_output_begin(struct perf_output_handle *handle,
struct aux_buffer *aux,
struct cpu_hw_sf *cpuhw)
{
- unsigned long range;
- unsigned long i, range_scan, idx;
- unsigned long head, base, offset;
+ unsigned long range, i, range_scan, idx, head, base, offset;
struct hws_trailer_entry *te;
if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
@@ -1449,8 +1511,8 @@ static int aux_output_begin(struct perf_output_handle *handle,
"%s: range %ld head %ld alert %ld empty %ld\n",
__func__, range, aux->head, aux->alert_mark,
aux->empty_mark);
- if (range > AUX_SDB_NUM_EMPTY(aux)) {
- range_scan = range - AUX_SDB_NUM_EMPTY(aux);
+ if (range > aux_sdb_num_empty(aux)) {
+ range_scan = range - aux_sdb_num_empty(aux);
idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
@@ -1468,11 +1530,11 @@ static int aux_output_begin(struct perf_output_handle *handle,
te->header.a = 1;
/* Reset hardware buffer head */
- head = AUX_SDB_INDEX(aux, aux->head);
+ head = aux_sdb_index(aux, aux->head);
base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE];
offset = head % CPUM_SF_SDB_PER_TABLE;
- cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
- cpuhw->lsctl.dear = aux->sdb_index[head];
+ cpuhw->lsctl.tear = virt_to_phys((void *)base) + offset * sizeof(unsigned long);
+ cpuhw->lsctl.dear = virt_to_phys((void *)aux->sdb_index[head]);
debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
"index %ld tear %#lx dear %#lx\n", __func__,
@@ -1550,7 +1612,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
"empty %ld\n", __func__, range, aux->head,
aux->alert_mark, aux->empty_mark);
- if (range <= AUX_SDB_NUM_EMPTY(aux))
+ if (range <= aux_sdb_num_empty(aux))
/*
* No need to scan. All SDBs in range are marked as empty.
* Just set alert indicator. Should check race with hardware
@@ -1571,7 +1633,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
* Start scanning from one SDB behind empty_mark. If the new alert
* indicator falls into this range, set it.
*/
- range_scan = range - AUX_SDB_NUM_EMPTY(aux);
+ range_scan = range - aux_sdb_num_empty(aux);
idx_old = idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
@@ -1618,7 +1680,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
return;
/* Inform user space new data arrived */
- size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+ size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
size >> PAGE_SHIFT);
perf_aux_output_end(handle, size);
@@ -1660,7 +1722,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
"overflow %lld\n", __func__,
aux->head, range, overflow);
} else {
- size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+ size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
perf_aux_output_end(&cpuhw->handle, size);
debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
"already full, try another\n",
@@ -1702,7 +1764,7 @@ static void aux_sdb_init(unsigned long sdb)
{
struct hws_trailer_entry *te;
- te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+ te = trailer_entry_ptr(sdb);
/* Save clock base */
te->clock_base = 1;
@@ -1782,18 +1844,18 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
goto no_sdbt;
aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new;
/* Link current page to tail of chain */
- *tail = (unsigned long)(void *) new + 1;
+ *tail = virt_to_phys(new) + 1;
tail = new;
}
/* Tail is the entry in a SDBT */
- *tail = (unsigned long)pages[i];
+ *tail = virt_to_phys(pages[i]);
aux->sdb_index[i] = (unsigned long)pages[i];
aux_sdb_init((unsigned long)pages[i]);
}
sfb->num_sdb = nr_pages;
/* Link the last entry in the SDBT to the first SDBT */
- *tail = (unsigned long) sfb->sdbt + 1;
+ *tail = virt_to_phys(sfb->sdbt) + 1;
sfb->tail = tail;
/*
@@ -1933,7 +1995,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
cpuhw->lsctl.h = 1;
cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
if (!SAMPL_DIAG_MODE(&event->hw)) {
- cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
+ cpuhw->lsctl.tear = virt_to_phys(cpuhw->sfb.sdbt);
cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt;
}
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index 555597222bad..fcea307d7529 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -16,8 +16,8 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/io.h>
+#include <linux/perf_event.h>
-#include <asm/cpu_mcf.h>
#include <asm/ctl_reg.h>
#include <asm/pai.h>
#include <asm/debug.h>
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 3f5d2db0b854..67df64ef4839 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -147,8 +147,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (unlikely(args->fn)) {
/* kernel thread */
memset(&frame->childregs, 0, sizeof(struct pt_regs));
- frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO |
+ PSW_MASK_EXT | PSW_MASK_MCHECK;
frame->childregs.psw.addr =
(unsigned long)__ret_from_fork;
frame->childregs.gprs[9] = (unsigned long)args->fn;
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 53e0209229f8..cf9659e13f03 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -990,7 +990,7 @@ static int s390_vxrs_low_get(struct task_struct *target,
if (target == current)
save_fpu_regs();
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
+ vxrs[i] = target->thread.fpu.vxrs[i].low;
return membuf_write(&to, vxrs, sizeof(vxrs));
}
@@ -1008,12 +1008,12 @@ static int s390_vxrs_low_set(struct task_struct *target,
save_fpu_regs();
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
+ vxrs[i] = target->thread.fpu.vxrs[i].low;
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
if (rc == 0)
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i];
+ target->thread.fpu.vxrs[i].low = vxrs[i];
return rc;
}
diff --git a/arch/s390/kernel/rethook.c b/arch/s390/kernel/rethook.c
new file mode 100644
index 000000000000..af10e6bdd34e
--- /dev/null
+++ b/arch/s390/kernel/rethook.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/rethook.h>
+#include <linux/kprobes.h>
+#include "rethook.h"
+
+void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount)
+{
+ rh->ret_addr = regs->gprs[14];
+ rh->frame = regs->gprs[15];
+
+ /* Replace the return addr with trampoline addr */
+ regs->gprs[14] = (unsigned long)&arch_rethook_trampoline;
+}
+NOKPROBE_SYMBOL(arch_rethook_prepare);
+
+void arch_rethook_fixup_return(struct pt_regs *regs,
+ unsigned long correct_ret_addr)
+{
+ /* Replace fake return address with real one. */
+ regs->gprs[14] = correct_ret_addr;
+}
+NOKPROBE_SYMBOL(arch_rethook_fixup_return);
+
+/*
+ * Called from arch_rethook_trampoline
+ */
+unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs)
+{
+ return rethook_trampoline_handler(regs, regs->gprs[15]);
+}
+NOKPROBE_SYMBOL(arch_rethook_trampoline_callback);
+
+/* assembler function that handles the rethook must not be probed itself */
+NOKPROBE_SYMBOL(arch_rethook_trampoline);
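For context, a hedged summary of how the callbacks in this new file fit together; this is a flow description derived from the code above, not additional functionality.

/*
 * 1. On function entry the rethook core calls arch_rethook_prepare(),
 *    which records the caller's gprs[14]/gprs[15] in the rethook node
 *    and redirects gprs[14] to arch_rethook_trampoline.
 * 2. When the probed function returns, it branches into the trampoline,
 *    which calls arch_rethook_trampoline_callback().
 * 3. rethook_trampoline_handler() runs the registered handlers and hands
 *    back the original return address, which arch_rethook_fixup_return()
 *    writes into gprs[14] so execution resumes at the real caller.
 */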
diff --git a/arch/s390/kernel/rethook.h b/arch/s390/kernel/rethook.h
new file mode 100644
index 000000000000..32f069eed3f3
--- /dev/null
+++ b/arch/s390/kernel/rethook.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __S390_RETHOOK_H
+#define __S390_RETHOOK_H
+
+unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs);
+
+#endif
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 696c9e007a36..8ec5cdf9dadc 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -149,6 +149,9 @@ int __bootdata(noexec_disabled);
unsigned long __bootdata(ident_map_size);
struct mem_detect_info __bootdata(mem_detect);
struct initrd_data __bootdata(initrd_data);
+unsigned long __bootdata(pgalloc_pos);
+unsigned long __bootdata(pgalloc_end);
+unsigned long __bootdata(pgalloc_low);
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata(__amode31_base);
@@ -411,15 +414,10 @@ void __init arch_call_rest_init(void)
call_on_stack_noreturn(rest_init, stack);
}
-static void __init setup_lowcore_dat_off(void)
+static void __init setup_lowcore(void)
{
- unsigned long int_psw_mask = PSW_KERNEL_BITS;
- struct lowcore *abs_lc, *lc;
+ struct lowcore *lc, *abs_lc;
unsigned long mcck_stack;
- unsigned long flags;
-
- if (IS_ENABLED(CONFIG_KASAN))
- int_psw_mask |= PSW_MASK_DAT;
/*
* Setup lowcore for boot cpu
@@ -430,17 +428,17 @@ static void __init setup_lowcore_dat_off(void)
panic("%s: Failed to allocate %zu bytes align=%zx\n",
__func__, sizeof(*lc), sizeof(*lc));
- lc->restart_psw.mask = PSW_KERNEL_BITS;
- lc->restart_psw.addr = (unsigned long) restart_int_handler;
- lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
+ lc->restart_psw.addr = __pa(restart_int_handler);
+ lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->external_new_psw.addr = (unsigned long) ext_int_handler;
- lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->svc_new_psw.addr = (unsigned long) system_call;
- lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
- lc->mcck_new_psw.mask = int_psw_mask;
+ lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
- lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->io_new_psw.addr = (unsigned long) io_int_handler;
lc->clock_comparator = clock_comparator_max;
lc->nodat_stack = ((unsigned long) &init_thread_union)
@@ -477,15 +475,7 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_fn = (unsigned long) do_restart;
lc->restart_data = 0;
lc->restart_source = -1U;
-
- abs_lc = get_abs_lowcore(&flags);
- abs_lc->restart_stack = lc->restart_stack;
- abs_lc->restart_fn = lc->restart_fn;
- abs_lc->restart_data = lc->restart_data;
- abs_lc->restart_source = lc->restart_source;
- abs_lc->restart_psw = lc->restart_psw;
- abs_lc->mcesad = lc->mcesad;
- put_abs_lowcore(abs_lc, flags);
+ __ctl_store(lc->cregs_save_area, 0, 15);
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
if (!mcck_stack)
@@ -499,34 +489,25 @@ static void __init setup_lowcore_dat_off(void)
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
lc->preempt_count = PREEMPT_DISABLED;
+ lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->user_asce = S390_lowcore.user_asce;
+
+ abs_lc = get_abs_lowcore();
+ abs_lc->restart_stack = lc->restart_stack;
+ abs_lc->restart_fn = lc->restart_fn;
+ abs_lc->restart_data = lc->restart_data;
+ abs_lc->restart_source = lc->restart_source;
+ abs_lc->restart_psw = lc->restart_psw;
+ abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
+ memcpy(abs_lc->cregs_save_area, lc->cregs_save_area, sizeof(abs_lc->cregs_save_area));
+ abs_lc->program_new_psw = lc->program_new_psw;
+ abs_lc->mcesad = lc->mcesad;
+ put_abs_lowcore(abs_lc);
set_prefix(__pa(lc));
lowcore_ptr[0] = lc;
-}
-
-static void __init setup_lowcore_dat_on(void)
-{
- struct lowcore *abs_lc;
- unsigned long flags;
- int i;
-
- __ctl_clear_bit(0, 28);
- S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.mcck_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
- __ctl_set_bit(0, 28);
- __ctl_store(S390_lowcore.cregs_save_area, 0, 15);
- if (abs_lowcore_map(0, lowcore_ptr[0], true))
+ if (abs_lowcore_map(0, lowcore_ptr[0], false))
panic("Couldn't setup absolute lowcore");
- abs_lowcore_mapped = true;
- abs_lc = get_abs_lowcore(&flags);
- abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
- abs_lc->program_new_psw = S390_lowcore.program_new_psw;
- for (i = 0; i < 16; i++)
- abs_lc->cregs_save_area[i] = S390_lowcore.cregs_save_area[i];
- put_abs_lowcore(abs_lc, flags);
}
static struct resource code_resource = {
@@ -619,7 +600,6 @@ static void __init setup_resources(void)
static void __init setup_memory_end(void)
{
- memblock_remove(ident_map_size, PHYS_ADDR_MAX - ident_map_size);
max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}
@@ -651,6 +631,14 @@ static struct notifier_block kdump_mem_nb = {
#endif
/*
+ * Reserve page tables created by decompressor
+ */
+static void __init reserve_pgtables(void)
+{
+ memblock_reserve(pgalloc_pos, pgalloc_end - pgalloc_pos);
+}
+
+/*
* Reserve memory for kdump kernel to be loaded with kexec
*/
static void __init reserve_crashkernel(void)
@@ -784,10 +772,10 @@ static void __init memblock_add_mem_detect_info(void)
get_mem_info_source(), mem_detect.info_source);
/* keep memblock lists close to the kernel */
memblock_set_bottom_up(true);
- for_each_mem_detect_block(i, &start, &end) {
+ for_each_mem_detect_usable_block(i, &start, &end)
memblock_add(start, end - start);
+ for_each_mem_detect_block(i, &start, &end)
memblock_physmem_add(start, end - start);
- }
memblock_set_bottom_up(false);
memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
}
@@ -1005,6 +993,7 @@ void __init setup_arch(char **cmdline_p)
setup_control_program_code();
/* Do some memory reservations *before* memory is added to memblock */
+ reserve_pgtables();
reserve_kernel();
reserve_initrd();
reserve_certificate_list();
@@ -1039,7 +1028,7 @@ void __init setup_arch(char **cmdline_p)
#endif
setup_resources();
- setup_lowcore_dat_off();
+ setup_lowcore();
smp_fill_possible_mask();
cpu_detect_mhz_feature();
cpu_init();
@@ -1051,15 +1040,14 @@ void __init setup_arch(char **cmdline_p)
static_branch_enable(&cpu_has_bear);
/*
- * Create kernel page tables and switch to virtual addressing.
+ * Create kernel page tables.
*/
paging_init();
- memcpy_real_init();
+
/*
* After paging_init created the kernel page table, the new PSWs
* in lowcore can now run with DAT enabled.
*/
- setup_lowcore_dat_on();
#ifdef CONFIG_CRASH_DUMP
smp_save_dump_ipl_cpu();
#endif
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 38258f817048..d63557d3868c 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -184,7 +184,7 @@ static int save_sigregs_ext(struct pt_regs *regs,
/* Save vector registers to signal stack */
if (MACHINE_HAS_VX) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
+ vxrs[i] = current->thread.fpu.vxrs[i].low;
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
sizeof(sregs_ext->vxrs_low)) ||
__copy_to_user(&sregs_ext->vxrs_high,
@@ -210,7 +210,7 @@ static int restore_sigregs_ext(struct pt_regs *regs,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
+ current->thread.fpu.vxrs[i].low = vxrs[i];
}
return 0;
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 0031325ce4bc..23c427284773 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -323,11 +323,10 @@ static void pcpu_delegate(struct pcpu *pcpu,
{
struct lowcore *lc, *abs_lc;
unsigned int source_cpu;
- unsigned long flags;
lc = lowcore_ptr[pcpu - pcpu_devices];
source_cpu = stap();
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+
if (pcpu->address == source_cpu) {
call_on_stack(2, stack, void, __pcpu_delegate,
pcpu_delegate_fn *, func, void *, data);
@@ -341,12 +340,12 @@ static void pcpu_delegate(struct pcpu *pcpu,
lc->restart_data = (unsigned long)data;
lc->restart_source = source_cpu;
} else {
- abs_lc = get_abs_lowcore(&flags);
+ abs_lc = get_abs_lowcore();
abs_lc->restart_stack = stack;
abs_lc->restart_fn = (unsigned long)func;
abs_lc->restart_data = (unsigned long)data;
abs_lc->restart_source = source_cpu;
- put_abs_lowcore(abs_lc, flags);
+ put_abs_lowcore(abs_lc);
}
__bpon();
asm volatile(
@@ -488,7 +487,7 @@ void smp_send_stop(void)
int cpu;
/* Disable all interrupts/machine checks */
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+ __load_psw_mask(PSW_KERNEL_BITS);
trace_hardirqs_off();
debug_set_critical();
@@ -593,7 +592,6 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set)
{
struct ec_creg_mask_parms parms = { .cr = cr, };
struct lowcore *abs_lc;
- unsigned long flags;
u64 ctlreg;
if (set) {
@@ -604,11 +602,11 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set)
parms.andval = ~(1UL << bit);
}
spin_lock(&ctl_lock);
- abs_lc = get_abs_lowcore(&flags);
+ abs_lc = get_abs_lowcore();
ctlreg = abs_lc->cregs_save_area[cr];
ctlreg = (ctlreg & parms.andval) | parms.orval;
abs_lc->cregs_save_area[cr] = ctlreg;
- put_abs_lowcore(abs_lc, flags);
+ put_abs_lowcore(abs_lc);
spin_unlock(&ctl_lock);
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 7ee455e8e3d5..0787010139f7 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -40,12 +40,12 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
if (!addr)
return -EINVAL;
-#ifdef CONFIG_KPROBES
+#ifdef CONFIG_RETHOOK
/*
- * Mark stacktraces with kretprobed functions on them
+ * Mark stacktraces with rethook trampolines on them
* as unreliable.
*/
- if (state.ip == (unsigned long)__kretprobe_trampoline)
+ if (state.ip == (unsigned long)arch_rethook_trampoline)
return -EINVAL;
#endif
diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S
index 2c8b14cc5556..e0f01ce251f5 100644
--- a/arch/s390/kernel/text_amode31.S
+++ b/arch/s390/kernel/text_amode31.S
@@ -63,6 +63,19 @@ ENTRY(_diag210_amode31)
ENDPROC(_diag210_amode31)
/*
+ * int diag8c(struct diag8c *addr, struct ccw_dev_id *devno, size_t len)
+*/
+ENTRY(_diag8c_amode31)
+ llgf %r3,0(%r3)
+ sam31
+ diag %r2,%r4,0x8c
+.Ldiag8c_ex:
+ sam64
+ lgfr %r2,%r3
+ BR_EX_AMODE31_r14
+ EX_TABLE_AMODE31(.Ldiag8c_ex, .Ldiag8c_ex)
+ENDPROC(_diag8c_amode31)
+/*
* int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode)
*/
ENTRY(_diag26c_amode31)
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 20262e3c0cff..b653ba8d51e6 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -216,6 +216,9 @@ SECTIONS
QUAD(__rela_dyn_start) /* rela_dyn_start */
QUAD(__rela_dyn_end) /* rela_dyn_end */
QUAD(_eamode31 - _samode31) /* amode31_size */
+ QUAD(init_mm)
+ QUAD(swapper_pg_dir)
+ QUAD(invalid_pg_dir)
} :NONE
/* Debugging sections. */
@@ -227,5 +230,6 @@ SECTIONS
DISCARDS
/DISCARD/ : {
*(.eh_frame)
+ *(.interp)
}
}
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index 5a053b393d5c..7231bf97b93a 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -47,7 +47,7 @@ static void print_backtrace(char *bt)
static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
unsigned long sp)
{
- int frame_count, prev_is_func2, seen_func2_func1, seen_kretprobe_trampoline;
+ int frame_count, prev_is_func2, seen_func2_func1, seen_arch_rethook_trampoline;
const int max_frames = 128;
struct unwind_state state;
size_t bt_pos = 0;
@@ -63,7 +63,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
frame_count = 0;
prev_is_func2 = 0;
seen_func2_func1 = 0;
- seen_kretprobe_trampoline = 0;
+ seen_arch_rethook_trampoline = 0;
unwind_for_each_frame(&state, task, regs, sp) {
unsigned long addr = unwind_get_return_address(&state);
char sym[KSYM_SYMBOL_LEN];
@@ -89,8 +89,8 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1"))
seen_func2_func1 = 1;
prev_is_func2 = str_has_prefix(sym, "unwindme_func2");
- if (str_has_prefix(sym, "__kretprobe_trampoline+0x0/"))
- seen_kretprobe_trampoline = 1;
+ if (str_has_prefix(sym, "arch_rethook_trampoline+0x0/"))
+ seen_arch_rethook_trampoline = 1;
}
/* Check the results. */
@@ -106,8 +106,8 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
kunit_err(current_test, "Maximum number of frames exceeded\n");
ret = -EINVAL;
}
- if (seen_kretprobe_trampoline) {
- kunit_err(current_test, "__kretprobe_trampoline+0x0 in unwinding results\n");
+ if (seen_arch_rethook_trampoline) {
+ kunit_err(current_test, "arch_rethook_trampoline+0x0 in unwinding results\n");
ret = -EINVAL;
}
if (ret || force_bt)
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 9953819d7959..ba5f80268878 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -33,10 +33,6 @@ enum address_markers_idx {
#endif
IDENTITY_AFTER_NR,
IDENTITY_AFTER_END_NR,
-#ifdef CONFIG_KASAN
- KASAN_SHADOW_START_NR,
- KASAN_SHADOW_END_NR,
-#endif
VMEMMAP_NR,
VMEMMAP_END_NR,
VMALLOC_NR,
@@ -47,6 +43,10 @@ enum address_markers_idx {
ABS_LOWCORE_END_NR,
MEMCPY_REAL_NR,
MEMCPY_REAL_END_NR,
+#ifdef CONFIG_KASAN
+ KASAN_SHADOW_START_NR,
+ KASAN_SHADOW_END_NR,
+#endif
};
static struct addr_marker address_markers[] = {
@@ -62,10 +62,6 @@ static struct addr_marker address_markers[] = {
#endif
[IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"},
[IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"},
-#ifdef CONFIG_KASAN
- [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
- [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
-#endif
[VMEMMAP_NR] = {0, "vmemmap Area Start"},
[VMEMMAP_END_NR] = {0, "vmemmap Area End"},
[VMALLOC_NR] = {0, "vmalloc Area Start"},
@@ -76,6 +72,10 @@ static struct addr_marker address_markers[] = {
[ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"},
[MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"},
[MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"},
+#ifdef CONFIG_KASAN
+ [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
+ [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
+#endif
{ -1, NULL }
};
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
index 1e4d2187541a..fe87291df95d 100644
--- a/arch/s390/mm/extable.c
+++ b/arch/s390/mm/extable.c
@@ -47,13 +47,16 @@ static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struc
return true;
}
-static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, struct pt_regs *regs)
+static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex,
+ bool pair, struct pt_regs *regs)
{
unsigned int reg_zero = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
regs->gprs[reg_err] = -EFAULT;
regs->gprs[reg_zero] = 0;
+ if (pair)
+ regs->gprs[reg_zero + 1] = 0;
regs->psw.addr = extable_fixup(ex);
return true;
}
@@ -75,7 +78,9 @@ bool fixup_exception(struct pt_regs *regs)
case EX_TYPE_UA_LOAD_MEM:
return ex_handler_ua_load_mem(ex, regs);
case EX_TYPE_UA_LOAD_REG:
- return ex_handler_ua_load_reg(ex, regs);
+ return ex_handler_ua_load_reg(ex, false, regs);
+ case EX_TYPE_UA_LOAD_REGPAIR:
+ return ex_handler_ua_load_reg(ex, true, regs);
}
panic("invalid exception table entry");
}
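A hedged illustration of the new REGPAIR case: for an exception-table entry whose zero-register field selects %r6, a faulting access now resumes at the fixup with

/*
 *	regs->gprs[reg_err] = -EFAULT;
 *	regs->gprs[6] = 0;	/* reg_zero */
 *	regs->gprs[7] = 0;	/* reg_zero + 1, the second half of the pair */
 */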
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 9649d9382e0a..a2632fd97d00 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -46,11 +46,15 @@
#define __SUBCODE_MASK 0x0600
#define __PF_RES_FIELD 0x8000000000000000ULL
-#define VM_FAULT_BADCONTEXT ((__force vm_fault_t) 0x010000)
-#define VM_FAULT_BADMAP ((__force vm_fault_t) 0x020000)
-#define VM_FAULT_BADACCESS ((__force vm_fault_t) 0x040000)
-#define VM_FAULT_SIGNAL ((__force vm_fault_t) 0x080000)
-#define VM_FAULT_PFAULT ((__force vm_fault_t) 0x100000)
+/*
+ * Allocate the private vm_fault codes from the top bits. Make sure they
+ * do not collide with the generic vm_fault_reason flags.
+ */
+#define VM_FAULT_BADCONTEXT ((__force vm_fault_t)0x80000000)
+#define VM_FAULT_BADMAP ((__force vm_fault_t)0x40000000)
+#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x20000000)
+#define VM_FAULT_SIGNAL ((__force vm_fault_t)0x10000000)
+#define VM_FAULT_PFAULT ((__force vm_fault_t)0x8000000)
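A hedged sanity note on the new values; the generic flag layout referenced here is an assumption about the core mm headers, not part of this patch.

/*
 * The architecture-independent vm_fault_reason flags (VM_FAULT_OOM,
 * VM_FAULT_SIGBUS, VM_FAULT_SIGSEGV, ...) are allocated from bit 0
 * upwards, so taking the s390-private codes from bit 31 downwards keeps
 * the two ranges disjoint.
 */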
enum fault_type {
KERNEL_FAULT,
@@ -96,6 +100,20 @@ static enum fault_type get_fault_type(struct pt_regs *regs)
return KERNEL_FAULT;
}
+static unsigned long get_fault_address(struct pt_regs *regs)
+{
+ unsigned long trans_exc_code = regs->int_parm_long;
+
+ return trans_exc_code & __FAIL_ADDR_MASK;
+}
+
+static bool fault_is_write(struct pt_regs *regs)
+{
+ unsigned long trans_exc_code = regs->int_parm_long;
+
+ return (trans_exc_code & store_indication) == 0x400;
+}
+
static int bad_address(void *p)
{
unsigned long dummy;
@@ -228,15 +246,26 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}
-static noinline void do_no_context(struct pt_regs *regs)
+static noinline void do_no_context(struct pt_regs *regs, vm_fault_t fault)
{
+ enum fault_type fault_type;
+ unsigned long address;
+ bool is_write;
+
if (fixup_exception(regs))
return;
+ fault_type = get_fault_type(regs);
+ if ((fault_type == KERNEL_FAULT) && (fault == VM_FAULT_BADCONTEXT)) {
+ address = get_fault_address(regs);
+ is_write = fault_is_write(regs);
+ if (kfence_handle_page_fault(address, is_write, regs))
+ return;
+ }
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
- if (get_fault_type(regs) == KERNEL_FAULT)
+ if (fault_type == KERNEL_FAULT)
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
" in virtual kernel address space\n");
else
@@ -255,7 +284,7 @@ static noinline void do_low_address(struct pt_regs *regs)
die (regs, "Low-address protection");
}
- do_no_context(regs);
+ do_no_context(regs, VM_FAULT_BADACCESS);
}
static noinline void do_sigbus(struct pt_regs *regs)
@@ -286,28 +315,28 @@ static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
fallthrough;
case VM_FAULT_BADCONTEXT:
case VM_FAULT_PFAULT:
- do_no_context(regs);
+ do_no_context(regs, fault);
break;
case VM_FAULT_SIGNAL:
if (!user_mode(regs))
- do_no_context(regs);
+ do_no_context(regs, fault);
break;
default: /* fault & VM_FAULT_ERROR */
if (fault & VM_FAULT_OOM) {
if (!user_mode(regs))
- do_no_context(regs);
+ do_no_context(regs, fault);
else
pagefault_out_of_memory();
} else if (fault & VM_FAULT_SIGSEGV) {
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
- do_no_context(regs);
+ do_no_context(regs, fault);
else
do_sigsegv(regs, SEGV_MAPERR);
} else if (fault & VM_FAULT_SIGBUS) {
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
- do_no_context(regs);
+ do_no_context(regs, fault);
else
do_sigbus(regs);
} else
@@ -334,7 +363,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
struct mm_struct *mm;
struct vm_area_struct *vma;
enum fault_type type;
- unsigned long trans_exc_code;
unsigned long address;
unsigned int flags;
vm_fault_t fault;
@@ -351,9 +379,8 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
return 0;
mm = tsk->mm;
- trans_exc_code = regs->int_parm_long;
- address = trans_exc_code & __FAIL_ADDR_MASK;
- is_write = (trans_exc_code & store_indication) == 0x400;
+ address = get_fault_address(regs);
+ is_write = fault_is_write(regs);
/*
* Verify that the fault happened in user space, that
@@ -364,8 +391,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
type = get_fault_type(regs);
switch (type) {
case KERNEL_FAULT:
- if (kfence_handle_page_fault(address, is_write, regs))
- return 0;
goto out;
case USER_FAULT:
case GMAP_FAULT:
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 30ab55f868f6..144447d5cb4c 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -52,9 +52,9 @@
#include <linux/virtio_config.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
-static pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
+pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
-unsigned long s390_invalid_asce;
+unsigned long __bootdata_preserved(s390_invalid_asce);
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
@@ -93,37 +93,8 @@ static void __init setup_zero_pages(void)
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
- unsigned long pgd_type, asce_bits;
- psw_t psw;
-
- s390_invalid_asce = (unsigned long)invalid_pg_dir;
- s390_invalid_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
- crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
- init_mm.pgd = swapper_pg_dir;
- if (VMALLOC_END > _REGION2_SIZE) {
- asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
- pgd_type = _REGION2_ENTRY_EMPTY;
- } else {
- asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
- pgd_type = _REGION3_ENTRY_EMPTY;
- }
- init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
- S390_lowcore.kernel_asce = init_mm.context.asce;
- S390_lowcore.user_asce = s390_invalid_asce;
- crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
- vmem_map_init();
- kasan_copy_shadow_mapping();
-
- /* enable virtual mapping in kernel mode */
- __ctl_load(S390_lowcore.kernel_asce, 1, 1);
- __ctl_load(S390_lowcore.user_asce, 7, 7);
- __ctl_load(S390_lowcore.kernel_asce, 13, 13);
- psw.mask = __extract_psw();
- psw_bits(psw).dat = 1;
- psw_bits(psw).as = PSW_BITS_AS_HOME;
- __load_psw_mask(psw.mask);
- kasan_free_early_identity();
+ vmem_map_init();
sparse_init();
zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 9f988d4582ed..ef89a5f26853 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kasan.h>
#include <linux/sched/task.h>
-#include <linux/memblock.h>
#include <linux/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/kasan.h>
@@ -15,16 +14,11 @@
static unsigned long segment_pos __initdata;
static unsigned long segment_low __initdata;
-static unsigned long pgalloc_pos __initdata;
-static unsigned long pgalloc_low __initdata;
-static unsigned long pgalloc_freeable __initdata;
static bool has_edat __initdata;
static bool has_nx __initdata;
#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
-static pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
-
static void __init kasan_early_panic(const char *reason)
{
sclp_early_printk("The Linux kernel failed to boot with the KernelAddressSanitizer:\n");
@@ -39,7 +33,7 @@ static void * __init kasan_early_alloc_segment(void)
if (segment_pos < segment_low)
kasan_early_panic("out of memory during initialisation\n");
- return (void *)segment_pos;
+ return __va(segment_pos);
}
static void * __init kasan_early_alloc_pages(unsigned int order)
@@ -49,7 +43,7 @@ static void * __init kasan_early_alloc_pages(unsigned int order)
if (pgalloc_pos < pgalloc_low)
kasan_early_panic("out of memory during initialisation\n");
- return (void *)pgalloc_pos;
+ return __va(pgalloc_pos);
}
static void * __init kasan_early_crst_alloc(unsigned long val)
@@ -81,35 +75,37 @@ static pte_t * __init kasan_early_pte_alloc(void)
}
enum populate_mode {
- POPULATE_ONE2ONE,
POPULATE_MAP,
POPULATE_ZERO_SHADOW,
POPULATE_SHALLOW
};
+
+static inline pgprot_t pgprot_clear_bit(pgprot_t pgprot, unsigned long bit)
+{
+ return __pgprot(pgprot_val(pgprot) & ~bit);
+}
+
static void __init kasan_early_pgtable_populate(unsigned long address,
unsigned long end,
enum populate_mode mode)
{
- unsigned long pgt_prot_zero, pgt_prot, sgt_prot;
+ pgprot_t pgt_prot_zero = PAGE_KERNEL_RO;
+ pgprot_t pgt_prot = PAGE_KERNEL;
+ pgprot_t sgt_prot = SEGMENT_KERNEL;
pgd_t *pg_dir;
p4d_t *p4_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
+ pmd_t pmd;
+ pte_t pte;
- pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO);
- if (!has_nx)
- pgt_prot_zero &= ~_PAGE_NOEXEC;
- pgt_prot = pgprot_val(PAGE_KERNEL);
- sgt_prot = pgprot_val(SEGMENT_KERNEL);
- if (!has_nx || mode == POPULATE_ONE2ONE) {
- pgt_prot &= ~_PAGE_NOEXEC;
- sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
+ if (!has_nx) {
+ pgt_prot_zero = pgprot_clear_bit(pgt_prot_zero, _PAGE_NOEXEC);
+ pgt_prot = pgprot_clear_bit(pgt_prot, _PAGE_NOEXEC);
+ sgt_prot = pgprot_clear_bit(sgt_prot, _SEGMENT_ENTRY_NOEXEC);
}
- /*
- * The first 1MB of 1:1 mapping is mapped with 4KB pages
- */
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
@@ -166,16 +162,13 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte);
address = (address + PMD_SIZE) & PMD_MASK;
continue;
- } else if (has_edat && address) {
- void *page;
-
- if (mode == POPULATE_ONE2ONE) {
- page = (void *)address;
- } else {
- page = kasan_early_alloc_segment();
- memset(page, 0, _SEGMENT_SIZE);
- }
- set_pmd(pm_dir, __pmd(__pa(page) | sgt_prot));
+ } else if (has_edat) {
+ void *page = kasan_early_alloc_segment();
+
+ memset(page, 0, _SEGMENT_SIZE);
+ pmd = __pmd(__pa(page));
+ pmd = set_pmd_bit(pmd, sgt_prot);
+ set_pmd(pm_dir, pmd);
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
@@ -192,18 +185,18 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
void *page;
switch (mode) {
- case POPULATE_ONE2ONE:
- page = (void *)address;
- set_pte(pt_dir, __pte(__pa(page) | pgt_prot));
- break;
case POPULATE_MAP:
page = kasan_early_alloc_pages(0);
memset(page, 0, PAGE_SIZE);
- set_pte(pt_dir, __pte(__pa(page) | pgt_prot));
+ pte = __pte(__pa(page));
+ pte = set_pte_bit(pte, pgt_prot);
+ set_pte(pt_dir, pte);
break;
case POPULATE_ZERO_SHADOW:
page = kasan_early_shadow_page;
- set_pte(pt_dir, __pte(__pa(page) | pgt_prot_zero));
+ pte = __pte(__pa(page));
+ pte = set_pte_bit(pte, pgt_prot_zero);
+ set_pte(pt_dir, pte);
break;
case POPULATE_SHALLOW:
/* should never happen */
@@ -214,29 +207,6 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
}
}
-static void __init kasan_set_pgd(pgd_t *pgd, unsigned long asce_type)
-{
- unsigned long asce_bits;
-
- asce_bits = asce_type | _ASCE_TABLE_LENGTH;
- S390_lowcore.kernel_asce = (__pa(pgd) & PAGE_MASK) | asce_bits;
- S390_lowcore.user_asce = S390_lowcore.kernel_asce;
-
- __ctl_load(S390_lowcore.kernel_asce, 1, 1);
- __ctl_load(S390_lowcore.kernel_asce, 7, 7);
- __ctl_load(S390_lowcore.kernel_asce, 13, 13);
-}
-
-static void __init kasan_enable_dat(void)
-{
- psw_t psw;
-
- psw.mask = __extract_psw();
- psw_bits(psw).dat = 1;
- psw_bits(psw).as = PSW_BITS_AS_HOME;
- __load_psw_mask(psw.mask);
-}
-
static void __init kasan_early_detect_facilities(void)
{
if (test_facility(8)) {
@@ -251,153 +221,81 @@ static void __init kasan_early_detect_facilities(void)
void __init kasan_early_init(void)
{
- unsigned long shadow_alloc_size;
- unsigned long initrd_end;
- unsigned long memsize;
- unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO);
- pte_t pte_z;
+ pte_t pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
+ unsigned long untracked_end = MODULES_VADDR;
+ unsigned long shadow_alloc_size;
+ unsigned long start, end;
+ int i;
kasan_early_detect_facilities();
if (!has_nx)
- pgt_prot &= ~_PAGE_NOEXEC;
- pte_z = __pte(__pa(kasan_early_shadow_page) | pgt_prot);
-
- memsize = get_mem_detect_end();
- if (!memsize)
- kasan_early_panic("cannot detect physical memory size\n");
- /*
- * Kasan currently supports standby memory but only if it follows
- * online memory (default allocation), i.e. no memory holes.
- * - memsize represents end of online memory
- * - ident_map_size represents online + standby and memory limits
- * accounted.
- * Kasan maps "memsize" right away.
- * [0, memsize] - as identity mapping
- * [__sha(0), __sha(memsize)] - shadow memory for identity mapping
- * The rest [memsize, ident_map_size] if memsize < ident_map_size
- * could be mapped/unmapped dynamically later during memory hotplug.
- */
- memsize = min(memsize, ident_map_size);
+ pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE));
- crst_table_init((unsigned long *)early_pg_dir, _REGION2_ENTRY_EMPTY);
/* init kasan zero shadow */
- crst_table_init((unsigned long *)kasan_early_shadow_p4d,
- p4d_val(p4d_z));
- crst_table_init((unsigned long *)kasan_early_shadow_pud,
- pud_val(pud_z));
- crst_table_init((unsigned long *)kasan_early_shadow_pmd,
- pmd_val(pmd_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
- shadow_alloc_size = memsize >> KASAN_SHADOW_SCALE_SHIFT;
- pgalloc_low = round_up((unsigned long)_end, _SEGMENT_SIZE);
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
- initrd_end =
- round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
- pgalloc_low = max(pgalloc_low, initrd_end);
- }
-
- if (pgalloc_low + shadow_alloc_size > memsize)
- kasan_early_panic("out of memory during initialisation\n");
-
if (has_edat) {
- segment_pos = round_down(memsize, _SEGMENT_SIZE);
+ shadow_alloc_size = get_mem_detect_usable_total() >> KASAN_SHADOW_SCALE_SHIFT;
+ segment_pos = round_down(pgalloc_pos, _SEGMENT_SIZE);
segment_low = segment_pos - shadow_alloc_size;
+ segment_low = round_down(segment_low, _SEGMENT_SIZE);
pgalloc_pos = segment_low;
- } else {
- pgalloc_pos = memsize;
}
- init_mm.pgd = early_pg_dir;
/*
* Current memory layout:
- * +- 0 -------------+ +- shadow start -+
- * | 1:1 ram mapping | /| 1/8 ram |
- * | | / | |
- * +- end of ram ----+ / +----------------+
- * | ... gap ... | / | |
- * | |/ | kasan |
- * +- shadow start --+ | zero |
- * | 1/8 addr space | | page |
- * +- shadow end -+ | mapping |
- * | ... gap ... |\ | (untracked) |
- * +- vmalloc area -+ \ | |
- * | vmalloc_size | \ | |
- * +- modules vaddr -+ \ +----------------+
- * | 2Gb | \| unmapped | allocated per module
- * +-----------------+ +- shadow end ---+
+ * +- 0 -------------+ +- shadow start -+
+ * |1:1 ident mapping| /|1/8 of ident map|
+ * | | / | |
+ * +-end of ident map+ / +----------------+
+ * | ... gap ... | / | kasan |
+ * | | / | zero page |
+ * +- vmalloc area -+ / | mapping |
+ * | vmalloc_size | / | (untracked) |
+ * +- modules vaddr -+ / +----------------+
+ * | 2Gb |/ | unmapped | allocated per module
+ * +- shadow start -+ +----------------+
+ * | 1/8 addr space | | zero pg mapping| (untracked)
+ * +- shadow end ----+---------+- shadow end ---+
*
* Current memory layout (KASAN_VMALLOC):
- * +- 0 -------------+ +- shadow start -+
- * | 1:1 ram mapping | /| 1/8 ram |
- * | | / | |
- * +- end of ram ----+ / +----------------+
- * | ... gap ... | / | kasan |
- * | |/ | zero |
- * +- shadow start --+ | page |
- * | 1/8 addr space | | mapping |
- * +- shadow end -+ | (untracked) |
- * | ... gap ... |\ | |
- * +- vmalloc area -+ \ +- vmalloc area -+
- * | vmalloc_size | \ |shallow populate|
- * +- modules vaddr -+ \ +- modules area -+
- * | 2Gb | \|shallow populate|
- * +-----------------+ +- shadow end ---+
+ * +- 0 -------------+ +- shadow start -+
+ * |1:1 ident mapping| /|1/8 of ident map|
+ * | | / | |
+ * +-end of ident map+ / +----------------+
+ * | ... gap ... | / | kasan zero page| (untracked)
+ * | | / | mapping |
+ * +- vmalloc area -+ / +----------------+
+ * | vmalloc_size | / |shallow populate|
+ * +- modules vaddr -+ / +----------------+
+ * | 2Gb |/ |shallow populate|
+ * +- shadow start -+ +----------------+
+ * | 1/8 addr space | | zero pg mapping| (untracked)
+ * +- shadow end ----+---------+- shadow end ---+
*/
/* populate kasan shadow (for identity mapping and zero page mapping) */
- kasan_early_pgtable_populate(__sha(0), __sha(memsize), POPULATE_MAP);
+ for_each_mem_detect_usable_block(i, &start, &end)
+ kasan_early_pgtable_populate(__sha(start), __sha(end), POPULATE_MAP);
if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ untracked_end = VMALLOC_START;
/* shallowly populate kasan shadow for vmalloc and modules */
kasan_early_pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END),
POPULATE_SHALLOW);
}
/* populate kasan shadow for untracked memory */
- kasan_early_pgtable_populate(__sha(ident_map_size),
- IS_ENABLED(CONFIG_KASAN_VMALLOC) ?
- __sha(VMALLOC_START) :
- __sha(MODULES_VADDR),
+ kasan_early_pgtable_populate(__sha(ident_map_size), __sha(untracked_end),
POPULATE_ZERO_SHADOW);
kasan_early_pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE),
POPULATE_ZERO_SHADOW);
- /* memory allocated for identity mapping structs will be freed later */
- pgalloc_freeable = pgalloc_pos;
- /* populate identity mapping */
- kasan_early_pgtable_populate(0, memsize, POPULATE_ONE2ONE);
- kasan_set_pgd(early_pg_dir, _ASCE_TYPE_REGION2);
- kasan_enable_dat();
/* enable kasan */
init_task.kasan_depth = 0;
- memblock_reserve(pgalloc_pos, memsize - pgalloc_pos);
sclp_early_printk("KernelAddressSanitizer initialized\n");
}
-
-void __init kasan_copy_shadow_mapping(void)
-{
- /*
- * At this point we are still running on early pages setup early_pg_dir,
- * while swapper_pg_dir has just been initialized with identity mapping.
- * Carry over shadow memory region from early_pg_dir to swapper_pg_dir.
- */
-
- pgd_t *pg_dir_src;
- pgd_t *pg_dir_dst;
- p4d_t *p4_dir_src;
- p4d_t *p4_dir_dst;
-
- pg_dir_src = pgd_offset_raw(early_pg_dir, KASAN_SHADOW_START);
- pg_dir_dst = pgd_offset_raw(init_mm.pgd, KASAN_SHADOW_START);
- p4_dir_src = p4d_offset(pg_dir_src, KASAN_SHADOW_START);
- p4_dir_dst = p4d_offset(pg_dir_dst, KASAN_SHADOW_START);
- memcpy(p4_dir_dst, p4_dir_src,
- (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t));
-}
-
-void __init kasan_free_early_identity(void)
-{
- memblock_phys_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos);
-}
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 4824d1cd33d8..d02a61620cfa 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -21,7 +21,7 @@
#include <asm/maccess.h>
unsigned long __bootdata_preserved(__memcpy_real_area);
-static __ro_after_init pte_t *memcpy_real_ptep;
+pte_t *__bootdata_preserved(memcpy_real_ptep);
static DEFINE_MUTEX(memcpy_real_mutex);
static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
@@ -68,28 +68,17 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
long copied;
spin_lock_irqsave(&s390_kernel_write_lock, flags);
- if (!(flags & PSW_MASK_DAT)) {
- memcpy(dst, src, size);
- } else {
- while (size) {
- copied = s390_kernel_write_odd(tmp, src, size);
- tmp += copied;
- src += copied;
- size -= copied;
- }
+ while (size) {
+ copied = s390_kernel_write_odd(tmp, src, size);
+ tmp += copied;
+ src += copied;
+ size -= copied;
}
spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
return dst;
}
-void __init memcpy_real_init(void)
-{
- memcpy_real_ptep = vmem_get_alloc_pte(__memcpy_real_area, true);
- if (!memcpy_real_ptep)
- panic("Couldn't setup memcpy real area");
-}
-
size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
size_t len, copied, res = 0;
@@ -162,7 +151,6 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
void *ptr = phys_to_virt(addr);
void *bounce = ptr;
struct lowcore *abs_lc;
- unsigned long flags;
unsigned long size;
int this_cpu, cpu;
@@ -178,10 +166,10 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
goto out;
size = PAGE_SIZE - (addr & ~PAGE_MASK);
if (addr < sizeof(struct lowcore)) {
- abs_lc = get_abs_lowcore(&flags);
+ abs_lc = get_abs_lowcore();
ptr = (void *)abs_lc + addr;
memcpy(bounce, ptr, size);
- put_abs_lowcore(abs_lc, flags);
+ put_abs_lowcore(abs_lc);
} else if (cpu == this_cpu) {
ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu]));
memcpy(bounce, ptr, size);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4909dcd762e8..6effb24de6d9 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -302,6 +302,31 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(ptep_xchg_direct);
+/*
+ * The caller must check that the new PTE differs from the old one only in the
+ * _PAGE_PROTECT HW bit, so that RDP can be used instead of IPTE. See also the
+ * comments at pte_allow_rdp().
+ */
+void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t new)
+{
+ preempt_disable();
+ atomic_inc(&mm->context.flush_count);
+ if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __ptep_rdp(addr, ptep, 0, 0, 1);
+ else
+ __ptep_rdp(addr, ptep, 0, 0, 0);
+ /*
+ * RDP does not invalidate the PTE; it only clears _PAGE_PROTECT. The PTE
+ * therefore remains valid and active, and the architecture does not allow
+ * changing it. Writing a new value that differs only in SW bits is allowed,
+ * however.
+ */
+ set_pte(ptep, new);
+ atomic_dec(&mm->context.flush_count);
+ preempt_enable();
+}
+EXPORT_SYMBOL(ptep_reset_dat_prot);
+
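For orientation, a caller is expected to take the RDP path only when the old and new PTE values differ in nothing but the _PAGE_PROTECT HW bit, which is what pte_allow_rdp() (referenced in the comment above) checks. A minimal sketch of such a caller follows; the MACHINE_HAS_RDP test is an assumption here and the exact pte_allow_rdp() signature may differ:

/* Illustrative caller only, not part of this patch; names are assumed. */
static void change_pte_prot(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep, pte_t new)
{
	if (MACHINE_HAS_RDP && pte_allow_rdp(*ptep, new))
		ptep_reset_dat_prot(mm, addr, ptep, new);
	else
		ptep_xchg_direct(mm, addr, ptep, new);
}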
pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t new)
{
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index ee1a97078527..4113a7ffa149 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/sort.h>
#include <asm/cacheflush.h>
#include <asm/nospec-branch.h>
#include <asm/pgalloc.h>
@@ -296,10 +297,7 @@ static void try_free_pmd_table(pud_t *pud, unsigned long start)
/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
if (end > VMALLOC_START)
return;
-#ifdef CONFIG_KASAN
- if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
- return;
-#endif
+
pmd = pmd_offset(pud, start);
for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
if (!pmd_none(*pmd))
@@ -371,10 +369,6 @@ static void try_free_pud_table(p4d_t *p4d, unsigned long start)
/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
if (end > VMALLOC_START)
return;
-#ifdef CONFIG_KASAN
- if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
- return;
-#endif
pud = pud_offset(p4d, start);
for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
@@ -425,10 +419,6 @@ static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
if (end > VMALLOC_START)
return;
-#ifdef CONFIG_KASAN
- if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
- return;
-#endif
p4d = p4d_offset(pgd, start);
for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
@@ -657,6 +647,23 @@ void vmem_unmap_4k_page(unsigned long addr)
mutex_unlock(&vmem_mutex);
}
+static int __init memblock_region_cmp(const void *a, const void *b)
+{
+ const struct memblock_region *r1 = a;
+ const struct memblock_region *r2 = b;
+
+ if (r1->base < r2->base)
+ return -1;
+ if (r1->base > r2->base)
+ return 1;
+ return 0;
+}
+
+static void __init memblock_region_swap(void *a, void *b, int size)
+{
+ swap(*(struct memblock_region *)a, *(struct memblock_region *)b);
+}
+
/*
* map whole physical memory to virtual memory (identity mapping)
* we reserve enough space in the vmalloc area for vmemmap to hotplug
@@ -664,11 +671,68 @@ void vmem_unmap_4k_page(unsigned long addr)
*/
void __init vmem_map_init(void)
{
+ struct memblock_region memory_rwx_regions[] = {
+ {
+ .base = 0,
+ .size = sizeof(struct lowcore),
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __pa(_stext),
+ .size = _etext - _stext,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __pa(_sinittext),
+ .size = _einittext - _sinittext,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __stext_amode31,
+ .size = __etext_amode31 - __stext_amode31,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ };
+ struct memblock_type memory_rwx = {
+ .regions = memory_rwx_regions,
+ .cnt = ARRAY_SIZE(memory_rwx_regions),
+ .max = ARRAY_SIZE(memory_rwx_regions),
+ };
phys_addr_t base, end;
u64 i;
- for_each_mem_range(i, &base, &end)
- vmem_add_range(base, end - base);
+ /*
+ * Set the RW+NX attribute on all memory, except for the regions enumerated
+ * in the memory_rwx exclude type. These regions need different attributes,
+ * which are enforced afterwards.
+ *
+ * The iterate and exclude types passed to __for_each_mem_range() must be
+ * sorted. The relative location of _stext and _sinittext is hardcoded in the
+ * linker script, but the locations of __stext_amode31 and the kernel image
+ * itself are chosen dynamically. Thus, sort the exclude type.
+ */
+ sort(&memory_rwx_regions,
+ ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]),
+ memblock_region_cmp, memblock_region_swap);
+ __for_each_mem_range(i, &memblock.memory, &memory_rwx,
+ NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) {
+ __set_memory((unsigned long)__va(base),
+ (end - base) >> PAGE_SHIFT,
+ SET_MEMORY_RW | SET_MEMORY_NX);
+ }
+
__set_memory((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
@@ -678,15 +742,14 @@ void __init vmem_map_init(void)
__set_memory((unsigned long)_sinittext,
(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
- __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
+ __set_memory(__stext_amode31,
+ (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
- /* lowcore requires 4k mapping for real addresses / prefixing */
- set_memory_4k(0, LC_PAGES);
-
/* lowcore must be executable for LPSWE */
- if (!static_key_enabled(&cpu_has_bear))
- set_memory_x(0, 1);
+ if (static_key_enabled(&cpu_has_bear))
+ set_memory_nx(0, 1);
+ set_memory_nx(PAGE_SIZE, 1);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index af35052d06ed..d0846ba818ee 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -30,6 +30,7 @@
#include <asm/facility.h>
#include <asm/nospec-branch.h>
#include <asm/set_memory.h>
+#include <asm/text-patching.h>
#include "bpf_jit.h"
struct bpf_jit {
@@ -50,12 +51,13 @@ struct bpf_jit {
int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
int excnt; /* Number of exception table entries */
+ int prologue_plt_ret; /* Return address for prologue hotpatch PLT */
+ int prologue_plt; /* Start of prologue hotpatch PLT */
};
#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
#define SEEN_LITERAL BIT(1) /* code uses literals */
#define SEEN_FUNC BIT(2) /* calls C functions */
-#define SEEN_TAIL_CALL BIT(3) /* code uses tail calls */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM)
/*
@@ -68,6 +70,10 @@ struct bpf_jit {
#define REG_0 REG_W0 /* Register 0 */
#define REG_1 REG_W1 /* Register 1 */
#define REG_2 BPF_REG_1 /* Register 2 */
+#define REG_3 BPF_REG_2 /* Register 3 */
+#define REG_4 BPF_REG_3 /* Register 4 */
+#define REG_7 BPF_REG_6 /* Register 7 */
+#define REG_8 BPF_REG_7 /* Register 8 */
#define REG_14 BPF_REG_0 /* Register 14 */
/*
@@ -507,20 +513,58 @@ static void bpf_skip(struct bpf_jit *jit, int size)
}
/*
+ * PLT for hotpatchable calls. The calling convention is the same as for the
+ * ftrace hotpatch trampolines: %r0 holds the return address, %r1 is clobbered.
+ */
+extern const char bpf_plt[];
+extern const char bpf_plt_ret[];
+extern const char bpf_plt_target[];
+extern const char bpf_plt_end[];
+#define BPF_PLT_SIZE 32
+asm(
+ ".pushsection .rodata\n"
+ " .align 8\n"
+ "bpf_plt:\n"
+ " lgrl %r0,bpf_plt_ret\n"
+ " lgrl %r1,bpf_plt_target\n"
+ " br %r1\n"
+ " .align 8\n"
+ "bpf_plt_ret: .quad 0\n"
+ "bpf_plt_target: .quad 0\n"
+ "bpf_plt_end:\n"
+ " .popsection\n"
+);
+
+static void bpf_jit_plt(void *plt, void *ret, void *target)
+{
+ memcpy(plt, bpf_plt, BPF_PLT_SIZE);
+ *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
+ *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target;
+}
+
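As a quick check on the 32-byte figure above: the PLT code is lgrl (6 bytes) + lgrl (6) + br (2) = 14 bytes, padded by the .align 8 to 16, followed by two 8-byte literal slots for the return and target addresses. bpf_int_jit_compile() later warns if bpf_plt_end - bpf_plt deviates from BPF_PLT_SIZE; the same arithmetic could be restated at compile time roughly as follows (a sketch, not part of the patch):

/* Sketch only: mirrors the runtime WARN_ON_ONCE() size check. */
static_assert(ALIGN(6 + 6 + 2, 8) + 2 * sizeof(u64) == 32 /* BPF_PLT_SIZE */);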
+/*
* Emit function prologue
*
* Save registers and create stack frame if necessary.
- * See stack frame layout desription in "bpf_jit.h"!
+ * See stack frame layout description in "bpf_jit.h"!
*/
-static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
+static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
+ u32 stack_depth)
{
- if (jit->seen & SEEN_TAIL_CALL) {
+ /* No-op for hotpatching */
+ /* brcl 0,prologue_plt */
+ EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
+ jit->prologue_plt_ret = jit->prg;
+
+ if (fp->aux->func_idx == 0) {
+ /* Initialize the tail call counter in the main program. */
/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
} else {
/*
- * There are no tail calls. Insert nops in order to have
- * tail_call_start at a predictable offset.
+ * Skip the tail call counter initialization in subprograms.
+ * Insert nops in order to have tail_call_start at a
+ * predictable offset.
*/
bpf_skip(jit, 6);
}
@@ -558,6 +602,43 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
}
/*
+ * Emit an expoline for a jump that follows
+ */
+static void emit_expoline(struct bpf_jit *jit)
+{
+ /* exrl %r0,.+10 */
+ EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+ /* j . */
+ EMIT4_PCREL(0xa7f40000, 0);
+}
+
+/*
+ * Emit __s390_indirect_jump_r1 thunk if necessary
+ */
+static void emit_r1_thunk(struct bpf_jit *jit)
+{
+ if (nospec_uses_trampoline()) {
+ jit->r1_thunk_ip = jit->prg;
+ emit_expoline(jit);
+ /* br %r1 */
+ _EMIT2(0x07f1);
+ }
+}
+
+/*
+ * Call r1 either directly or via __s390_indirect_jump_r1 thunk
+ */
+static void call_r1(struct bpf_jit *jit)
+{
+ if (nospec_uses_trampoline())
+ /* brasl %r14,__s390_indirect_jump_r1 */
+ EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+ else
+ /* basr %r14,%r1 */
+ EMIT2(0x0d00, REG_14, REG_1);
+}
+
+/*
* Function epilogue
*/
static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
@@ -570,25 +651,20 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
if (nospec_uses_trampoline()) {
jit->r14_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r14 thunk */
- /* exrl %r0,.+10 */
- EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
- /* j . */
- EMIT4_PCREL(0xa7f40000, 0);
+ emit_expoline(jit);
}
/* br %r14 */
_EMIT2(0x07fe);
- if ((nospec_uses_trampoline()) &&
- (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) {
- jit->r1_thunk_ip = jit->prg;
- /* Generate __s390_indirect_jump_r1 thunk */
- /* exrl %r0,.+10 */
- EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
- /* j . */
- EMIT4_PCREL(0xa7f40000, 0);
- /* br %r1 */
- _EMIT2(0x07f1);
- }
+ if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
+ emit_r1_thunk(jit);
+
+ jit->prg = ALIGN(jit->prg, 8);
+ jit->prologue_plt = jit->prg;
+ if (jit->prg_buf)
+ bpf_jit_plt(jit->prg_buf + jit->prg,
+ jit->prg_buf + jit->prologue_plt_ret, NULL);
+ jit->prg += BPF_PLT_SIZE;
}
static int get_probe_mem_regno(const u8 *insn)
@@ -663,6 +739,34 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
}
/*
+ * Sign-extend the register if necessary
+ */
+static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
+{
+ if (!(flags & BTF_FMODEL_SIGNED_ARG))
+ return 0;
+
+ switch (size) {
+ case 1:
+ /* lgbr %r,%r */
+ EMIT4(0xb9060000, r, r);
+ return 0;
+ case 2:
+ /* lghr %r,%r */
+ EMIT4(0xb9070000, r, r);
+ return 0;
+ case 4:
+ /* lgfr %r,%r */
+ EMIT4(0xb9140000, r, r);
+ return 0;
+ case 8:
+ return 0;
+ default:
+ return -1;
+ }
+}
+
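The lgbr/lghr/lgfr instructions emitted above widen the low 8, 16 or 32 bits of a register to a signed 64-bit value. A plain-C analogue, purely for illustration (the JIT emits the instructions directly):

/* C equivalent of the emitted sign-extension instructions. */
static inline s64 widen_signed(u64 reg, u8 size)
{
	switch (size) {
	case 1: return (s8)reg;		/* lgbr */
	case 2: return (s16)reg;	/* lghr */
	case 4: return (s32)reg;	/* lgfr */
	default: return reg;
	}
}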
+/*
* Compile one eBPF instruction into s390x code
*
* NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
@@ -1297,9 +1401,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
*/
case BPF_JMP | BPF_CALL:
{
- u64 func;
+ const struct btf_func_model *m;
bool func_addr_fixed;
- int ret;
+ int j, ret;
+ u64 func;
ret = bpf_jit_get_func_addr(fp, insn, extra_pass,
&func, &func_addr_fixed);
@@ -1308,15 +1413,38 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC;
+ /*
+ * Copy the tail call counter to where the callee expects it.
+ *
+ * Note 1: The callee can increment the tail call counter, but
+ * we do not load it back, since the x86 JIT does not do this
+ * either.
+ *
+ * Note 2: We assume that the verifier does not let us call the
+ * main program, which clears the tail call counter on entry.
+ */
+ /* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
+ _EMIT6(0xd203f000 | STK_OFF_TCCNT,
+ 0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));
+
+ /* Sign-extend the kfunc arguments. */
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ m = bpf_jit_find_kfunc_model(fp, insn);
+ if (!m)
+ return -1;
+
+ for (j = 0; j < m->nr_args; j++) {
+ if (sign_extend(jit, BPF_REG_1 + j,
+ m->arg_size[j],
+ m->arg_flags[j]))
+ return -1;
+ }
+ }
+
/* lgrl %w1,func */
EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
- if (nospec_uses_trampoline()) {
- /* brasl %r14,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
- } else {
- /* basr %r14,%w1 */
- EMIT2(0x0d00, REG_14, REG_W1);
- }
+ /* %r1() */
+ call_r1(jit);
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
break;
@@ -1329,10 +1457,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
* B1: pointer to ctx
* B2: pointer to bpf_array
* B3: index in bpf_array
- */
- jit->seen |= SEEN_TAIL_CALL;
-
- /*
+ *
* if (index >= array->map.max_entries)
* goto out;
*/
@@ -1393,8 +1518,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* lg %r1,bpf_func(%r1) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
offsetof(struct bpf_prog, bpf_func));
- /* bc 0xf,tail_call_start(%r1) */
- _EMIT4(0x47f01000 + jit->tail_call_start);
+ if (nospec_uses_trampoline()) {
+ jit->seen |= SEEN_FUNC;
+ /* aghi %r1,tail_call_start */
+ EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
+ /* brcl 0xf,__s390_indirect_jump_r1 */
+ EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip);
+ } else {
+ /* bc 0xf,tail_call_start(%r1) */
+ _EMIT4(0x47f01000 + jit->tail_call_start);
+ }
/* out: */
if (jit->prg_buf) {
*(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
@@ -1688,7 +1821,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
jit->prg = 0;
jit->excnt = 0;
- bpf_jit_prologue(jit, stack_depth);
+ bpf_jit_prologue(jit, fp, stack_depth);
if (bpf_set_addr(jit, 0) < 0)
return -1;
for (i = 0; i < fp->len; i += insn_count) {
@@ -1768,6 +1901,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_jit jit;
int pass;
+ if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
+ return orig_fp;
+
if (!fp->jit_requested)
return orig_fp;
@@ -1859,3 +1995,508 @@ out:
tmp : orig_fp);
return fp;
}
+
+bool bpf_jit_supports_kfunc_call(void)
+{
+ return true;
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr)
+{
+ struct {
+ u16 opc;
+ s32 disp;
+ } __packed insn;
+ char expected_plt[BPF_PLT_SIZE];
+ char current_plt[BPF_PLT_SIZE];
+ char *plt;
+ int err;
+
+ /* Verify the branch to be patched. */
+ err = copy_from_kernel_nofault(&insn, ip, sizeof(insn));
+ if (err < 0)
+ return err;
+ if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
+ return -EINVAL;
+
+ if (t == BPF_MOD_JUMP &&
+ insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
+ /*
+ * The branch already points to the destination,
+ * there is no PLT.
+ */
+ } else {
+ /* Verify the PLT. */
+ plt = (char *)ip + (insn.disp << 1);
+ err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
+ if (err < 0)
+ return err;
+ bpf_jit_plt(expected_plt, (char *)ip + 6, old_addr);
+ if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
+ return -EINVAL;
+ /* Adjust the call address. */
+ s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
+ &new_addr, sizeof(void *));
+ }
+
+ /* Adjust the mask of the branch. */
+ insn.opc = 0xc004 | (new_addr ? 0xf0 : 0);
+ s390_kernel_write((char *)ip + 1, (char *)&insn.opc + 1, 1);
+
+ /* Make the new code visible to the other CPUs. */
+ text_poke_sync_lock();
+
+ return 0;
+}
+
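The displacement handling above relies on brcl encoding its target as a signed 32-bit offset in halfwords relative to the instruction address; that is why the comparison shifts new_addr - ip right by one and why the PLT address is recovered as ip plus disp shifted left by one. A small helper expressing the relation (illustrative only, not part of the patch):

/* brcl target = instruction address + 2 * signed halfword displacement. */
static inline void *brcl_target(void *ip, s32 disp)
{
	return (char *)ip + ((s64)disp << 1);
}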
+struct bpf_tramp_jit {
+ struct bpf_jit common;
+ int orig_stack_args_off; /* Offset of arguments placed on stack by the
+ * func_addr's original caller
+ */
+ int stack_size; /* Trampoline stack size */
+ int stack_args_off; /* Offset of stack arguments for calling
+ * func_addr, has to be at the top
+ */
+ int reg_args_off; /* Offset of register arguments for calling
+ * func_addr
+ */
+ int ip_off; /* For bpf_get_func_ip(), has to be at
+ * (ctx - 16)
+ */
+ int arg_cnt_off; /* For bpf_get_func_arg_cnt(), has to be at
+ * (ctx - 8)
+ */
+ int bpf_args_off; /* Offset of BPF_PROG context, which consists
+ * of BPF arguments followed by return value
+ */
+ int retval_off; /* Offset of return value (see above) */
+ int r7_r8_off; /* Offset of saved %r7 and %r8, which are used
+ * for __bpf_prog_enter() return value and
+ * func_addr respectively
+ */
+ int r14_off; /* Offset of saved %r14 */
+ int run_ctx_off; /* Offset of struct bpf_tramp_run_ctx */
+ int do_fexit; /* do_fexit: label */
+};
+
+static void load_imm64(struct bpf_jit *jit, int dst_reg, u64 val)
+{
+ /* llihf %dst_reg,val_hi */
+ EMIT6_IMM(0xc00e0000, dst_reg, (val >> 32));
+ /* oilf %dst_reg,val_lo */
+ EMIT6_IMM(0xc00d0000, dst_reg, val);
+}
+
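load_imm64() materializes an arbitrary 64-bit constant in two 6-byte instructions: llihf places the upper 32 bits of the value into the high half of the register and zeroes the low half, and oilf then ORs the lower 32 bits into the low half. The split it performs can be pictured as (illustration only):

/* How load_imm64() splits a 64-bit immediate (sketch, not used by the JIT). */
static inline void split_imm64(u64 val, u32 *hi, u32 *lo)
{
	*hi = val >> 32;	/* emitted via llihf */
	*lo = (u32)val;		/* emitted via oilf */
}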
+static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
+ const struct btf_func_model *m,
+ struct bpf_tramp_link *tlink, bool save_ret)
+{
+ struct bpf_jit *jit = &tjit->common;
+ int cookie_off = tjit->run_ctx_off +
+ offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
+ struct bpf_prog *p = tlink->link.prog;
+ int patch;
+
+ /*
+ * run_ctx.cookie = tlink->cookie;
+ */
+
+ /* %r0 = tlink->cookie */
+ load_imm64(jit, REG_W0, tlink->cookie);
+ /* stg %r0,cookie_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, REG_0, REG_15, cookie_off);
+
+ /*
+ * if ((start = __bpf_prog_enter(p, &run_ctx)) == 0)
+ * goto skip;
+ */
+
+ /* %r1 = __bpf_prog_enter */
+ load_imm64(jit, REG_1, (u64)bpf_trampoline_enter(p));
+ /* %r2 = p */
+ load_imm64(jit, REG_2, (u64)p);
+ /* la %r3,run_ctx_off(%r15) */
+ EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
+ /* %r1() */
+ call_r1(jit);
+ /* ltgr %r7,%r2 */
+ EMIT4(0xb9020000, REG_7, REG_2);
+ /* brcl 8,skip */
+ patch = jit->prg;
+ EMIT6_PCREL_RILC(0xc0040000, 8, 0);
+
+ /*
+ * retval = bpf_func(args, p->insnsi);
+ */
+
+ /* %r1 = p->bpf_func */
+ load_imm64(jit, REG_1, (u64)p->bpf_func);
+ /* la %r2,bpf_args_off(%r15) */
+ EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
+ /* %r3 = p->insnsi */
+ if (!p->jited)
+ load_imm64(jit, REG_3, (u64)p->insnsi);
+ /* %r1() */
+ call_r1(jit);
+ /* stg %r2,retval_off(%r15) */
+ if (save_ret) {
+ if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
+ return -1;
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
+ tjit->retval_off);
+ }
+
+ /* skip: */
+ if (jit->prg_buf)
+ *(u32 *)&jit->prg_buf[patch + 2] = (jit->prg - patch) >> 1;
+
+ /*
+ * __bpf_prog_exit(p, start, &run_ctx);
+ */
+
+ /* %r1 = __bpf_prog_exit */
+ load_imm64(jit, REG_1, (u64)bpf_trampoline_exit(p));
+ /* %r2 = p */
+ load_imm64(jit, REG_2, (u64)p);
+ /* lgr %r3,%r7 */
+ EMIT4(0xb9040000, REG_3, REG_7);
+ /* la %r4,run_ctx_off(%r15) */
+ EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
+ /* %r1() */
+ call_r1(jit);
+
+ return 0;
+}
+
+static int alloc_stack(struct bpf_tramp_jit *tjit, size_t size)
+{
+ int stack_offset = tjit->stack_size;
+
+ tjit->stack_size += size;
+ return stack_offset;
+}
+
+/* ABI uses %r2 - %r6 for parameter passing. */
+#define MAX_NR_REG_ARGS 5
+
+/* The "L" field of the "mvc" instruction is 8 bits. */
+#define MAX_MVC_SIZE 256
+#define MAX_NR_STACK_ARGS (MAX_MVC_SIZE / sizeof(u64))
+
+/* -mfentry generates a 6-byte nop on s390x. */
+#define S390X_PATCH_SIZE 6
+
+static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
+ struct bpf_tramp_jit *tjit,
+ const struct btf_func_model *m,
+ u32 flags,
+ struct bpf_tramp_links *tlinks,
+ void *func_addr)
+{
+ struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+ int nr_bpf_args, nr_reg_args, nr_stack_args;
+ struct bpf_jit *jit = &tjit->common;
+ int arg, bpf_arg_off;
+ int i, j;
+
+ /* Support as many stack arguments as the "mvc" instruction can handle. */
+ nr_reg_args = min_t(int, m->nr_args, MAX_NR_REG_ARGS);
+ nr_stack_args = m->nr_args - nr_reg_args;
+ if (nr_stack_args > MAX_NR_STACK_ARGS)
+ return -ENOTSUPP;
+
+ /* Return to %r14, since func_addr and %r0 are not available. */
+ if (!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK))
+ flags |= BPF_TRAMP_F_SKIP_FRAME;
+
+ /*
+ * Compute how many arguments we need to pass to BPF programs.
+ * BPF ABI mirrors that of x86_64: arguments that are 16 bytes or
+ * smaller are packed into 1 or 2 registers; larger arguments are
+ * passed via pointers.
+ * In s390x ABI, arguments that are 8 bytes or smaller are packed into
+ * a register; larger arguments are passed via pointers.
+ * We need to deal with this difference.
+ */
+ nr_bpf_args = 0;
+ for (i = 0; i < m->nr_args; i++) {
+ if (m->arg_size[i] <= 8)
+ nr_bpf_args += 1;
+ else if (m->arg_size[i] <= 16)
+ nr_bpf_args += 2;
+ else
+ return -ENOTSUPP;
+ }
+
+ /*
+ * Calculate the stack layout.
+ */
+
+ /* Reserve STACK_FRAME_OVERHEAD bytes for the callees. */
+ tjit->stack_size = STACK_FRAME_OVERHEAD;
+ tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64));
+ tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64));
+ tjit->ip_off = alloc_stack(tjit, sizeof(u64));
+ tjit->arg_cnt_off = alloc_stack(tjit, sizeof(u64));
+ tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64));
+ tjit->retval_off = alloc_stack(tjit, sizeof(u64));
+ tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64));
+ tjit->r14_off = alloc_stack(tjit, sizeof(u64));
+ tjit->run_ctx_off = alloc_stack(tjit,
+ sizeof(struct bpf_tramp_run_ctx));
+ /* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */
+ tjit->stack_size -= STACK_FRAME_OVERHEAD;
+ tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
+
+ /* aghi %r15,-stack_size */
+ EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
+ /* stmg %r2,%rN,fwd_reg_args_off(%r15) */
+ if (nr_reg_args)
+ EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
+ REG_2 + (nr_reg_args - 1), REG_15,
+ tjit->reg_args_off);
+ for (i = 0, j = 0; i < m->nr_args; i++) {
+ if (i < MAX_NR_REG_ARGS)
+ arg = REG_2 + i;
+ else
+ arg = tjit->orig_stack_args_off +
+ (i - MAX_NR_REG_ARGS) * sizeof(u64);
+ bpf_arg_off = tjit->bpf_args_off + j * sizeof(u64);
+ if (m->arg_size[i] <= 8) {
+ if (i < MAX_NR_REG_ARGS)
+ /* stg %arg,bpf_arg_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, arg,
+ REG_0, REG_15, bpf_arg_off);
+ else
+ /* mvc bpf_arg_off(8,%r15),arg(%r15) */
+ _EMIT6(0xd207f000 | bpf_arg_off,
+ 0xf000 | arg);
+ j += 1;
+ } else {
+ if (i < MAX_NR_REG_ARGS) {
+ /* mvc bpf_arg_off(16,%r15),0(%arg) */
+ _EMIT6(0xd20ff000 | bpf_arg_off,
+ reg2hex[arg] << 12);
+ } else {
+ /* lg %r1,arg(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_0,
+ REG_15, arg);
+ /* mvc bpf_arg_off(16,%r15),0(%r1) */
+ _EMIT6(0xd20ff000 | bpf_arg_off, 0x1000);
+ }
+ j += 2;
+ }
+ }
+ /* stmg %r7,%r8,r7_r8_off(%r15) */
+ EMIT6_DISP_LH(0xeb000000, 0x0024, REG_7, REG_8, REG_15,
+ tjit->r7_r8_off);
+ /* stg %r14,r14_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_14, REG_0, REG_15, tjit->r14_off);
+
+ if (flags & BPF_TRAMP_F_ORIG_STACK) {
+ /*
+ * The ftrace trampoline puts the return address (which is the
+ * address of the original function + S390X_PATCH_SIZE) into
+ * %r0; see ftrace_shared_hotpatch_trampoline_br and
+ * ftrace_init_nop() for details.
+ */
+
+ /* lgr %r8,%r0 */
+ EMIT4(0xb9040000, REG_8, REG_0);
+ } else {
+ /* %r8 = func_addr + S390X_PATCH_SIZE */
+ load_imm64(jit, REG_8, (u64)func_addr + S390X_PATCH_SIZE);
+ }
+
+ /*
+ * ip = func_addr;
+ * arg_cnt = m->nr_args;
+ */
+
+ if (flags & BPF_TRAMP_F_IP_ARG) {
+ /* %r0 = func_addr */
+ load_imm64(jit, REG_0, (u64)func_addr);
+ /* stg %r0,ip_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
+ tjit->ip_off);
+ }
+ /* lghi %r0,nr_bpf_args */
+ EMIT4_IMM(0xa7090000, REG_0, nr_bpf_args);
+ /* stg %r0,arg_cnt_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
+ tjit->arg_cnt_off);
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ /*
+ * __bpf_tramp_enter(im);
+ */
+
+ /* %r1 = __bpf_tramp_enter */
+ load_imm64(jit, REG_1, (u64)__bpf_tramp_enter);
+ /* %r2 = im */
+ load_imm64(jit, REG_2, (u64)im);
+ /* %r1() */
+ call_r1(jit);
+ }
+
+ for (i = 0; i < fentry->nr_links; i++)
+ if (invoke_bpf_prog(tjit, m, fentry->links[i],
+ flags & BPF_TRAMP_F_RET_FENTRY_RET))
+ return -EINVAL;
+
+ if (fmod_ret->nr_links) {
+ /*
+ * retval = 0;
+ */
+
+ /* xc retval_off(8,%r15),retval_off(%r15) */
+ _EMIT6(0xd707f000 | tjit->retval_off,
+ 0xf000 | tjit->retval_off);
+
+ for (i = 0; i < fmod_ret->nr_links; i++) {
+ if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true))
+ return -EINVAL;
+
+ /*
+ * if (retval)
+ * goto do_fexit;
+ */
+
+ /* ltg %r0,retval_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0002, REG_0, REG_0, REG_15,
+ tjit->retval_off);
+ /* brcl 7,do_fexit */
+ EMIT6_PCREL_RILC(0xc0040000, 7, tjit->do_fexit);
+ }
+ }
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ /*
+ * retval = func_addr(args);
+ */
+
+ /* lmg %r2,%rN,reg_args_off(%r15) */
+ if (nr_reg_args)
+ EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
+ REG_2 + (nr_reg_args - 1), REG_15,
+ tjit->reg_args_off);
+ /* mvc stack_args_off(N,%r15),orig_stack_args_off(%r15) */
+ if (nr_stack_args)
+ _EMIT6(0xd200f000 |
+ (nr_stack_args * sizeof(u64) - 1) << 16 |
+ tjit->stack_args_off,
+ 0xf000 | tjit->orig_stack_args_off);
+ /* lgr %r1,%r8 */
+ EMIT4(0xb9040000, REG_1, REG_8);
+ /* %r1() */
+ call_r1(jit);
+ /* stg %r2,retval_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
+ tjit->retval_off);
+
+ im->ip_after_call = jit->prg_buf + jit->prg;
+
+ /*
+ * The following nop will be patched by bpf_tramp_image_put().
+ */
+
+ /* brcl 0,im->ip_epilogue */
+ EMIT6_PCREL_RILC(0xc0040000, 0, (u64)im->ip_epilogue);
+ }
+
+ /* do_fexit: */
+ tjit->do_fexit = jit->prg;
+ for (i = 0; i < fexit->nr_links; i++)
+ if (invoke_bpf_prog(tjit, m, fexit->links[i], false))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ im->ip_epilogue = jit->prg_buf + jit->prg;
+
+ /*
+ * __bpf_tramp_exit(im);
+ */
+
+ /* %r1 = __bpf_tramp_exit */
+ load_imm64(jit, REG_1, (u64)__bpf_tramp_exit);
+ /* %r2 = im */
+ load_imm64(jit, REG_2, (u64)im);
+ /* %r1() */
+ call_r1(jit);
+ }
+
+ /* lmg %r2,%rN,reg_args_off(%r15) */
+ if ((flags & BPF_TRAMP_F_RESTORE_REGS) && nr_reg_args)
+ EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
+ REG_2 + (nr_reg_args - 1), REG_15,
+ tjit->reg_args_off);
+ /* lgr %r1,%r8 */
+ if (!(flags & BPF_TRAMP_F_SKIP_FRAME))
+ EMIT4(0xb9040000, REG_1, REG_8);
+ /* lmg %r7,%r8,r7_r8_off(%r15) */
+ EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
+ tjit->r7_r8_off);
+ /* lg %r14,r14_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_14, REG_0, REG_15, tjit->r14_off);
+ /* lg %r2,retval_off(%r15) */
+ if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
+ tjit->retval_off);
+ /* aghi %r15,stack_size */
+ EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
+ /* Emit an expoline for the following indirect jump. */
+ if (nospec_uses_trampoline())
+ emit_expoline(jit);
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* br %r14 */
+ _EMIT2(0x07fe);
+ else
+ /* br %r1 */
+ _EMIT2(0x07f1);
+
+ emit_r1_thunk(jit);
+
+ return 0;
+}
+
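To make the argument-conversion rule used above concrete: for a traced function taking (u32 a, __int128 b, u64 c), the s390x side passes a and c in registers and b by reference, while the BPF context needs four 8-byte slots (one for a, two for the 16-byte b, one for c), so nr_bpf_args is 4. The counting rule mirrors the loop in __arch_prepare_bpf_trampoline() and can be sketched as:

/* Sketch: BPF-side 8-byte slot count for a btf_func_model (illustration). */
static int count_bpf_args(const struct btf_func_model *m)
{
	int i, nr = 0;

	for (i = 0; i < m->nr_args; i++) {
		if (m->arg_size[i] <= 8)
			nr += 1;		/* fits one slot */
		else if (m->arg_size[i] <= 16)
			nr += 2;		/* two slots, copied from the pointer */
		else
			return -ENOTSUPP;	/* same limit as the trampoline */
	}
	return nr;
}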
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
+ void *image_end, const struct btf_func_model *m,
+ u32 flags, struct bpf_tramp_links *tlinks,
+ void *func_addr)
+{
+ struct bpf_tramp_jit tjit;
+ int ret;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ if (i == 0) {
+ /* Compute offsets, check whether the code fits. */
+ memset(&tjit, 0, sizeof(tjit));
+ } else {
+ /* Generate the code. */
+ tjit.common.prg = 0;
+ tjit.common.prg_buf = image;
+ }
+ ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
+ tlinks, func_addr);
+ if (ret < 0)
+ return ret;
+ if (tjit.common.prg > (char *)image_end - (char *)image)
+ /*
+ * Use the same error code as for exceeding
+ * BPF_MAX_TRAMP_LINKS.
+ */
+ return -E2BIG;
+ }
+
+ return ret;
+}
+
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+ return true;
+}
diff --git a/arch/sh/include/asm/checksum_32.h b/arch/sh/include/asm/checksum_32.h
index a6501b856f3e..2b5fa75b4651 100644
--- a/arch/sh/include/asm/checksum_32.h
+++ b/arch/sh/include/asm/checksum_32.h
@@ -7,6 +7,7 @@
*/
#include <linux/in6.h>
+#include <linux/uaccess.h>
/*
* computes the checksum of a memory block at buff, length len,
diff --git a/arch/sh/include/asm/gpio.h b/arch/sh/include/asm/gpio.h
index d643250f0a0f..588c1380e4cb 100644
--- a/arch/sh/include/asm/gpio.h
+++ b/arch/sh/include/asm/gpio.h
@@ -40,11 +40,6 @@ static inline int gpio_to_irq(unsigned gpio)
return __gpio_to_irq(gpio);
}
-static inline int irq_to_gpio(unsigned int irq)
-{
- return -ENOSYS;
-}
-
#endif /* CONFIG_GPIOLIB */
#endif /* __ASM_SH_GPIO_H */
diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler
index 26b8c08e2fc4..b88f784cb02e 100644
--- a/arch/x86/Kconfig.assembler
+++ b/arch/x86/Kconfig.assembler
@@ -19,3 +19,8 @@ config AS_TPAUSE
def_bool $(as-instr,tpause %ecx)
help
Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7
+
+config AS_GFNI
+ def_bool $(as-instr,vgf2p8mulb %xmm0$(comma)%xmm1$(comma)%xmm2)
+ help
+ Supported by binutils >= 2.30 and LLVM integrated assembler
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
index 71c4c473d34b..9bbfd01cfa2f 100644
--- a/arch/x86/crypto/Kconfig
+++ b/arch/x86/crypto/Kconfig
@@ -304,6 +304,44 @@ config CRYPTO_ARIA_AESNI_AVX_X86_64
Processes 16 blocks in parallel.
+config CRYPTO_ARIA_AESNI_AVX2_X86_64
+ tristate "Ciphers: ARIA with modes: ECB, CTR (AES-NI/AVX2/GFNI)"
+ depends on X86 && 64BIT
+ select CRYPTO_SKCIPHER
+ select CRYPTO_SIMD
+ select CRYPTO_ALGAPI
+ select CRYPTO_ARIA
+ select CRYPTO_ARIA_AESNI_AVX_X86_64
+ help
+ Length-preserving cipher: ARIA cipher algorithms
+ (RFC 5794) with ECB and CTR modes
+
+ Architecture: x86_64 using:
+ - AES-NI (AES New Instructions)
+ - AVX2 (Advanced Vector Extensions)
+ - GFNI (Galois Field New Instructions)
+
+ Processes 32 blocks in parallel.
+
+config CRYPTO_ARIA_GFNI_AVX512_X86_64
+ tristate "Ciphers: ARIA with modes: ECB, CTR (AVX512/GFNI)"
+ depends on X86 && 64BIT && AS_AVX512 && AS_GFNI
+ select CRYPTO_SKCIPHER
+ select CRYPTO_SIMD
+ select CRYPTO_ALGAPI
+ select CRYPTO_ARIA
+ select CRYPTO_ARIA_AESNI_AVX_X86_64
+ select CRYPTO_ARIA_AESNI_AVX2_X86_64
+ help
+ Length-preserving cipher: ARIA cipher algorithms
+ (RFC 5794) with ECB and CTR modes
+
+ Architecture: x86_64 using:
+ - AVX512 (Advanced Vector Extensions)
+ - GFNI (Galois Field New Instructions)
+
+ Processes 64 blocks in parallel.
+
config CRYPTO_CHACHA20_X86_64
tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (SSSE3/AVX2/AVX-512VL)"
depends on X86 && 64BIT
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 3e7a329235bd..9aa46093c91b 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -103,6 +103,12 @@ sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o
obj-$(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) += aria-aesni-avx-x86_64.o
aria-aesni-avx-x86_64-y := aria-aesni-avx-asm_64.o aria_aesni_avx_glue.o
+obj-$(CONFIG_CRYPTO_ARIA_AESNI_AVX2_X86_64) += aria-aesni-avx2-x86_64.o
+aria-aesni-avx2-x86_64-y := aria-aesni-avx2-asm_64.o aria_aesni_avx2_glue.o
+
+obj-$(CONFIG_CRYPTO_ARIA_GFNI_AVX512_X86_64) += aria-gfni-avx512-x86_64.o
+aria-gfni-avx512-x86_64-y := aria-gfni-avx512-asm_64.o aria_gfni_avx512_glue.o
+
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $< > $@
$(obj)/%.S: $(src)/%.pl FORCE
diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S
index 03ae4cd1d976..9243f6289d34 100644
--- a/arch/x86/crypto/aria-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S
@@ -8,13 +8,9 @@
#include <linux/linkage.h>
#include <linux/cfi_types.h>
+#include <asm/asm-offsets.h>
#include <asm/frame.h>
-/* struct aria_ctx: */
-#define enc_key 0
-#define dec_key 272
-#define rounds 544
-
/* register macros */
#define CTX %rdi
@@ -271,34 +267,44 @@
#define aria_ark_8way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
- t0, rk, idx, round) \
+ t0, t1, t2, rk, \
+ idx, round) \
/* AddRoundKey */ \
- vpbroadcastb ((round * 16) + idx + 3)(rk), t0; \
- vpxor t0, x0, x0; \
- vpbroadcastb ((round * 16) + idx + 2)(rk), t0; \
- vpxor t0, x1, x1; \
- vpbroadcastb ((round * 16) + idx + 1)(rk), t0; \
- vpxor t0, x2, x2; \
- vpbroadcastb ((round * 16) + idx + 0)(rk), t0; \
- vpxor t0, x3, x3; \
- vpbroadcastb ((round * 16) + idx + 7)(rk), t0; \
- vpxor t0, x4, x4; \
- vpbroadcastb ((round * 16) + idx + 6)(rk), t0; \
- vpxor t0, x5, x5; \
- vpbroadcastb ((round * 16) + idx + 5)(rk), t0; \
- vpxor t0, x6, x6; \
- vpbroadcastb ((round * 16) + idx + 4)(rk), t0; \
- vpxor t0, x7, x7;
-
+ vbroadcastss ((round * 16) + idx + 0)(rk), t0; \
+ vpsrld $24, t0, t2; \
+ vpshufb t1, t2, t2; \
+ vpxor t2, x0, x0; \
+ vpsrld $16, t0, t2; \
+ vpshufb t1, t2, t2; \
+ vpxor t2, x1, x1; \
+ vpsrld $8, t0, t2; \
+ vpshufb t1, t2, t2; \
+ vpxor t2, x2, x2; \
+ vpshufb t1, t0, t2; \
+ vpxor t2, x3, x3; \
+ vbroadcastss ((round * 16) + idx + 4)(rk), t0; \
+ vpsrld $24, t0, t2; \
+ vpshufb t1, t2, t2; \
+ vpxor t2, x4, x4; \
+ vpsrld $16, t0, t2; \
+ vpshufb t1, t2, t2; \
+ vpxor t2, x5, x5; \
+ vpsrld $8, t0, t2; \
+ vpshufb t1, t2, t2; \
+ vpxor t2, x6, x6; \
+ vpshufb t1, t0, t2; \
+ vpxor t2, x7, x7;
+
+#ifdef CONFIG_AS_GFNI
#define aria_sbox_8way_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
t0, t1, t2, t3, \
t4, t5, t6, t7) \
- vpbroadcastq .Ltf_s2_bitmatrix, t0; \
- vpbroadcastq .Ltf_inv_bitmatrix, t1; \
- vpbroadcastq .Ltf_id_bitmatrix, t2; \
- vpbroadcastq .Ltf_aff_bitmatrix, t3; \
- vpbroadcastq .Ltf_x2_bitmatrix, t4; \
+ vmovdqa .Ltf_s2_bitmatrix, t0; \
+ vmovdqa .Ltf_inv_bitmatrix, t1; \
+ vmovdqa .Ltf_id_bitmatrix, t2; \
+ vmovdqa .Ltf_aff_bitmatrix, t3; \
+ vmovdqa .Ltf_x2_bitmatrix, t4; \
vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
@@ -312,14 +318,15 @@
vgf2p8affineinvqb $0, t2, x3, x3; \
vgf2p8affineinvqb $0, t2, x7, x7
+#endif /* CONFIG_AS_GFNI */
+
#define aria_sbox_8way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
t0, t1, t2, t3, \
t4, t5, t6, t7) \
- vpxor t7, t7, t7; \
vmovdqa .Linv_shift_row, t0; \
vmovdqa .Lshift_row, t1; \
- vpbroadcastd .L0f0f0f0f, t6; \
+ vbroadcastss .L0f0f0f0f, t6; \
vmovdqa .Ltf_lo__inv_aff__and__s2, t2; \
vmovdqa .Ltf_hi__inv_aff__and__s2, t3; \
vmovdqa .Ltf_lo__x2__and__fwd_aff, t4; \
@@ -414,8 +421,9 @@
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, rk, round) \
+ vpxor y7, y7, y7; \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 8, round); \
+ y0, y7, y2, rk, 8, round); \
\
aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \
y0, y1, y2, y3, y4, y5, y6, y7); \
@@ -430,7 +438,7 @@
x4, x5, x6, x7, \
mem_tmp, 0); \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 0, round); \
+ y0, y7, y2, rk, 0, round); \
\
aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \
y0, y1, y2, y3, y4, y5, y6, y7); \
@@ -468,8 +476,9 @@
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, rk, round) \
+ vpxor y7, y7, y7; \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 8, round); \
+ y0, y7, y2, rk, 8, round); \
\
aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, y1, y2, y3, y4, y5, y6, y7); \
@@ -484,7 +493,7 @@
x4, x5, x6, x7, \
mem_tmp, 0); \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 0, round); \
+ y0, y7, y2, rk, 0, round); \
\
aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, y1, y2, y3, y4, y5, y6, y7); \
@@ -522,14 +531,15 @@
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, rk, round, last_round) \
+ vpxor y7, y7, y7; \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 8, round); \
+ y0, y7, y2, rk, 8, round); \
\
aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \
y0, y1, y2, y3, y4, y5, y6, y7); \
\
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 8, last_round); \
+ y0, y7, y2, rk, 8, last_round); \
\
aria_store_state_8way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
@@ -539,25 +549,27 @@
x4, x5, x6, x7, \
mem_tmp, 0); \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 0, round); \
+ y0, y7, y2, rk, 0, round); \
\
aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \
y0, y1, y2, y3, y4, y5, y6, y7); \
\
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 0, last_round); \
+ y0, y7, y2, rk, 0, last_round); \
\
aria_load_state_8way(y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, 8);
+#ifdef CONFIG_AS_GFNI
#define aria_fe_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, rk, round) \
+ vpxor y7, y7, y7; \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 8, round); \
+ y0, y7, y2, rk, 8, round); \
\
aria_sbox_8way_gfni(x2, x3, x0, x1, \
x6, x7, x4, x5, \
@@ -574,7 +586,7 @@
x4, x5, x6, x7, \
mem_tmp, 0); \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 0, round); \
+ y0, y7, y2, rk, 0, round); \
\
aria_sbox_8way_gfni(x2, x3, x0, x1, \
x6, x7, x4, x5, \
@@ -614,8 +626,9 @@
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, rk, round) \
+ vpxor y7, y7, y7; \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 8, round); \
+ y0, y7, y2, rk, 8, round); \
\
aria_sbox_8way_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
@@ -632,7 +645,7 @@
x4, x5, x6, x7, \
mem_tmp, 0); \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 0, round); \
+ y0, y7, y2, rk, 0, round); \
\
aria_sbox_8way_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
@@ -672,8 +685,9 @@
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, rk, round, last_round) \
+ vpxor y7, y7, y7; \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 8, round); \
+ y0, y7, y2, rk, 8, round); \
\
aria_sbox_8way_gfni(x2, x3, x0, x1, \
x6, x7, x4, x5, \
@@ -681,7 +695,7 @@
y4, y5, y6, y7); \
\
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 8, last_round); \
+ y0, y7, y2, rk, 8, last_round); \
\
aria_store_state_8way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
@@ -691,7 +705,7 @@
x4, x5, x6, x7, \
mem_tmp, 0); \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 0, round); \
+ y0, y7, y2, rk, 0, round); \
\
aria_sbox_8way_gfni(x2, x3, x0, x1, \
x6, x7, x4, x5, \
@@ -699,12 +713,14 @@
y4, y5, y6, y7); \
\
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
- y0, rk, 0, last_round); \
+ y0, y7, y2, rk, 0, last_round); \
\
aria_load_state_8way(y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, 8);
+#endif /* CONFIG_AS_GFNI */
+
/* NB: section is mergeable, all elements must be aligned 16-byte blocks */
.section .rodata.cst16, "aM", @progbits, 16
.align 16
@@ -756,6 +772,7 @@
.Ltf_hi__x2__and__fwd_aff:
.octa 0x3F893781E95FE1576CDA64D2BA0CB204
+#ifdef CONFIG_AS_GFNI
.section .rodata.cst8, "aM", @progbits, 8
.align 8
/* AES affine: */
@@ -769,6 +786,14 @@
BV8(0, 1, 1, 1, 1, 1, 0, 0),
BV8(0, 0, 1, 1, 1, 1, 1, 0),
BV8(0, 0, 0, 1, 1, 1, 1, 1))
+ .quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1),
+ BV8(1, 1, 0, 0, 0, 1, 1, 1),
+ BV8(1, 1, 1, 0, 0, 0, 1, 1),
+ BV8(1, 1, 1, 1, 0, 0, 0, 1),
+ BV8(1, 1, 1, 1, 1, 0, 0, 0),
+ BV8(0, 1, 1, 1, 1, 1, 0, 0),
+ BV8(0, 0, 1, 1, 1, 1, 1, 0),
+ BV8(0, 0, 0, 1, 1, 1, 1, 1))
/* AES inverse affine: */
#define tf_inv_const BV8(1, 0, 1, 0, 0, 0, 0, 0)
@@ -781,6 +806,14 @@
BV8(0, 0, 1, 0, 1, 0, 0, 1),
BV8(1, 0, 0, 1, 0, 1, 0, 0),
BV8(0, 1, 0, 0, 1, 0, 1, 0))
+ .quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1),
+ BV8(1, 0, 0, 1, 0, 0, 1, 0),
+ BV8(0, 1, 0, 0, 1, 0, 0, 1),
+ BV8(1, 0, 1, 0, 0, 1, 0, 0),
+ BV8(0, 1, 0, 1, 0, 0, 1, 0),
+ BV8(0, 0, 1, 0, 1, 0, 0, 1),
+ BV8(1, 0, 0, 1, 0, 1, 0, 0),
+ BV8(0, 1, 0, 0, 1, 0, 1, 0))
/* S2: */
#define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1)
@@ -793,6 +826,14 @@
BV8(1, 1, 0, 0, 1, 1, 1, 0),
BV8(0, 1, 1, 0, 0, 0, 1, 1),
BV8(1, 1, 1, 1, 0, 1, 1, 0))
+ .quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1),
+ BV8(0, 0, 1, 1, 1, 1, 1, 1),
+ BV8(1, 1, 1, 0, 1, 1, 0, 1),
+ BV8(1, 1, 0, 0, 0, 0, 1, 1),
+ BV8(0, 1, 0, 0, 0, 0, 1, 1),
+ BV8(1, 1, 0, 0, 1, 1, 1, 0),
+ BV8(0, 1, 1, 0, 0, 0, 1, 1),
+ BV8(1, 1, 1, 1, 0, 1, 1, 0))
/* X2: */
#define tf_x2_const BV8(0, 0, 1, 1, 0, 1, 0, 0)
@@ -805,6 +846,14 @@
BV8(0, 1, 1, 0, 1, 0, 1, 1),
BV8(1, 0, 1, 1, 1, 1, 0, 1),
BV8(1, 0, 0, 1, 0, 0, 1, 1))
+ .quad BM8X8(BV8(0, 0, 0, 1, 1, 0, 0, 0),
+ BV8(0, 0, 1, 0, 0, 1, 1, 0),
+ BV8(0, 0, 0, 0, 1, 0, 1, 0),
+ BV8(1, 1, 1, 0, 0, 0, 1, 1),
+ BV8(1, 1, 1, 0, 1, 1, 0, 0),
+ BV8(0, 1, 1, 0, 1, 0, 1, 1),
+ BV8(1, 0, 1, 1, 1, 1, 0, 1),
+ BV8(1, 0, 0, 1, 0, 0, 1, 1))
/* Identity matrix: */
.Ltf_id_bitmatrix:
@@ -816,6 +865,15 @@
BV8(0, 0, 0, 0, 0, 1, 0, 0),
BV8(0, 0, 0, 0, 0, 0, 1, 0),
BV8(0, 0, 0, 0, 0, 0, 0, 1))
+ .quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0),
+ BV8(0, 1, 0, 0, 0, 0, 0, 0),
+ BV8(0, 0, 1, 0, 0, 0, 0, 0),
+ BV8(0, 0, 0, 1, 0, 0, 0, 0),
+ BV8(0, 0, 0, 0, 1, 0, 0, 0),
+ BV8(0, 0, 0, 0, 0, 1, 0, 0),
+ BV8(0, 0, 0, 0, 0, 0, 1, 0),
+ BV8(0, 0, 0, 0, 0, 0, 0, 1))
+#endif /* CONFIG_AS_GFNI */
/* 4-bit mask */
.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
@@ -874,7 +932,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_crypt_16way)
aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15,
%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%rax, %r9, 10);
- cmpl $12, rounds(CTX);
+ cmpl $12, ARIA_CTX_rounds(CTX);
jne .Laria_192;
aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@@ -887,7 +945,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_crypt_16way)
aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15,
%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%rax, %r9, 12);
- cmpl $14, rounds(CTX);
+ cmpl $14, ARIA_CTX_rounds(CTX);
jne .Laria_256;
aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@@ -923,7 +981,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_encrypt_16way)
FRAME_BEGIN
- leaq enc_key(CTX), %r9;
+ leaq ARIA_CTX_enc_key(CTX), %r9;
inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@@ -948,7 +1006,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_decrypt_16way)
FRAME_BEGIN
- leaq dec_key(CTX), %r9;
+ leaq ARIA_CTX_dec_key(CTX), %r9;
inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@@ -1056,7 +1114,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way)
leaq (%rdx), %r11;
leaq (%rcx), %rsi;
leaq (%rcx), %rdx;
- leaq enc_key(CTX), %r9;
+ leaq ARIA_CTX_enc_key(CTX), %r9;
call __aria_aesni_avx_crypt_16way;
@@ -1084,6 +1142,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way)
RET;
SYM_FUNC_END(aria_aesni_avx_ctr_crypt_16way)
+#ifdef CONFIG_AS_GFNI
SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way)
/* input:
* %r9: rk
@@ -1157,7 +1216,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way)
%xmm0, %xmm1, %xmm2, %xmm3,
%xmm4, %xmm5, %xmm6, %xmm7,
%rax, %r9, 10);
- cmpl $12, rounds(CTX);
+ cmpl $12, ARIA_CTX_rounds(CTX);
jne .Laria_gfni_192;
aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@@ -1174,7 +1233,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way)
%xmm0, %xmm1, %xmm2, %xmm3,
%xmm4, %xmm5, %xmm6, %xmm7,
%rax, %r9, 12);
- cmpl $14, rounds(CTX);
+ cmpl $14, ARIA_CTX_rounds(CTX);
jne .Laria_gfni_256;
aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2,
%xmm4, %xmm5, %xmm6, %xmm7,
@@ -1218,7 +1277,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_encrypt_16way)
FRAME_BEGIN
- leaq enc_key(CTX), %r9;
+ leaq ARIA_CTX_enc_key(CTX), %r9;
inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@@ -1243,7 +1302,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_decrypt_16way)
FRAME_BEGIN
- leaq dec_key(CTX), %r9;
+ leaq ARIA_CTX_dec_key(CTX), %r9;
inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@@ -1275,7 +1334,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way)
leaq (%rdx), %r11;
leaq (%rcx), %rsi;
leaq (%rcx), %rdx;
- leaq enc_key(CTX), %r9;
+ leaq ARIA_CTX_enc_key(CTX), %r9;
call __aria_aesni_avx_gfni_crypt_16way;
@@ -1302,3 +1361,4 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way)
FRAME_END
RET;
SYM_FUNC_END(aria_aesni_avx_gfni_ctr_crypt_16way)
+#endif /* CONFIG_AS_GFNI */
diff --git a/arch/x86/crypto/aria-aesni-avx2-asm_64.S b/arch/x86/crypto/aria-aesni-avx2-asm_64.S
new file mode 100644
index 000000000000..82a14b4ad920
--- /dev/null
+++ b/arch/x86/crypto/aria-aesni-avx2-asm_64.S
@@ -0,0 +1,1441 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * ARIA Cipher 32-way parallel algorithm (AVX2)
+ *
+ * Copyright (c) 2022 Taehee Yoo <[email protected]>
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+#include <asm/asm-offsets.h>
+#include <linux/cfi_types.h>
+
+/* register macros */
+#define CTX %rdi
+
+#define ymm0_x xmm0
+#define ymm1_x xmm1
+#define ymm2_x xmm2
+#define ymm3_x xmm3
+#define ymm4_x xmm4
+#define ymm5_x xmm5
+#define ymm6_x xmm6
+#define ymm7_x xmm7
+#define ymm8_x xmm8
+#define ymm9_x xmm9
+#define ymm10_x xmm10
+#define ymm11_x xmm11
+#define ymm12_x xmm12
+#define ymm13_x xmm13
+#define ymm14_x xmm14
+#define ymm15_x xmm15
+
+#define BV8(a0, a1, a2, a3, a4, a5, a6, a7) \
+ ( (((a0) & 1) << 0) | \
+ (((a1) & 1) << 1) | \
+ (((a2) & 1) << 2) | \
+ (((a3) & 1) << 3) | \
+ (((a4) & 1) << 4) | \
+ (((a5) & 1) << 5) | \
+ (((a6) & 1) << 6) | \
+ (((a7) & 1) << 7) )
+
+#define BM8X8(l0, l1, l2, l3, l4, l5, l6, l7) \
+ ( ((l7) << (0 * 8)) | \
+ ((l6) << (1 * 8)) | \
+ ((l5) << (2 * 8)) | \
+ ((l4) << (3 * 8)) | \
+ ((l3) << (4 * 8)) | \
+ ((l2) << (5 * 8)) | \
+ ((l1) << (6 * 8)) | \
+ ((l0) << (7 * 8)) )
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
+
+#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpand x, mask4bit, tmp0; \
+ vpandn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+#define byteslice_16x16b(a0, b0, c0, d0, \
+ a1, b1, c1, d1, \
+ a2, b2, c2, d2, \
+ a3, b3, c3, d3, \
+ st0, st1) \
+ vmovdqu d2, st0; \
+ vmovdqu d3, st1; \
+ transpose_4x4(a0, a1, a2, a3, d2, d3); \
+ transpose_4x4(b0, b1, b2, b3, d2, d3); \
+ vmovdqu st0, d2; \
+ vmovdqu st1, d3; \
+ \
+ vmovdqu a0, st0; \
+ vmovdqu a1, st1; \
+ transpose_4x4(c0, c1, c2, c3, a0, a1); \
+ transpose_4x4(d0, d1, d2, d3, a0, a1); \
+ \
+ vbroadcasti128 .Lshufb_16x16b, a0; \
+ vmovdqu st1, a1; \
+ vpshufb a0, a2, a2; \
+ vpshufb a0, a3, a3; \
+ vpshufb a0, b0, b0; \
+ vpshufb a0, b1, b1; \
+ vpshufb a0, b2, b2; \
+ vpshufb a0, b3, b3; \
+ vpshufb a0, a1, a1; \
+ vpshufb a0, c0, c0; \
+ vpshufb a0, c1, c1; \
+ vpshufb a0, c2, c2; \
+ vpshufb a0, c3, c3; \
+ vpshufb a0, d0, d0; \
+ vpshufb a0, d1, d1; \
+ vpshufb a0, d2, d2; \
+ vpshufb a0, d3, d3; \
+ vmovdqu d3, st1; \
+ vmovdqu st0, d3; \
+ vpshufb a0, d3, a0; \
+ vmovdqu d2, st0; \
+ \
+ transpose_4x4(a0, b0, c0, d0, d2, d3); \
+ transpose_4x4(a1, b1, c1, d1, d2, d3); \
+ vmovdqu st0, d2; \
+ vmovdqu st1, d3; \
+ \
+ vmovdqu b0, st0; \
+ vmovdqu b1, st1; \
+ transpose_4x4(a2, b2, c2, d2, b0, b1); \
+ transpose_4x4(a3, b3, c3, d3, b0, b1); \
+ vmovdqu st0, b0; \
+ vmovdqu st1, b1; \
+ /* does not adjust output bytes inside vectors */
+
+#define debyteslice_16x16b(a0, b0, c0, d0, \
+ a1, b1, c1, d1, \
+ a2, b2, c2, d2, \
+ a3, b3, c3, d3, \
+ st0, st1) \
+ vmovdqu d2, st0; \
+ vmovdqu d3, st1; \
+ transpose_4x4(a0, a1, a2, a3, d2, d3); \
+ transpose_4x4(b0, b1, b2, b3, d2, d3); \
+ vmovdqu st0, d2; \
+ vmovdqu st1, d3; \
+ \
+ vmovdqu a0, st0; \
+ vmovdqu a1, st1; \
+ transpose_4x4(c0, c1, c2, c3, a0, a1); \
+ transpose_4x4(d0, d1, d2, d3, a0, a1); \
+ \
+ vbroadcasti128 .Lshufb_16x16b, a0; \
+ vmovdqu st1, a1; \
+ vpshufb a0, a2, a2; \
+ vpshufb a0, a3, a3; \
+ vpshufb a0, b0, b0; \
+ vpshufb a0, b1, b1; \
+ vpshufb a0, b2, b2; \
+ vpshufb a0, b3, b3; \
+ vpshufb a0, a1, a1; \
+ vpshufb a0, c0, c0; \
+ vpshufb a0, c1, c1; \
+ vpshufb a0, c2, c2; \
+ vpshufb a0, c3, c3; \
+ vpshufb a0, d0, d0; \
+ vpshufb a0, d1, d1; \
+ vpshufb a0, d2, d2; \
+ vpshufb a0, d3, d3; \
+ vmovdqu d3, st1; \
+ vmovdqu st0, d3; \
+ vpshufb a0, d3, a0; \
+ vmovdqu d2, st0; \
+ \
+ transpose_4x4(c0, d0, a0, b0, d2, d3); \
+ transpose_4x4(c1, d1, a1, b1, d2, d3); \
+ vmovdqu st0, d2; \
+ vmovdqu st1, d3; \
+ \
+ vmovdqu b0, st0; \
+ vmovdqu b1, st1; \
+ transpose_4x4(c2, d2, a2, b2, b0, b1); \
+ transpose_4x4(c3, d3, a3, b3, b0, b1); \
+ vmovdqu st0, b0; \
+ vmovdqu st1, b1; \
+ /* does not adjust output bytes inside vectors */
+
+/* load blocks to registers and apply pre-whitening */
+#define inpack16_pre(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ rio) \
+ vmovdqu (0 * 32)(rio), x0; \
+ vmovdqu (1 * 32)(rio), x1; \
+ vmovdqu (2 * 32)(rio), x2; \
+ vmovdqu (3 * 32)(rio), x3; \
+ vmovdqu (4 * 32)(rio), x4; \
+ vmovdqu (5 * 32)(rio), x5; \
+ vmovdqu (6 * 32)(rio), x6; \
+ vmovdqu (7 * 32)(rio), x7; \
+ vmovdqu (8 * 32)(rio), y0; \
+ vmovdqu (9 * 32)(rio), y1; \
+ vmovdqu (10 * 32)(rio), y2; \
+ vmovdqu (11 * 32)(rio), y3; \
+ vmovdqu (12 * 32)(rio), y4; \
+ vmovdqu (13 * 32)(rio), y5; \
+ vmovdqu (14 * 32)(rio), y6; \
+ vmovdqu (15 * 32)(rio), y7;
+
+/* byteslice pre-whitened blocks and store to temporary memory */
+#define inpack16_post(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_ab, mem_cd) \
+ byteslice_16x16b(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ (mem_ab), (mem_cd)); \
+ \
+ vmovdqu x0, 0 * 32(mem_ab); \
+ vmovdqu x1, 1 * 32(mem_ab); \
+ vmovdqu x2, 2 * 32(mem_ab); \
+ vmovdqu x3, 3 * 32(mem_ab); \
+ vmovdqu x4, 4 * 32(mem_ab); \
+ vmovdqu x5, 5 * 32(mem_ab); \
+ vmovdqu x6, 6 * 32(mem_ab); \
+ vmovdqu x7, 7 * 32(mem_ab); \
+ vmovdqu y0, 0 * 32(mem_cd); \
+ vmovdqu y1, 1 * 32(mem_cd); \
+ vmovdqu y2, 2 * 32(mem_cd); \
+ vmovdqu y3, 3 * 32(mem_cd); \
+ vmovdqu y4, 4 * 32(mem_cd); \
+ vmovdqu y5, 5 * 32(mem_cd); \
+ vmovdqu y6, 6 * 32(mem_cd); \
+ vmovdqu y7, 7 * 32(mem_cd);
+
+#define write_output(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem) \
+ vmovdqu x0, 0 * 32(mem); \
+ vmovdqu x1, 1 * 32(mem); \
+ vmovdqu x2, 2 * 32(mem); \
+ vmovdqu x3, 3 * 32(mem); \
+ vmovdqu x4, 4 * 32(mem); \
+ vmovdqu x5, 5 * 32(mem); \
+ vmovdqu x6, 6 * 32(mem); \
+ vmovdqu x7, 7 * 32(mem); \
+ vmovdqu y0, 8 * 32(mem); \
+ vmovdqu y1, 9 * 32(mem); \
+ vmovdqu y2, 10 * 32(mem); \
+ vmovdqu y3, 11 * 32(mem); \
+ vmovdqu y4, 12 * 32(mem); \
+ vmovdqu y5, 13 * 32(mem); \
+ vmovdqu y6, 14 * 32(mem); \
+ vmovdqu y7, 15 * 32(mem); \
+
+#define aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, idx) \
+ vmovdqu x0, ((idx + 0) * 32)(mem_tmp); \
+ vmovdqu x1, ((idx + 1) * 32)(mem_tmp); \
+ vmovdqu x2, ((idx + 2) * 32)(mem_tmp); \
+ vmovdqu x3, ((idx + 3) * 32)(mem_tmp); \
+ vmovdqu x4, ((idx + 4) * 32)(mem_tmp); \
+ vmovdqu x5, ((idx + 5) * 32)(mem_tmp); \
+ vmovdqu x6, ((idx + 6) * 32)(mem_tmp); \
+ vmovdqu x7, ((idx + 7) * 32)(mem_tmp);
+
+#define aria_load_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, idx) \
+ vmovdqu ((idx + 0) * 32)(mem_tmp), x0; \
+ vmovdqu ((idx + 1) * 32)(mem_tmp), x1; \
+ vmovdqu ((idx + 2) * 32)(mem_tmp), x2; \
+ vmovdqu ((idx + 3) * 32)(mem_tmp), x3; \
+ vmovdqu ((idx + 4) * 32)(mem_tmp), x4; \
+ vmovdqu ((idx + 5) * 32)(mem_tmp), x5; \
+ vmovdqu ((idx + 6) * 32)(mem_tmp), x6; \
+ vmovdqu ((idx + 7) * 32)(mem_tmp), x7;
+
+#define aria_ark_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ t0, rk, idx, round) \
+ /* AddRoundKey */ \
+ vpbroadcastb ((round * 16) + idx + 3)(rk), t0; \
+ vpxor t0, x0, x0; \
+ vpbroadcastb ((round * 16) + idx + 2)(rk), t0; \
+ vpxor t0, x1, x1; \
+ vpbroadcastb ((round * 16) + idx + 1)(rk), t0; \
+ vpxor t0, x2, x2; \
+ vpbroadcastb ((round * 16) + idx + 0)(rk), t0; \
+ vpxor t0, x3, x3; \
+ vpbroadcastb ((round * 16) + idx + 7)(rk), t0; \
+ vpxor t0, x4, x4; \
+ vpbroadcastb ((round * 16) + idx + 6)(rk), t0; \
+ vpxor t0, x5, x5; \
+ vpbroadcastb ((round * 16) + idx + 5)(rk), t0; \
+ vpxor t0, x6, x6; \
+ vpbroadcastb ((round * 16) + idx + 4)(rk), t0; \
+ vpxor t0, x7, x7;
+
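Note: because the state is byte-sliced, each register holds one byte position of all 32 blocks, so AddRoundKey becomes a broadcast of a single round-key byte followed by one XOR per plane; the offsets 3,2,1,0,7,6,5,4 above select the key bytes in the order of the sliced planes. A scalar sketch of the same operation (the helper and array names are ours):

	#include <linux/types.h>

	/* Model of aria_ark_8way: XOR one round-key byte into each of the
	 * eight byte-sliced planes. */
	static const int ark_key_byte[8] = { 3, 2, 1, 0, 7, 6, 5, 4 };

	static void aria_ark_8way_model(u8 *plane[8], size_t nbytes,
					const u8 *rk, int idx, int round)
	{
		int i;
		size_t j;

		for (i = 0; i < 8; i++) {
			u8 k = rk[round * 16 + idx + ark_key_byte[i]];

			for (j = 0; j < nbytes; j++)
				plane[i][j] ^= k;
		}
	}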
+#ifdef CONFIG_AS_GFNI
+#define aria_sbox_8way_gfni(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ t0, t1, t2, t3, \
+ t4, t5, t6, t7) \
+ vpbroadcastq .Ltf_s2_bitmatrix, t0; \
+ vpbroadcastq .Ltf_inv_bitmatrix, t1; \
+ vpbroadcastq .Ltf_id_bitmatrix, t2; \
+ vpbroadcastq .Ltf_aff_bitmatrix, t3; \
+ vpbroadcastq .Ltf_x2_bitmatrix, t4; \
+ vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
+ vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
+ vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
+ vgf2p8affineqb $(tf_inv_const), t1, x6, x6; \
+ vgf2p8affineinvqb $0, t2, x2, x2; \
+ vgf2p8affineinvqb $0, t2, x6, x6; \
+ vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0; \
+ vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4; \
+ vgf2p8affineqb $(tf_x2_const), t4, x3, x3; \
+ vgf2p8affineqb $(tf_x2_const), t4, x7, x7; \
+ vgf2p8affineinvqb $0, t2, x3, x3; \
+ vgf2p8affineinvqb $0, t2, x7, x7
+
+#endif /* CONFIG_AS_GFNI */
+#define aria_sbox_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ t0, t1, t2, t3, \
+ t4, t5, t6, t7) \
+ vpxor t7, t7, t7; \
+ vpxor t6, t6, t6; \
+ vbroadcasti128 .Linv_shift_row, t0; \
+ vbroadcasti128 .Lshift_row, t1; \
+ vbroadcasti128 .Ltf_lo__inv_aff__and__s2, t2; \
+ vbroadcasti128 .Ltf_hi__inv_aff__and__s2, t3; \
+ vbroadcasti128 .Ltf_lo__x2__and__fwd_aff, t4; \
+ vbroadcasti128 .Ltf_hi__x2__and__fwd_aff, t5; \
+ \
+ vextracti128 $1, x0, t6##_x; \
+ vaesenclast t7##_x, x0##_x, x0##_x; \
+ vaesenclast t7##_x, t6##_x, t6##_x; \
+ vinserti128 $1, t6##_x, x0, x0; \
+ \
+ vextracti128 $1, x4, t6##_x; \
+ vaesenclast t7##_x, x4##_x, x4##_x; \
+ vaesenclast t7##_x, t6##_x, t6##_x; \
+ vinserti128 $1, t6##_x, x4, x4; \
+ \
+ vextracti128 $1, x1, t6##_x; \
+ vaesenclast t7##_x, x1##_x, x1##_x; \
+ vaesenclast t7##_x, t6##_x, t6##_x; \
+ vinserti128 $1, t6##_x, x1, x1; \
+ \
+ vextracti128 $1, x5, t6##_x; \
+ vaesenclast t7##_x, x5##_x, x5##_x; \
+ vaesenclast t7##_x, t6##_x, t6##_x; \
+ vinserti128 $1, t6##_x, x5, x5; \
+ \
+ vextracti128 $1, x2, t6##_x; \
+ vaesdeclast t7##_x, x2##_x, x2##_x; \
+ vaesdeclast t7##_x, t6##_x, t6##_x; \
+ vinserti128 $1, t6##_x, x2, x2; \
+ \
+ vextracti128 $1, x6, t6##_x; \
+ vaesdeclast t7##_x, x6##_x, x6##_x; \
+ vaesdeclast t7##_x, t6##_x, t6##_x; \
+ vinserti128 $1, t6##_x, x6, x6; \
+ \
+ vpbroadcastd .L0f0f0f0f, t6; \
+ \
+ /* AES inverse shift rows */ \
+ vpshufb t0, x0, x0; \
+ vpshufb t0, x4, x4; \
+ vpshufb t0, x1, x1; \
+ vpshufb t0, x5, x5; \
+ vpshufb t1, x3, x3; \
+ vpshufb t1, x7, x7; \
+ vpshufb t1, x2, x2; \
+ vpshufb t1, x6, x6; \
+ \
+ /* affine transformation for S2 */ \
+ filter_8bit(x1, t2, t3, t6, t0); \
+ /* affine transformation for S2 */ \
+ filter_8bit(x5, t2, t3, t6, t0); \
+ \
+ /* affine transformation for X2 */ \
+ filter_8bit(x3, t4, t5, t6, t0); \
+ /* affine transformation for X2 */ \
+ filter_8bit(x7, t4, t5, t6, t0); \
+ \
+ vpxor t6, t6, t6; \
+ vextracti128 $1, x3, t6##_x; \
+ vaesdeclast t7##_x, x3##_x, x3##_x; \
+ vaesdeclast t7##_x, t6##_x, t6##_x; \
+ vinserti128 $1, t6##_x, x3, x3; \
+ \
+ vextracti128 $1, x7, t6##_x; \
+ vaesdeclast t7##_x, x7##_x, x7##_x; \
+ vaesdeclast t7##_x, t6##_x, t6##_x; \
+ vinserti128 $1, t6##_x, x7, x7; \
+
+#define aria_diff_m(x0, x1, x2, x3, \
+ t0, t1, t2, t3) \
+ /* T = rotr32(X, 8); */ \
+ /* X ^= T */ \
+ vpxor x0, x3, t0; \
+ vpxor x1, x0, t1; \
+ vpxor x2, x1, t2; \
+ vpxor x3, x2, t3; \
+ /* X = T ^ rotr(X, 16); */ \
+ vpxor t2, x0, x0; \
+ vpxor x1, t3, t3; \
+ vpxor t0, x2, x2; \
+ vpxor t1, x3, x1; \
+ vmovdqu t3, x3;
+
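Note: the inline comments give the word-level form of this diffusion step; the assembly applies the same XOR network to byte-sliced registers, one register per byte of the word. A scalar model per packed 32-bit word, following those comments (the function name is ours):

	#include <linux/bitops.h>

	/* Model of aria_diff_m on a packed 32-bit word. */
	static inline u32 aria_diff_m_model(u32 x)
	{
		u32 t = ror32(x, 8);

		x ^= t;				/* X ^= rotr32(X, 8) */
		return t ^ ror32(x, 16);	/* X  = T ^ rotr32(X, 16) */
	}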
+#define aria_diff_word(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7) \
+ /* t1 ^= t2; */ \
+ vpxor y0, x4, x4; \
+ vpxor y1, x5, x5; \
+ vpxor y2, x6, x6; \
+ vpxor y3, x7, x7; \
+ \
+ /* t2 ^= t3; */ \
+ vpxor y4, y0, y0; \
+ vpxor y5, y1, y1; \
+ vpxor y6, y2, y2; \
+ vpxor y7, y3, y3; \
+ \
+ /* t0 ^= t1; */ \
+ vpxor x4, x0, x0; \
+ vpxor x5, x1, x1; \
+ vpxor x6, x2, x2; \
+ vpxor x7, x3, x3; \
+ \
+ /* t3 ^= t1; */ \
+ vpxor x4, y4, y4; \
+ vpxor x5, y5, y5; \
+ vpxor x6, y6, y6; \
+ vpxor x7, y7, y7; \
+ \
+ /* t2 ^= t0; */ \
+ vpxor x0, y0, y0; \
+ vpxor x1, y1, y1; \
+ vpxor x2, y2, y2; \
+ vpxor x3, y3, y3; \
+ \
+ /* t1 ^= t2; */ \
+ vpxor y0, x4, x4; \
+ vpxor y1, x5, x5; \
+ vpxor y2, x6, x6; \
+ vpxor y3, x7, x7;
+
+#define aria_fe(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, rk, round) \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 8, round); \
+ \
+ aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \
+ y0, y1, y2, y3, y4, y5, y6, y7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \
+ aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 8); \
+ \
+ aria_load_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 0, round); \
+ \
+ aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \
+ y0, y1, y2, y3, y4, y5, y6, y7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \
+ aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_load_state_8way(y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, 8); \
+ aria_diff_word(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ /* aria_diff_byte() \
+ * T3 = ABCD -> BADC \
+ * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \
+ * T0 = ABCD -> CDAB \
+ * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \
+ * T1 = ABCD -> DCBA \
+ * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \
+ */ \
+ aria_diff_word(x2, x3, x0, x1, \
+ x7, x6, x5, x4, \
+ y0, y1, y2, y3, \
+ y5, y4, y7, y6); \
+ aria_store_state_8way(x3, x2, x1, x0, \
+ x6, x7, x4, x5, \
+ mem_tmp, 0);
+
+#define aria_fo(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, rk, round) \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 8, round); \
+ \
+ aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, y1, y2, y3, y4, y5, y6, y7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \
+ aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 8); \
+ \
+ aria_load_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 0, round); \
+ \
+ aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, y1, y2, y3, y4, y5, y6, y7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \
+ aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_load_state_8way(y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, 8); \
+ aria_diff_word(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ /* aria_diff_byte() \
+ * T1 = ABCD -> BADC \
+ * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \
+ * T2 = ABCD -> CDAB \
+ * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \
+ * T3 = ABCD -> DCBA \
+ * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \
+ */ \
+ aria_diff_word(x0, x1, x2, x3, \
+ x5, x4, x7, x6, \
+ y2, y3, y0, y1, \
+ y7, y6, y5, y4); \
+ aria_store_state_8way(x3, x2, x1, x0, \
+ x6, x7, x4, x5, \
+ mem_tmp, 0);
+
+#define aria_ff(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, rk, round, last_round) \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 8, round); \
+ \
+ aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \
+ y0, y1, y2, y3, y4, y5, y6, y7); \
+ \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 8, last_round); \
+ \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 8); \
+ \
+ aria_load_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 0, round); \
+ \
+ aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \
+ y0, y1, y2, y3, y4, y5, y6, y7); \
+ \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 0, last_round); \
+ \
+ aria_load_state_8way(y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, 8);
+#ifdef CONFIG_AS_GFNI
+#define aria_fe_gfni(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, rk, round) \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 8, round); \
+ \
+ aria_sbox_8way_gfni(x2, x3, x0, x1, \
+ x6, x7, x4, x5, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \
+ aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 8); \
+ \
+ aria_load_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 0, round); \
+ \
+ aria_sbox_8way_gfni(x2, x3, x0, x1, \
+ x6, x7, x4, x5, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \
+ aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_load_state_8way(y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, 8); \
+ aria_diff_word(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ /* aria_diff_byte() \
+ * T3 = ABCD -> BADC \
+ * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \
+ * T0 = ABCD -> CDAB \
+ * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \
+ * T1 = ABCD -> DCBA \
+ * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \
+ */ \
+ aria_diff_word(x2, x3, x0, x1, \
+ x7, x6, x5, x4, \
+ y0, y1, y2, y3, \
+ y5, y4, y7, y6); \
+ aria_store_state_8way(x3, x2, x1, x0, \
+ x6, x7, x4, x5, \
+ mem_tmp, 0);
+
+#define aria_fo_gfni(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, rk, round) \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 8, round); \
+ \
+ aria_sbox_8way_gfni(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \
+ aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 8); \
+ \
+ aria_load_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 0, round); \
+ \
+ aria_sbox_8way_gfni(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \
+ aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_load_state_8way(y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, 8); \
+ aria_diff_word(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ /* aria_diff_byte() \
+ * T1 = ABCD -> BADC \
+ * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \
+ * T2 = ABCD -> CDAB \
+ * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \
+ * T3 = ABCD -> DCBA \
+ * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \
+ */ \
+ aria_diff_word(x0, x1, x2, x3, \
+ x5, x4, x7, x6, \
+ y2, y3, y0, y1, \
+ y7, y6, y5, y4); \
+ aria_store_state_8way(x3, x2, x1, x0, \
+ x6, x7, x4, x5, \
+ mem_tmp, 0);
+
+#define aria_ff_gfni(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, rk, round, last_round) \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 8, round); \
+ \
+ aria_sbox_8way_gfni(x2, x3, x0, x1, \
+ x6, x7, x4, x5, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 8, last_round); \
+ \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 8); \
+ \
+ aria_load_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 0, round); \
+ \
+ aria_sbox_8way_gfni(x2, x3, x0, x1, \
+ x6, x7, x4, x5, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 0, last_round); \
+ \
+ aria_load_state_8way(y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, 8);
+#endif /* CONFIG_AS_GFNI */
+
+.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32
+.align 32
+#define SHUFB_BYTES(idx) \
+ 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
+.Lshufb_16x16b:
+ .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
+ .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
+
+.section .rodata.cst16, "aM", @progbits, 16
+.align 16
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+ .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+ .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+.Lshift_row:
+ .byte 0x00, 0x05, 0x0a, 0x0f, 0x04, 0x09, 0x0e, 0x03
+ .byte 0x08, 0x0d, 0x02, 0x07, 0x0c, 0x01, 0x06, 0x0b
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+ .byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
+ .byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
+
+/* AES inverse affine and S2 combined:
+ * 1 1 0 0 0 0 0 1 x0 0
+ * 0 1 0 0 1 0 0 0 x1 0
+ * 1 1 0 0 1 1 1 1 x2 0
+ * 0 1 1 0 1 0 0 1 x3 1
+ * 0 1 0 0 1 1 0 0 * x4 + 0
+ * 0 1 0 1 1 0 0 0 x5 0
+ * 0 0 0 0 0 1 0 1 x6 0
+ * 1 1 1 0 0 1 1 1 x7 1
+ */
+.Ltf_lo__inv_aff__and__s2:
+ .octa 0x92172DA81A9FA520B2370D883ABF8500
+.Ltf_hi__inv_aff__and__s2:
+ .octa 0x2B15FFC1AF917B45E6D8320C625CB688
+
+/* X2 and AES forward affine combined:
+ * 1 0 1 1 0 0 0 1 x0 0
+ * 0 1 1 1 1 0 1 1 x1 0
+ * 0 0 0 1 1 0 1 0 x2 1
+ * 0 1 0 0 0 1 0 0 x3 0
+ * 0 0 1 1 1 0 1 1 * x4 + 0
+ * 0 1 0 0 1 0 0 0 x5 0
+ * 1 1 0 1 0 0 1 1 x6 0
+ * 0 1 0 0 1 0 1 0 x7 0
+ */
+.Ltf_lo__x2__and__fwd_aff:
+ .octa 0xEFAE0544FCBD1657B8F95213ABEA4100
+.Ltf_hi__x2__and__fwd_aff:
+ .octa 0x3F893781E95FE1576CDA64D2BA0CB204
+
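Note: the .octa constants above are the 16-entry low-nibble and high-nibble tables consumed by filter_8bit: each 8-bit affine map is evaluated as two vpshufb table lookups whose results are XORed together. A byte-level model (the function name is ours):

	#include <linux/types.h>

	/* Model of filter_8bit for a single byte x. */
	static inline u8 filter_8bit_model(u8 x, const u8 lo_t[16],
					   const u8 hi_t[16])
	{
		return lo_t[x & 0x0f] ^ hi_t[x >> 4];
	}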
+#ifdef CONFIG_AS_GFNI
+.section .rodata.cst8, "aM", @progbits, 8
+.align 8
+/* AES affine: */
+#define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0)
+.Ltf_aff_bitmatrix:
+ .quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1),
+ BV8(1, 1, 0, 0, 0, 1, 1, 1),
+ BV8(1, 1, 1, 0, 0, 0, 1, 1),
+ BV8(1, 1, 1, 1, 0, 0, 0, 1),
+ BV8(1, 1, 1, 1, 1, 0, 0, 0),
+ BV8(0, 1, 1, 1, 1, 1, 0, 0),
+ BV8(0, 0, 1, 1, 1, 1, 1, 0),
+ BV8(0, 0, 0, 1, 1, 1, 1, 1))
+
+/* AES inverse affine: */
+#define tf_inv_const BV8(1, 0, 1, 0, 0, 0, 0, 0)
+.Ltf_inv_bitmatrix:
+ .quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1),
+ BV8(1, 0, 0, 1, 0, 0, 1, 0),
+ BV8(0, 1, 0, 0, 1, 0, 0, 1),
+ BV8(1, 0, 1, 0, 0, 1, 0, 0),
+ BV8(0, 1, 0, 1, 0, 0, 1, 0),
+ BV8(0, 0, 1, 0, 1, 0, 0, 1),
+ BV8(1, 0, 0, 1, 0, 1, 0, 0),
+ BV8(0, 1, 0, 0, 1, 0, 1, 0))
+
+/* S2: */
+#define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1)
+.Ltf_s2_bitmatrix:
+ .quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1),
+ BV8(0, 0, 1, 1, 1, 1, 1, 1),
+ BV8(1, 1, 1, 0, 1, 1, 0, 1),
+ BV8(1, 1, 0, 0, 0, 0, 1, 1),
+ BV8(0, 1, 0, 0, 0, 0, 1, 1),
+ BV8(1, 1, 0, 0, 1, 1, 1, 0),
+ BV8(0, 1, 1, 0, 0, 0, 1, 1),
+ BV8(1, 1, 1, 1, 0, 1, 1, 0))
+
+/* X2: */
+#define tf_x2_const BV8(0, 0, 1, 1, 0, 1, 0, 0)
+.Ltf_x2_bitmatrix:
+ .quad BM8X8(BV8(0, 0, 0, 1, 1, 0, 0, 0),
+ BV8(0, 0, 1, 0, 0, 1, 1, 0),
+ BV8(0, 0, 0, 0, 1, 0, 1, 0),
+ BV8(1, 1, 1, 0, 0, 0, 1, 1),
+ BV8(1, 1, 1, 0, 1, 1, 0, 0),
+ BV8(0, 1, 1, 0, 1, 0, 1, 1),
+ BV8(1, 0, 1, 1, 1, 1, 0, 1),
+ BV8(1, 0, 0, 1, 0, 0, 1, 1))
+
+/* Identity matrix: */
+.Ltf_id_bitmatrix:
+ .quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0),
+ BV8(0, 1, 0, 0, 0, 0, 0, 0),
+ BV8(0, 0, 1, 0, 0, 0, 0, 0),
+ BV8(0, 0, 0, 1, 0, 0, 0, 0),
+ BV8(0, 0, 0, 0, 1, 0, 0, 0),
+ BV8(0, 0, 0, 0, 0, 1, 0, 0),
+ BV8(0, 0, 0, 0, 0, 0, 1, 0),
+ BV8(0, 0, 0, 0, 0, 0, 0, 1))
+
+#endif /* CONFIG_AS_GFNI */
+
+/* 4-bit mask */
+.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
+.align 4
+.L0f0f0f0f:
+ .long 0x0f0f0f0f
+
+.text
+
+SYM_FUNC_START_LOCAL(__aria_aesni_avx2_crypt_32way)
+ /* input:
+ * %r9: rk
+ * %rsi: dst
+ * %rdx: src
+ * %ymm0..%ymm15: byte-sliced blocks
+ */
+
+ FRAME_BEGIN
+
+ movq %rsi, %rax;
+ leaq 8 * 32(%rax), %r8;
+
+ inpack16_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r8);
+ aria_fo(%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 0);
+ aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 1);
+ aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 2);
+ aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 3);
+ aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 4);
+ aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 5);
+ aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 6);
+ aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 7);
+ aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 8);
+ aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 9);
+ aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 10);
+ cmpl $12, ARIA_CTX_rounds(CTX);
+ jne .Laria_192;
+ aria_ff(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 11, 12);
+ jmp .Laria_end;
+.Laria_192:
+ aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 11);
+ aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 12);
+ cmpl $14, ARIA_CTX_rounds(CTX);
+ jne .Laria_256;
+ aria_ff(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 13, 14);
+ jmp .Laria_end;
+.Laria_256:
+ aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 13);
+ aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 14);
+ aria_ff(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 15, 16);
+.Laria_end:
+ debyteslice_16x16b(%ymm8, %ymm12, %ymm1, %ymm4,
+ %ymm9, %ymm13, %ymm0, %ymm5,
+ %ymm10, %ymm14, %ymm3, %ymm6,
+ %ymm11, %ymm15, %ymm2, %ymm7,
+ (%rax), (%r8));
+
+ FRAME_END
+ RET;
+SYM_FUNC_END(__aria_aesni_avx2_crypt_32way)
+
+SYM_TYPED_FUNC_START(aria_aesni_avx2_encrypt_32way)
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ FRAME_BEGIN
+
+ leaq ARIA_CTX_enc_key(CTX), %r9;
+
+ inpack16_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rdx);
+
+ call __aria_aesni_avx2_crypt_32way;
+
+ write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax);
+
+ FRAME_END
+ RET;
+SYM_FUNC_END(aria_aesni_avx2_encrypt_32way)
+
+SYM_TYPED_FUNC_START(aria_aesni_avx2_decrypt_32way)
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ FRAME_BEGIN
+
+ leaq ARIA_CTX_dec_key(CTX), %r9;
+
+ inpack16_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rdx);
+
+ call __aria_aesni_avx2_crypt_32way;
+
+ write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax);
+
+ FRAME_END
+ RET;
+SYM_FUNC_END(aria_aesni_avx2_decrypt_32way)
+
+SYM_FUNC_START_LOCAL(__aria_aesni_avx2_ctr_gen_keystream_32way)
+ /* input:
+ * %rdi: ctx
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: keystream
+ * %r8: iv (big endian, 128bit)
+ */
+
+ FRAME_BEGIN
+ movq 8(%r8), %r11;
+ bswapq %r11;
+
+ vbroadcasti128 .Lbswap128_mask (%rip), %ymm6;
+ vpcmpeqd %ymm0, %ymm0, %ymm0;
+ vpsrldq $8, %ymm0, %ymm0; /* ab: -1:0 ; cd: -1:0 */
+ vpaddq %ymm0, %ymm0, %ymm5; /* ab: -2:0 ; cd: -2:0 */
+
+ /* load IV and byteswap */
+ vmovdqu (%r8), %xmm7;
+ vpshufb %xmm6, %xmm7, %xmm7;
+ vmovdqa %xmm7, %xmm3;
+ inc_le128(%xmm7, %xmm0, %xmm4);
+ vinserti128 $1, %xmm7, %ymm3, %ymm3;
+ vpshufb %ymm6, %ymm3, %ymm8; /* +1 ; +0 */
+
+ /* check whether 64-bit overflow and carry handling is needed */
+ cmpq $(0xffffffffffffffff - 32), %r11;
+ ja .Lhandle_ctr_carry;
+
+ /* construct IVs */
+ vpsubq %ymm5, %ymm3, %ymm3; /* +3 ; +2 */
+ vpshufb %ymm6, %ymm3, %ymm9;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +5 ; +4 */
+ vpshufb %ymm6, %ymm3, %ymm10;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +7 ; +6 */
+ vpshufb %ymm6, %ymm3, %ymm11;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +9 ; +8 */
+ vpshufb %ymm6, %ymm3, %ymm12;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +11 ; +10 */
+ vpshufb %ymm6, %ymm3, %ymm13;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +13 ; +12 */
+ vpshufb %ymm6, %ymm3, %ymm14;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +15 ; +14 */
+ vpshufb %ymm6, %ymm3, %ymm15;
+ vmovdqu %ymm8, (0 * 32)(%rcx);
+ vmovdqu %ymm9, (1 * 32)(%rcx);
+ vmovdqu %ymm10, (2 * 32)(%rcx);
+ vmovdqu %ymm11, (3 * 32)(%rcx);
+ vmovdqu %ymm12, (4 * 32)(%rcx);
+ vmovdqu %ymm13, (5 * 32)(%rcx);
+ vmovdqu %ymm14, (6 * 32)(%rcx);
+ vmovdqu %ymm15, (7 * 32)(%rcx);
+
+ vpsubq %ymm5, %ymm3, %ymm3; /* +17 ; +16 */
+ vpshufb %ymm6, %ymm3, %ymm8;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +19 ; +18 */
+ vpshufb %ymm6, %ymm3, %ymm9;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +21 ; +20 */
+ vpshufb %ymm6, %ymm3, %ymm10;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +23 ; +22 */
+ vpshufb %ymm6, %ymm3, %ymm11;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +25 ; +24 */
+ vpshufb %ymm6, %ymm3, %ymm12;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +27 ; +26 */
+ vpshufb %ymm6, %ymm3, %ymm13;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +29 ; +28 */
+ vpshufb %ymm6, %ymm3, %ymm14;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +31 ; +30 */
+ vpshufb %ymm6, %ymm3, %ymm15;
+ vpsubq %ymm5, %ymm3, %ymm3; /* +32 */
+ vpshufb %xmm6, %xmm3, %xmm3;
+ vmovdqu %xmm3, (%r8);
+ vmovdqu (0 * 32)(%rcx), %ymm0;
+ vmovdqu (1 * 32)(%rcx), %ymm1;
+ vmovdqu (2 * 32)(%rcx), %ymm2;
+ vmovdqu (3 * 32)(%rcx), %ymm3;
+ vmovdqu (4 * 32)(%rcx), %ymm4;
+ vmovdqu (5 * 32)(%rcx), %ymm5;
+ vmovdqu (6 * 32)(%rcx), %ymm6;
+ vmovdqu (7 * 32)(%rcx), %ymm7;
+ jmp .Lctr_carry_done;
+
+ .Lhandle_ctr_carry:
+ /* construct IVs */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm9; /* +3 ; +2 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm10; /* +5 ; +4 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm11; /* +7 ; +6 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm12; /* +9 ; +8 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm13; /* +11 ; +10 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm14; /* +13 ; +12 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm15; /* +15 ; +14 */
+ vmovdqu %ymm8, (0 * 32)(%rcx);
+ vmovdqu %ymm9, (1 * 32)(%rcx);
+ vmovdqu %ymm10, (2 * 32)(%rcx);
+ vmovdqu %ymm11, (3 * 32)(%rcx);
+ vmovdqu %ymm12, (4 * 32)(%rcx);
+ vmovdqu %ymm13, (5 * 32)(%rcx);
+ vmovdqu %ymm14, (6 * 32)(%rcx);
+ vmovdqu %ymm15, (7 * 32)(%rcx);
+
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm8; /* +17 ; +16 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm9; /* +19 ; +18 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm10; /* +21 ; +20 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm11; /* +23 ; +22 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm12; /* +25 ; +24 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm13; /* +27 ; +26 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm14; /* +29 ; +28 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vpshufb %ymm6, %ymm3, %ymm15; /* +31 ; +30 */
+ inc_le128(%ymm3, %ymm0, %ymm4);
+ vextracti128 $1, %ymm3, %xmm3;
+ vpshufb %xmm6, %xmm3, %xmm3; /* +32 */
+ vmovdqu %xmm3, (%r8);
+ vmovdqu (0 * 32)(%rcx), %ymm0;
+ vmovdqu (1 * 32)(%rcx), %ymm1;
+ vmovdqu (2 * 32)(%rcx), %ymm2;
+ vmovdqu (3 * 32)(%rcx), %ymm3;
+ vmovdqu (4 * 32)(%rcx), %ymm4;
+ vmovdqu (5 * 32)(%rcx), %ymm5;
+ vmovdqu (6 * 32)(%rcx), %ymm6;
+ vmovdqu (7 * 32)(%rcx), %ymm7;
+
+ .Lctr_carry_done:
+
+ FRAME_END
+ RET;
+SYM_FUNC_END(__aria_aesni_avx2_ctr_gen_keystream_32way)
+
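Note: the keystream generator takes a fast path that only adjusts the low counter qword, and falls back to the per-block inc_le128 path when the low 64 bits of the IV could wrap within the next 32 increments; that is what the cmpq/ja pair above decides. The same check in scalar form (the function name is ours):

	#include <linux/types.h>

	/* Model of the fast/slow path selection in the keystream generator. */
	static inline bool ctr_low_half_may_wrap(u64 ctr_lo, unsigned int nblocks)
	{
		return ctr_lo > ~0ULL - nblocks;	/* nblocks == 32 here */
	}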
+SYM_TYPED_FUNC_START(aria_aesni_avx2_ctr_crypt_32way)
+ /* input:
+ * %rdi: ctx
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: keystream
+ * %r8: iv (big endian, 128bit)
+ */
+ FRAME_BEGIN
+
+ call __aria_aesni_avx2_ctr_gen_keystream_32way;
+
+ leaq (%rsi), %r10;
+ leaq (%rdx), %r11;
+ leaq (%rcx), %rsi;
+ leaq (%rcx), %rdx;
+ leaq ARIA_CTX_enc_key(CTX), %r9;
+
+ call __aria_aesni_avx2_crypt_32way;
+
+ vpxor (0 * 32)(%r11), %ymm1, %ymm1;
+ vpxor (1 * 32)(%r11), %ymm0, %ymm0;
+ vpxor (2 * 32)(%r11), %ymm3, %ymm3;
+ vpxor (3 * 32)(%r11), %ymm2, %ymm2;
+ vpxor (4 * 32)(%r11), %ymm4, %ymm4;
+ vpxor (5 * 32)(%r11), %ymm5, %ymm5;
+ vpxor (6 * 32)(%r11), %ymm6, %ymm6;
+ vpxor (7 * 32)(%r11), %ymm7, %ymm7;
+ vpxor (8 * 32)(%r11), %ymm8, %ymm8;
+ vpxor (9 * 32)(%r11), %ymm9, %ymm9;
+ vpxor (10 * 32)(%r11), %ymm10, %ymm10;
+ vpxor (11 * 32)(%r11), %ymm11, %ymm11;
+ vpxor (12 * 32)(%r11), %ymm12, %ymm12;
+ vpxor (13 * 32)(%r11), %ymm13, %ymm13;
+ vpxor (14 * 32)(%r11), %ymm14, %ymm14;
+ vpxor (15 * 32)(%r11), %ymm15, %ymm15;
+ write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %r10);
+
+ FRAME_END
+ RET;
+SYM_FUNC_END(aria_aesni_avx2_ctr_crypt_32way)
+
+#ifdef CONFIG_AS_GFNI
+SYM_FUNC_START_LOCAL(__aria_aesni_avx2_gfni_crypt_32way)
+ /* input:
+ * %r9: rk
+ * %rsi: dst
+ * %rdx: src
+ * %ymm0..%ymm15: byte-sliced blocks
+ */
+
+ FRAME_BEGIN
+
+ movq %rsi, %rax;
+ leaq 8 * 32(%rax), %r8;
+
+ inpack16_post(%ymm0, %ymm1, %ymm2, %ymm3,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r8);
+ aria_fo_gfni(%ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 0);
+ aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 1);
+ aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10,
+ %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 2);
+ aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 3);
+ aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10,
+ %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 4);
+ aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 5);
+ aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10,
+ %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 6);
+ aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 7);
+ aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10,
+ %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 8);
+ aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 9);
+ aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10,
+ %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 10);
+ cmpl $12, ARIA_CTX_rounds(CTX);
+ jne .Laria_gfni_192;
+ aria_ff_gfni(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 11, 12);
+ jmp .Laria_gfni_end;
+.Laria_gfni_192:
+ aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 11);
+ aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10,
+ %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 12);
+ cmpl $14, ARIA_CTX_rounds(CTX);
+ jne .Laria_gfni_256;
+ aria_ff_gfni(%ymm1, %ymm0, %ymm3, %ymm2,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 13, 14);
+ jmp .Laria_gfni_end;
+.Laria_gfni_256:
+ aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 13);
+ aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10,
+ %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 14);
+ aria_ff_gfni(%ymm1, %ymm0, %ymm3, %ymm2,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r9, 15, 16);
+.Laria_gfni_end:
+ debyteslice_16x16b(%ymm8, %ymm12, %ymm1, %ymm4,
+ %ymm9, %ymm13, %ymm0, %ymm5,
+ %ymm10, %ymm14, %ymm3, %ymm6,
+ %ymm11, %ymm15, %ymm2, %ymm7,
+ (%rax), (%r8));
+
+ FRAME_END
+ RET;
+SYM_FUNC_END(__aria_aesni_avx2_gfni_crypt_32way)
+
+SYM_TYPED_FUNC_START(aria_aesni_avx2_gfni_encrypt_32way)
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ FRAME_BEGIN
+
+ leaq ARIA_CTX_enc_key(CTX), %r9;
+
+ inpack16_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rdx);
+
+ call __aria_aesni_avx2_gfni_crypt_32way;
+
+ write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax);
+
+ FRAME_END
+ RET;
+SYM_FUNC_END(aria_aesni_avx2_gfni_encrypt_32way)
+
+SYM_TYPED_FUNC_START(aria_aesni_avx2_gfni_decrypt_32way)
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ FRAME_BEGIN
+
+ leaq ARIA_CTX_dec_key(CTX), %r9;
+
+ inpack16_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rdx);
+
+ call __aria_aesni_avx2_gfni_crypt_32way;
+
+ write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax);
+
+ FRAME_END
+ RET;
+SYM_FUNC_END(aria_aesni_avx2_gfni_decrypt_32way)
+
+SYM_TYPED_FUNC_START(aria_aesni_avx2_gfni_ctr_crypt_32way)
+ /* input:
+ * %rdi: ctx
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: keystream
+ * %r8: iv (big endian, 128bit)
+ */
+ FRAME_BEGIN
+
+ call __aria_aesni_avx2_ctr_gen_keystream_32way
+
+ leaq (%rsi), %r10;
+ leaq (%rdx), %r11;
+ leaq (%rcx), %rsi;
+ leaq (%rcx), %rdx;
+ leaq ARIA_CTX_enc_key(CTX), %r9;
+
+ call __aria_aesni_avx2_gfni_crypt_32way;
+
+ vpxor (0 * 32)(%r11), %ymm1, %ymm1;
+ vpxor (1 * 32)(%r11), %ymm0, %ymm0;
+ vpxor (2 * 32)(%r11), %ymm3, %ymm3;
+ vpxor (3 * 32)(%r11), %ymm2, %ymm2;
+ vpxor (4 * 32)(%r11), %ymm4, %ymm4;
+ vpxor (5 * 32)(%r11), %ymm5, %ymm5;
+ vpxor (6 * 32)(%r11), %ymm6, %ymm6;
+ vpxor (7 * 32)(%r11), %ymm7, %ymm7;
+ vpxor (8 * 32)(%r11), %ymm8, %ymm8;
+ vpxor (9 * 32)(%r11), %ymm9, %ymm9;
+ vpxor (10 * 32)(%r11), %ymm10, %ymm10;
+ vpxor (11 * 32)(%r11), %ymm11, %ymm11;
+ vpxor (12 * 32)(%r11), %ymm12, %ymm12;
+ vpxor (13 * 32)(%r11), %ymm13, %ymm13;
+ vpxor (14 * 32)(%r11), %ymm14, %ymm14;
+ vpxor (15 * 32)(%r11), %ymm15, %ymm15;
+ write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %r10);
+
+ FRAME_END
+ RET;
+SYM_FUNC_END(aria_aesni_avx2_gfni_ctr_crypt_32way)
+#endif /* CONFIG_AS_GFNI */
diff --git a/arch/x86/crypto/aria-avx.h b/arch/x86/crypto/aria-avx.h
index 01e9a01dc157..6e1b2d8a31ed 100644
--- a/arch/x86/crypto/aria-avx.h
+++ b/arch/x86/crypto/aria-avx.h
@@ -5,12 +5,58 @@
#include <linux/types.h>
#define ARIA_AESNI_PARALLEL_BLOCKS 16
-#define ARIA_AESNI_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * 16)
+#define ARIA_AESNI_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_AESNI_PARALLEL_BLOCKS)
+
+#define ARIA_AESNI_AVX2_PARALLEL_BLOCKS 32
+#define ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_AESNI_AVX2_PARALLEL_BLOCKS)
+
+#define ARIA_GFNI_AVX512_PARALLEL_BLOCKS 64
+#define ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_GFNI_AVX512_PARALLEL_BLOCKS)
+
+asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst,
+ const u8 *src,
+ u8 *keystream, u8 *iv);
+asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void aria_aesni_avx_gfni_decrypt_16way(const void *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst,
+ const u8 *src,
+ u8 *keystream, u8 *iv);
+
+asmlinkage void aria_aesni_avx2_encrypt_32way(const void *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void aria_aesni_avx2_decrypt_32way(const void *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void aria_aesni_avx2_ctr_crypt_32way(const void *ctx, u8 *dst,
+ const u8 *src,
+ u8 *keystream, u8 *iv);
+asmlinkage void aria_aesni_avx2_gfni_encrypt_32way(const void *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void aria_aesni_avx2_gfni_decrypt_32way(const void *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void aria_aesni_avx2_gfni_ctr_crypt_32way(const void *ctx, u8 *dst,
+ const u8 *src,
+ u8 *keystream, u8 *iv);
struct aria_avx_ops {
void (*aria_encrypt_16way)(const void *ctx, u8 *dst, const u8 *src);
void (*aria_decrypt_16way)(const void *ctx, u8 *dst, const u8 *src);
void (*aria_ctr_crypt_16way)(const void *ctx, u8 *dst, const u8 *src,
u8 *keystream, u8 *iv);
+ void (*aria_encrypt_32way)(const void *ctx, u8 *dst, const u8 *src);
+ void (*aria_decrypt_32way)(const void *ctx, u8 *dst, const u8 *src);
+ void (*aria_ctr_crypt_32way)(const void *ctx, u8 *dst, const u8 *src,
+ u8 *keystream, u8 *iv);
+ void (*aria_encrypt_64way)(const void *ctx, u8 *dst, const u8 *src);
+ void (*aria_decrypt_64way)(const void *ctx, u8 *dst, const u8 *src);
+ void (*aria_ctr_crypt_64way)(const void *ctx, u8 *dst, const u8 *src,
+ u8 *keystream, u8 *iv);
+
+
};
#endif
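Note: the header now declares the 32-way AVX2 entry points and grows struct aria_avx_ops with 32-way and 64-way slots; the glue code that fills them is not part of this excerpt. A hedged sketch of how such a selection could look, using only the symbols declared above plus boot_cpu_has() (the function and variable names are ours, not the patch's):

	#include <asm/cpufeature.h>
	#include "aria-avx.h"

	/* Illustrative only: pick the 32-way handlers for an ops table. */
	static struct aria_avx_ops aria_ops;

	static void aria_avx2_select_ops(void)
	{
		if (boot_cpu_has(X86_FEATURE_GFNI)) {
			aria_ops.aria_encrypt_32way = aria_aesni_avx2_gfni_encrypt_32way;
			aria_ops.aria_decrypt_32way = aria_aesni_avx2_gfni_decrypt_32way;
			aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_gfni_ctr_crypt_32way;
		} else {
			aria_ops.aria_encrypt_32way = aria_aesni_avx2_encrypt_32way;
			aria_ops.aria_decrypt_32way = aria_aesni_avx2_decrypt_32way;
			aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_ctr_crypt_32way;
		}
	}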
diff --git a/arch/x86/crypto/aria-gfni-avx512-asm_64.S b/arch/x86/crypto/aria-gfni-avx512-asm_64.S
new file mode 100644
index 000000000000..3193f0701450
--- /dev/null
+++ b/arch/x86/crypto/aria-gfni-avx512-asm_64.S
@@ -0,0 +1,971 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * ARIA Cipher 64-way parallel algorithm (AVX512)
+ *
+ * Copyright (c) 2022 Taehee Yoo <[email protected]>
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+#include <asm/asm-offsets.h>
+#include <linux/cfi_types.h>
+
+/* register macros */
+#define CTX %rdi
+
+
+#define BV8(a0, a1, a2, a3, a4, a5, a6, a7) \
+ ( (((a0) & 1) << 0) | \
+ (((a1) & 1) << 1) | \
+ (((a2) & 1) << 2) | \
+ (((a3) & 1) << 3) | \
+ (((a4) & 1) << 4) | \
+ (((a5) & 1) << 5) | \
+ (((a6) & 1) << 6) | \
+ (((a7) & 1) << 7) )
+
+#define BM8X8(l0, l1, l2, l3, l4, l5, l6, l7) \
+ ( ((l7) << (0 * 8)) | \
+ ((l6) << (1 * 8)) | \
+ ((l5) << (2 * 8)) | \
+ ((l4) << (3 * 8)) | \
+ ((l3) << (4 * 8)) | \
+ ((l2) << (5 * 8)) | \
+ ((l1) << (6 * 8)) | \
+ ((l0) << (7 * 8)) )
+
+#define add_le128(out, in, lo_counter, hi_counter1) \
+ vpaddq lo_counter, in, out; \
+ vpcmpuq $1, lo_counter, out, %k1; \
+ kaddb %k1, %k1, %k1; \
+ vpaddq hi_counter1, out, out{%k1};
+
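Note: add_le128 is the AVX-512 counterpart of the earlier inc_le128: it adds a per-lane increment held in the low qword, detects overflow with an unsigned compare into a mask register, shifts that mask up one lane with kaddb, and then adds the carry into the high qword under the mask. A scalar model of one lane (the function name is ours):

	#include <linux/types.h>

	/* Model of add_le128: 128-bit little-endian add with carry from
	 * the low 64-bit half into the high half. */
	static inline void add_le128_model(u64 *lo, u64 *hi, u64 inc)
	{
		u64 old = *lo;

		*lo += inc;
		*hi += (*lo < old);	/* carry out of the low half */
	}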
+#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpandq x, mask4bit, tmp0; \
+ vpandqn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxorq tmp0, x, x;
+
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+#define byteslice_16x16b(a0, b0, c0, d0, \
+ a1, b1, c1, d1, \
+ a2, b2, c2, d2, \
+ a3, b3, c3, d3, \
+ st0, st1) \
+ vmovdqu64 d2, st0; \
+ vmovdqu64 d3, st1; \
+ transpose_4x4(a0, a1, a2, a3, d2, d3); \
+ transpose_4x4(b0, b1, b2, b3, d2, d3); \
+ vmovdqu64 st0, d2; \
+ vmovdqu64 st1, d3; \
+ \
+ vmovdqu64 a0, st0; \
+ vmovdqu64 a1, st1; \
+ transpose_4x4(c0, c1, c2, c3, a0, a1); \
+ transpose_4x4(d0, d1, d2, d3, a0, a1); \
+ \
+ vbroadcasti64x2 .Lshufb_16x16b, a0; \
+ vmovdqu64 st1, a1; \
+ vpshufb a0, a2, a2; \
+ vpshufb a0, a3, a3; \
+ vpshufb a0, b0, b0; \
+ vpshufb a0, b1, b1; \
+ vpshufb a0, b2, b2; \
+ vpshufb a0, b3, b3; \
+ vpshufb a0, a1, a1; \
+ vpshufb a0, c0, c0; \
+ vpshufb a0, c1, c1; \
+ vpshufb a0, c2, c2; \
+ vpshufb a0, c3, c3; \
+ vpshufb a0, d0, d0; \
+ vpshufb a0, d1, d1; \
+ vpshufb a0, d2, d2; \
+ vpshufb a0, d3, d3; \
+ vmovdqu64 d3, st1; \
+ vmovdqu64 st0, d3; \
+ vpshufb a0, d3, a0; \
+ vmovdqu64 d2, st0; \
+ \
+ transpose_4x4(a0, b0, c0, d0, d2, d3); \
+ transpose_4x4(a1, b1, c1, d1, d2, d3); \
+ vmovdqu64 st0, d2; \
+ vmovdqu64 st1, d3; \
+ \
+ vmovdqu64 b0, st0; \
+ vmovdqu64 b1, st1; \
+ transpose_4x4(a2, b2, c2, d2, b0, b1); \
+ transpose_4x4(a3, b3, c3, d3, b0, b1); \
+ vmovdqu64 st0, b0; \
+ vmovdqu64 st1, b1; \
+ /* does not adjust output bytes inside vectors */
+
+#define debyteslice_16x16b(a0, b0, c0, d0, \
+ a1, b1, c1, d1, \
+ a2, b2, c2, d2, \
+ a3, b3, c3, d3, \
+ st0, st1) \
+ vmovdqu64 d2, st0; \
+ vmovdqu64 d3, st1; \
+ transpose_4x4(a0, a1, a2, a3, d2, d3); \
+ transpose_4x4(b0, b1, b2, b3, d2, d3); \
+ vmovdqu64 st0, d2; \
+ vmovdqu64 st1, d3; \
+ \
+ vmovdqu64 a0, st0; \
+ vmovdqu64 a1, st1; \
+ transpose_4x4(c0, c1, c2, c3, a0, a1); \
+ transpose_4x4(d0, d1, d2, d3, a0, a1); \
+ \
+ vbroadcasti64x2 .Lshufb_16x16b, a0; \
+ vmovdqu64 st1, a1; \
+ vpshufb a0, a2, a2; \
+ vpshufb a0, a3, a3; \
+ vpshufb a0, b0, b0; \
+ vpshufb a0, b1, b1; \
+ vpshufb a0, b2, b2; \
+ vpshufb a0, b3, b3; \
+ vpshufb a0, a1, a1; \
+ vpshufb a0, c0, c0; \
+ vpshufb a0, c1, c1; \
+ vpshufb a0, c2, c2; \
+ vpshufb a0, c3, c3; \
+ vpshufb a0, d0, d0; \
+ vpshufb a0, d1, d1; \
+ vpshufb a0, d2, d2; \
+ vpshufb a0, d3, d3; \
+ vmovdqu64 d3, st1; \
+ vmovdqu64 st0, d3; \
+ vpshufb a0, d3, a0; \
+ vmovdqu64 d2, st0; \
+ \
+ transpose_4x4(c0, d0, a0, b0, d2, d3); \
+ transpose_4x4(c1, d1, a1, b1, d2, d3); \
+ vmovdqu64 st0, d2; \
+ vmovdqu64 st1, d3; \
+ \
+ vmovdqu64 b0, st0; \
+ vmovdqu64 b1, st1; \
+ transpose_4x4(c2, d2, a2, b2, b0, b1); \
+ transpose_4x4(c3, d3, a3, b3, b0, b1); \
+ vmovdqu64 st0, b0; \
+ vmovdqu64 st1, b1; \
+ /* does not adjust output bytes inside vectors */
+
+/* load blocks to registers and apply pre-whitening */
+#define inpack16_pre(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ rio) \
+ vmovdqu64 (0 * 64)(rio), x0; \
+ vmovdqu64 (1 * 64)(rio), x1; \
+ vmovdqu64 (2 * 64)(rio), x2; \
+ vmovdqu64 (3 * 64)(rio), x3; \
+ vmovdqu64 (4 * 64)(rio), x4; \
+ vmovdqu64 (5 * 64)(rio), x5; \
+ vmovdqu64 (6 * 64)(rio), x6; \
+ vmovdqu64 (7 * 64)(rio), x7; \
+ vmovdqu64 (8 * 64)(rio), y0; \
+ vmovdqu64 (9 * 64)(rio), y1; \
+ vmovdqu64 (10 * 64)(rio), y2; \
+ vmovdqu64 (11 * 64)(rio), y3; \
+ vmovdqu64 (12 * 64)(rio), y4; \
+ vmovdqu64 (13 * 64)(rio), y5; \
+ vmovdqu64 (14 * 64)(rio), y6; \
+ vmovdqu64 (15 * 64)(rio), y7;
+
+/* byteslice pre-whitened blocks and store to temporary memory */
+#define inpack16_post(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_ab, mem_cd) \
+ byteslice_16x16b(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ (mem_ab), (mem_cd)); \
+ \
+ vmovdqu64 x0, 0 * 64(mem_ab); \
+ vmovdqu64 x1, 1 * 64(mem_ab); \
+ vmovdqu64 x2, 2 * 64(mem_ab); \
+ vmovdqu64 x3, 3 * 64(mem_ab); \
+ vmovdqu64 x4, 4 * 64(mem_ab); \
+ vmovdqu64 x5, 5 * 64(mem_ab); \
+ vmovdqu64 x6, 6 * 64(mem_ab); \
+ vmovdqu64 x7, 7 * 64(mem_ab); \
+ vmovdqu64 y0, 0 * 64(mem_cd); \
+ vmovdqu64 y1, 1 * 64(mem_cd); \
+ vmovdqu64 y2, 2 * 64(mem_cd); \
+ vmovdqu64 y3, 3 * 64(mem_cd); \
+ vmovdqu64 y4, 4 * 64(mem_cd); \
+ vmovdqu64 y5, 5 * 64(mem_cd); \
+ vmovdqu64 y6, 6 * 64(mem_cd); \
+ vmovdqu64 y7, 7 * 64(mem_cd);
+
+#define write_output(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem) \
+ vmovdqu64 x0, 0 * 64(mem); \
+ vmovdqu64 x1, 1 * 64(mem); \
+ vmovdqu64 x2, 2 * 64(mem); \
+ vmovdqu64 x3, 3 * 64(mem); \
+ vmovdqu64 x4, 4 * 64(mem); \
+ vmovdqu64 x5, 5 * 64(mem); \
+ vmovdqu64 x6, 6 * 64(mem); \
+ vmovdqu64 x7, 7 * 64(mem); \
+ vmovdqu64 y0, 8 * 64(mem); \
+ vmovdqu64 y1, 9 * 64(mem); \
+ vmovdqu64 y2, 10 * 64(mem); \
+ vmovdqu64 y3, 11 * 64(mem); \
+ vmovdqu64 y4, 12 * 64(mem); \
+ vmovdqu64 y5, 13 * 64(mem); \
+ vmovdqu64 y6, 14 * 64(mem); \
+ vmovdqu64 y7, 15 * 64(mem); \
+
+#define aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, idx) \
+ vmovdqu64 x0, ((idx + 0) * 64)(mem_tmp); \
+ vmovdqu64 x1, ((idx + 1) * 64)(mem_tmp); \
+ vmovdqu64 x2, ((idx + 2) * 64)(mem_tmp); \
+ vmovdqu64 x3, ((idx + 3) * 64)(mem_tmp); \
+ vmovdqu64 x4, ((idx + 4) * 64)(mem_tmp); \
+ vmovdqu64 x5, ((idx + 5) * 64)(mem_tmp); \
+ vmovdqu64 x6, ((idx + 6) * 64)(mem_tmp); \
+ vmovdqu64 x7, ((idx + 7) * 64)(mem_tmp);
+
+#define aria_load_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, idx) \
+ vmovdqu64 ((idx + 0) * 64)(mem_tmp), x0; \
+ vmovdqu64 ((idx + 1) * 64)(mem_tmp), x1; \
+ vmovdqu64 ((idx + 2) * 64)(mem_tmp), x2; \
+ vmovdqu64 ((idx + 3) * 64)(mem_tmp), x3; \
+ vmovdqu64 ((idx + 4) * 64)(mem_tmp), x4; \
+ vmovdqu64 ((idx + 5) * 64)(mem_tmp), x5; \
+ vmovdqu64 ((idx + 6) * 64)(mem_tmp), x6; \
+ vmovdqu64 ((idx + 7) * 64)(mem_tmp), x7;
+
+#define aria_ark_16way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ t0, rk, round) \
+ /* AddRoundKey */ \
+ vpbroadcastb ((round * 16) + 3)(rk), t0; \
+ vpxorq t0, x0, x0; \
+ vpbroadcastb ((round * 16) + 2)(rk), t0; \
+ vpxorq t0, x1, x1; \
+ vpbroadcastb ((round * 16) + 1)(rk), t0; \
+ vpxorq t0, x2, x2; \
+ vpbroadcastb ((round * 16) + 0)(rk), t0; \
+ vpxorq t0, x3, x3; \
+ vpbroadcastb ((round * 16) + 7)(rk), t0; \
+ vpxorq t0, x4, x4; \
+ vpbroadcastb ((round * 16) + 6)(rk), t0; \
+ vpxorq t0, x5, x5; \
+ vpbroadcastb ((round * 16) + 5)(rk), t0; \
+ vpxorq t0, x6, x6; \
+ vpbroadcastb ((round * 16) + 4)(rk), t0; \
+ vpxorq t0, x7, x7; \
+ vpbroadcastb ((round * 16) + 11)(rk), t0; \
+ vpxorq t0, y0, y0; \
+ vpbroadcastb ((round * 16) + 10)(rk), t0; \
+ vpxorq t0, y1, y1; \
+ vpbroadcastb ((round * 16) + 9)(rk), t0; \
+ vpxorq t0, y2, y2; \
+ vpbroadcastb ((round * 16) + 8)(rk), t0; \
+ vpxorq t0, y3, y3; \
+ vpbroadcastb ((round * 16) + 15)(rk), t0; \
+ vpxorq t0, y4, y4; \
+ vpbroadcastb ((round * 16) + 14)(rk), t0; \
+ vpxorq t0, y5, y5; \
+ vpbroadcastb ((round * 16) + 13)(rk), t0; \
+ vpxorq t0, y6, y6; \
+ vpbroadcastb ((round * 16) + 12)(rk), t0; \
+ vpxorq t0, y7, y7;
+
+#define aria_sbox_8way_gfni(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ t0, t1, t2, t3, \
+ t4, t5, t6, t7) \
+ vpbroadcastq .Ltf_s2_bitmatrix, t0; \
+ vpbroadcastq .Ltf_inv_bitmatrix, t1; \
+ vpbroadcastq .Ltf_id_bitmatrix, t2; \
+ vpbroadcastq .Ltf_aff_bitmatrix, t3; \
+ vpbroadcastq .Ltf_x2_bitmatrix, t4; \
+ vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
+ vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
+ vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
+ vgf2p8affineqb $(tf_inv_const), t1, x6, x6; \
+ vgf2p8affineinvqb $0, t2, x2, x2; \
+ vgf2p8affineinvqb $0, t2, x6, x6; \
+ vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0; \
+ vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4; \
+ vgf2p8affineqb $(tf_x2_const), t4, x3, x3; \
+ vgf2p8affineqb $(tf_x2_const), t4, x7, x7; \
+ vgf2p8affineinvqb $0, t2, x3, x3; \
+ vgf2p8affineinvqb $0, t2, x7, x7;
+
+#define aria_sbox_16way_gfni(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ t0, t1, t2, t3, \
+ t4, t5, t6, t7) \
+ vpbroadcastq .Ltf_s2_bitmatrix, t0; \
+ vpbroadcastq .Ltf_inv_bitmatrix, t1; \
+ vpbroadcastq .Ltf_id_bitmatrix, t2; \
+ vpbroadcastq .Ltf_aff_bitmatrix, t3; \
+ vpbroadcastq .Ltf_x2_bitmatrix, t4; \
+ vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
+ vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
+ vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
+ vgf2p8affineqb $(tf_inv_const), t1, x6, x6; \
+ vgf2p8affineinvqb $0, t2, x2, x2; \
+ vgf2p8affineinvqb $0, t2, x6, x6; \
+ vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0; \
+ vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4; \
+ vgf2p8affineqb $(tf_x2_const), t4, x3, x3; \
+ vgf2p8affineqb $(tf_x2_const), t4, x7, x7; \
+ vgf2p8affineinvqb $0, t2, x3, x3; \
+ vgf2p8affineinvqb $0, t2, x7, x7; \
+ vgf2p8affineinvqb $(tf_s2_const), t0, y1, y1; \
+ vgf2p8affineinvqb $(tf_s2_const), t0, y5, y5; \
+ vgf2p8affineqb $(tf_inv_const), t1, y2, y2; \
+ vgf2p8affineqb $(tf_inv_const), t1, y6, y6; \
+ vgf2p8affineinvqb $0, t2, y2, y2; \
+ vgf2p8affineinvqb $0, t2, y6, y6; \
+ vgf2p8affineinvqb $(tf_aff_const), t3, y0, y0; \
+ vgf2p8affineinvqb $(tf_aff_const), t3, y4, y4; \
+ vgf2p8affineqb $(tf_x2_const), t4, y3, y3; \
+ vgf2p8affineqb $(tf_x2_const), t4, y7, y7; \
+ vgf2p8affineinvqb $0, t2, y3, y3; \
+ vgf2p8affineinvqb $0, t2, y7, y7;
+
+
+#define aria_diff_m(x0, x1, x2, x3, \
+ t0, t1, t2, t3) \
+ /* T = rotr32(X, 8); */ \
+ /* X ^= T */ \
+ vpxorq x0, x3, t0; \
+ vpxorq x1, x0, t1; \
+ vpxorq x2, x1, t2; \
+ vpxorq x3, x2, t3; \
+ /* X = T ^ rotr(X, 16); */ \
+ vpxorq t2, x0, x0; \
+ vpxorq x1, t3, t3; \
+ vpxorq t0, x2, x2; \
+ vpxorq t1, x3, x1; \
+ vmovdqu64 t3, x3;
+
+#define aria_diff_word(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7) \
+ /* t1 ^= t2; */ \
+ vpxorq y0, x4, x4; \
+ vpxorq y1, x5, x5; \
+ vpxorq y2, x6, x6; \
+ vpxorq y3, x7, x7; \
+ \
+ /* t2 ^= t3; */ \
+ vpxorq y4, y0, y0; \
+ vpxorq y5, y1, y1; \
+ vpxorq y6, y2, y2; \
+ vpxorq y7, y3, y3; \
+ \
+ /* t0 ^= t1; */ \
+ vpxorq x4, x0, x0; \
+ vpxorq x5, x1, x1; \
+ vpxorq x6, x2, x2; \
+ vpxorq x7, x3, x3; \
+ \
+ /* t3 ^= t1; */ \
+ vpxorq x4, y4, y4; \
+ vpxorq x5, y5, y5; \
+ vpxorq x6, y6, y6; \
+ vpxorq x7, y7, y7; \
+ \
+ /* t2 ^= t0; */ \
+ vpxorq x0, y0, y0; \
+ vpxorq x1, y1, y1; \
+ vpxorq x2, y2, y2; \
+ vpxorq x3, y3, y3; \
+ \
+ /* t1 ^= t2; */ \
+ vpxorq y0, x4, x4; \
+ vpxorq y1, x5, x5; \
+ vpxorq y2, x6, x6; \
+ vpxorq y3, x7, x7;
+
+#define aria_fe_gfni(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ z0, z1, z2, z3, \
+ z4, z5, z6, z7, \
+ mem_tmp, rk, round) \
+ aria_ark_16way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, y1, y2, y3, y4, y5, y6, y7, \
+ z0, rk, round); \
+ \
+ aria_sbox_16way_gfni(x2, x3, x0, x1, \
+ x6, x7, x4, x5, \
+ y2, y3, y0, y1, \
+ y6, y7, y4, y5, \
+ z0, z1, z2, z3, \
+ z4, z5, z6, z7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, z0, z1, z2, z3); \
+ aria_diff_m(x4, x5, x6, x7, z0, z1, z2, z3); \
+ aria_diff_m(y0, y1, y2, y3, z0, z1, z2, z3); \
+ aria_diff_m(y4, y5, y6, y7, z0, z1, z2, z3); \
+ aria_diff_word(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ /* aria_diff_byte() \
+ * T3 = ABCD -> BADC \
+ * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \
+ * T0 = ABCD -> CDAB \
+ * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \
+ * T1 = ABCD -> DCBA \
+ * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \
+ */ \
+ aria_diff_word(x2, x3, x0, x1, \
+ x7, x6, x5, x4, \
+ y0, y1, y2, y3, \
+ y5, y4, y7, y6); \
+
+
+#define aria_fo_gfni(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ z0, z1, z2, z3, \
+ z4, z5, z6, z7, \
+ mem_tmp, rk, round) \
+ aria_ark_16way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, y1, y2, y3, y4, y5, y6, y7, \
+ z0, rk, round); \
+ \
+ aria_sbox_16way_gfni(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ z0, z1, z2, z3, \
+ z4, z5, z6, z7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, z0, z1, z2, z3); \
+ aria_diff_m(x4, x5, x6, x7, z0, z1, z2, z3); \
+ aria_diff_m(y0, y1, y2, y3, z0, z1, z2, z3); \
+ aria_diff_m(y4, y5, y6, y7, z0, z1, z2, z3); \
+ aria_diff_word(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ /* aria_diff_byte() \
+ * T1 = ABCD -> BADC \
+ * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \
+ * T2 = ABCD -> CDAB \
+ * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \
+ * T3 = ABCD -> DCBA \
+ * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \
+ */ \
+ aria_diff_word(x0, x1, x2, x3, \
+ x5, x4, x7, x6, \
+ y2, y3, y0, y1, \
+ y7, y6, y5, y4);
+
+#define aria_ff_gfni(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ z0, z1, z2, z3, \
+ z4, z5, z6, z7, \
+ mem_tmp, rk, round, last_round) \
+ aria_ark_16way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ z0, rk, round); \
+ aria_sbox_16way_gfni(x2, x3, x0, x1, \
+ x6, x7, x4, x5, \
+ y2, y3, y0, y1, \
+ y6, y7, y4, y5, \
+ z0, z1, z2, z3, \
+ z4, z5, z6, z7); \
+ aria_ark_16way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ z0, rk, last_round);
+
+
+.section .rodata.cst64, "aM", @progbits, 64
+.align 64
+.Lcounter0123_lo:
+ .quad 0, 0
+ .quad 1, 0
+ .quad 2, 0
+ .quad 3, 0
+
+.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32
+.align 32
+#define SHUFB_BYTES(idx) \
+ 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
+.Lshufb_16x16b:
+ .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
+ .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
+
+.section .rodata.cst16, "aM", @progbits, 16
+.align 16
+
+.Lcounter4444_lo:
+ .quad 4, 0
+.Lcounter8888_lo:
+ .quad 8, 0
+.Lcounter16161616_lo:
+ .quad 16, 0
+.Lcounter1111_hi:
+ .quad 0, 1
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+ .byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
+ .byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
+
+.section .rodata.cst8, "aM", @progbits, 8
+.align 8
+/* AES affine: */
+#define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0)
+.Ltf_aff_bitmatrix:
+ .quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1),
+ BV8(1, 1, 0, 0, 0, 1, 1, 1),
+ BV8(1, 1, 1, 0, 0, 0, 1, 1),
+ BV8(1, 1, 1, 1, 0, 0, 0, 1),
+ BV8(1, 1, 1, 1, 1, 0, 0, 0),
+ BV8(0, 1, 1, 1, 1, 1, 0, 0),
+ BV8(0, 0, 1, 1, 1, 1, 1, 0),
+ BV8(0, 0, 0, 1, 1, 1, 1, 1))
+
+/* AES inverse affine: */
+#define tf_inv_const BV8(1, 0, 1, 0, 0, 0, 0, 0)
+.Ltf_inv_bitmatrix:
+ .quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1),
+ BV8(1, 0, 0, 1, 0, 0, 1, 0),
+ BV8(0, 1, 0, 0, 1, 0, 0, 1),
+ BV8(1, 0, 1, 0, 0, 1, 0, 0),
+ BV8(0, 1, 0, 1, 0, 0, 1, 0),
+ BV8(0, 0, 1, 0, 1, 0, 0, 1),
+ BV8(1, 0, 0, 1, 0, 1, 0, 0),
+ BV8(0, 1, 0, 0, 1, 0, 1, 0))
+
+/* S2: */
+#define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1)
+.Ltf_s2_bitmatrix:
+ .quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1),
+ BV8(0, 0, 1, 1, 1, 1, 1, 1),
+ BV8(1, 1, 1, 0, 1, 1, 0, 1),
+ BV8(1, 1, 0, 0, 0, 0, 1, 1),
+ BV8(0, 1, 0, 0, 0, 0, 1, 1),
+ BV8(1, 1, 0, 0, 1, 1, 1, 0),
+ BV8(0, 1, 1, 0, 0, 0, 1, 1),
+ BV8(1, 1, 1, 1, 0, 1, 1, 0))
+
+/* X2: */
+#define tf_x2_const BV8(0, 0, 1, 1, 0, 1, 0, 0)
+.Ltf_x2_bitmatrix:
+ .quad BM8X8(BV8(0, 0, 0, 1, 1, 0, 0, 0),
+ BV8(0, 0, 1, 0, 0, 1, 1, 0),
+ BV8(0, 0, 0, 0, 1, 0, 1, 0),
+ BV8(1, 1, 1, 0, 0, 0, 1, 1),
+ BV8(1, 1, 1, 0, 1, 1, 0, 0),
+ BV8(0, 1, 1, 0, 1, 0, 1, 1),
+ BV8(1, 0, 1, 1, 1, 1, 0, 1),
+ BV8(1, 0, 0, 1, 0, 0, 1, 1))
+
+/* Identity matrix: */
+.Ltf_id_bitmatrix:
+ .quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0),
+ BV8(0, 1, 0, 0, 0, 0, 0, 0),
+ BV8(0, 0, 1, 0, 0, 0, 0, 0),
+ BV8(0, 0, 0, 1, 0, 0, 0, 0),
+ BV8(0, 0, 0, 0, 1, 0, 0, 0),
+ BV8(0, 0, 0, 0, 0, 1, 0, 0),
+ BV8(0, 0, 0, 0, 0, 0, 1, 0),
+ BV8(0, 0, 0, 0, 0, 0, 0, 1))
+
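The 8x8 bit matrices above are the operands of the GFNI affine instructions (vgf2p8affineqb and vgf2p8affineinvqb) used by aria_sbox_16way_gfni(): each instruction applies y = A*x ^ b over GF(2) to every byte of a vector, which is how the AES affine map, its inverse, and the S2/X2 maps are evaluated without table lookups. For orientation only, a user-space equivalent of one such transform, assuming AVX512F+GFNI intrinsics and a constant term of zero, could look like this:

#include <immintrin.h>
#include <stdint.h>

/* Illustration only (not the kernel code path): apply an 8x8 GF(2) affine
 * map to every byte of a 512-bit vector, i.e. what vgf2p8affineqb does in
 * the assembly above. 'matrix' is the replicated 64-bit bit-matrix operand
 * (e.g. the value of .Ltf_aff_bitmatrix); the 8-bit constant term of the
 * affine map is an immediate, hard-coded to 0 here for simplicity.
 */
static inline __m512i gf2p8_affine_b0(__m512i x, uint64_t matrix)
{
	__m512i a = _mm512_set1_epi64((long long)matrix);

	return _mm512_gf2p8affine_epi64_epi8(x, a, 0);
}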
+.text
+SYM_FUNC_START_LOCAL(__aria_gfni_avx512_crypt_64way)
+ /* input:
+ * %r9: rk
+ * %rsi: dst
+ * %rdx: src
+ * %zmm0..%zmm15: byte-sliced blocks
+ */
+
+ FRAME_BEGIN
+
+ movq %rsi, %rax;
+ leaq 8 * 64(%rax), %r8;
+
+ inpack16_post(%zmm0, %zmm1, %zmm2, %zmm3,
+ %zmm4, %zmm5, %zmm6, %zmm7,
+ %zmm8, %zmm9, %zmm10, %zmm11,
+ %zmm12, %zmm13, %zmm14,
+ %zmm15, %rax, %r8);
+ aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
+ %zmm4, %zmm5, %zmm6, %zmm7,
+ %zmm8, %zmm9, %zmm10, %zmm11,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 0);
+ aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
+ %zmm6, %zmm7, %zmm4, %zmm5,
+ %zmm9, %zmm8, %zmm11, %zmm10,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 1);
+ aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
+ %zmm4, %zmm5, %zmm6, %zmm7,
+ %zmm8, %zmm9, %zmm10, %zmm11,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 2);
+ aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
+ %zmm6, %zmm7, %zmm4, %zmm5,
+ %zmm9, %zmm8, %zmm11, %zmm10,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 3);
+ aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
+ %zmm4, %zmm5, %zmm6, %zmm7,
+ %zmm8, %zmm9, %zmm10, %zmm11,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 4);
+ aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
+ %zmm6, %zmm7, %zmm4, %zmm5,
+ %zmm9, %zmm8, %zmm11, %zmm10,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 5);
+ aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
+ %zmm4, %zmm5, %zmm6, %zmm7,
+ %zmm8, %zmm9, %zmm10, %zmm11,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 6);
+ aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
+ %zmm6, %zmm7, %zmm4, %zmm5,
+ %zmm9, %zmm8, %zmm11, %zmm10,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 7);
+ aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
+ %zmm4, %zmm5, %zmm6, %zmm7,
+ %zmm8, %zmm9, %zmm10, %zmm11,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 8);
+ aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
+ %zmm6, %zmm7, %zmm4, %zmm5,
+ %zmm9, %zmm8, %zmm11, %zmm10,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 9);
+ aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
+ %zmm4, %zmm5, %zmm6, %zmm7,
+ %zmm8, %zmm9, %zmm10, %zmm11,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 10);
+ cmpl $12, ARIA_CTX_rounds(CTX);
+ jne .Laria_gfni_192;
+ aria_ff_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
+ %zmm6, %zmm7, %zmm4, %zmm5,
+ %zmm9, %zmm8, %zmm11, %zmm10,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 11, 12);
+ jmp .Laria_gfni_end;
+.Laria_gfni_192:
+ aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
+ %zmm6, %zmm7, %zmm4, %zmm5,
+ %zmm9, %zmm8, %zmm11, %zmm10,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 11);
+ aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
+ %zmm4, %zmm5, %zmm6, %zmm7,
+ %zmm8, %zmm9, %zmm10, %zmm11,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 12);
+ cmpl $14, ARIA_CTX_rounds(CTX);
+ jne .Laria_gfni_256;
+ aria_ff_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
+ %zmm6, %zmm7, %zmm4, %zmm5,
+ %zmm9, %zmm8, %zmm11, %zmm10,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 13, 14);
+ jmp .Laria_gfni_end;
+.Laria_gfni_256:
+ aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
+ %zmm6, %zmm7, %zmm4, %zmm5,
+ %zmm9, %zmm8, %zmm11, %zmm10,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 13);
+ aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
+ %zmm4, %zmm5, %zmm6, %zmm7,
+ %zmm8, %zmm9, %zmm10, %zmm11,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 14);
+ aria_ff_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
+ %zmm6, %zmm7, %zmm4, %zmm5,
+ %zmm9, %zmm8, %zmm11, %zmm10,
+ %zmm12, %zmm13, %zmm14, %zmm15,
+ %zmm24, %zmm25, %zmm26, %zmm27,
+ %zmm28, %zmm29, %zmm30, %zmm31,
+ %rax, %r9, 15, 16);
+.Laria_gfni_end:
+ debyteslice_16x16b(%zmm9, %zmm12, %zmm3, %zmm6,
+ %zmm8, %zmm13, %zmm2, %zmm7,
+ %zmm11, %zmm14, %zmm1, %zmm4,
+ %zmm10, %zmm15, %zmm0, %zmm5,
+ (%rax), (%r8));
+ FRAME_END
+ RET;
+SYM_FUNC_END(__aria_gfni_avx512_crypt_64way)
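The call sequence above alternates the odd and even round functions (aria_fo_gfni / aria_fe_gfni) and ends with aria_ff_gfni, which does a round-key addition, the substitution layer, and a second round-key addition for the last round. The number of rounds is read from ARIA_CTX_rounds and selects one of the three tails. As a hedged reference (the relation comes from RFC 5794, not from this file), the round count as a function of key size is:

/* ARIA (RFC 5794): 12, 14 or 16 rounds for 128-, 192- or 256-bit keys,
 * which is what the cmpl $12 / cmpl $14 checks above dispatch on.
 */
static int aria_nrounds(unsigned int key_bits)
{
	return 12 + 2 * ((key_bits - 128) / 64);
}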
+
+SYM_TYPED_FUNC_START(aria_gfni_avx512_encrypt_64way)
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ FRAME_BEGIN
+
+ leaq ARIA_CTX_enc_key(CTX), %r9;
+
+ inpack16_pre(%zmm0, %zmm1, %zmm2, %zmm3, %zmm4, %zmm5, %zmm6, %zmm7,
+ %zmm8, %zmm9, %zmm10, %zmm11, %zmm12, %zmm13, %zmm14,
+ %zmm15, %rdx);
+
+ call __aria_gfni_avx512_crypt_64way;
+
+ write_output(%zmm3, %zmm2, %zmm1, %zmm0, %zmm6, %zmm7, %zmm4, %zmm5,
+ %zmm9, %zmm8, %zmm11, %zmm10, %zmm12, %zmm13, %zmm14,
+ %zmm15, %rax);
+
+ FRAME_END
+ RET;
+SYM_FUNC_END(aria_gfni_avx512_encrypt_64way)
+
+SYM_TYPED_FUNC_START(aria_gfni_avx512_decrypt_64way)
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ FRAME_BEGIN
+
+ leaq ARIA_CTX_dec_key(CTX), %r9;
+
+ inpack16_pre(%zmm0, %zmm1, %zmm2, %zmm3, %zmm4, %zmm5, %zmm6, %zmm7,
+ %zmm8, %zmm9, %zmm10, %zmm11, %zmm12, %zmm13, %zmm14,
+ %zmm15, %rdx);
+
+ call __aria_gfni_avx512_crypt_64way;
+
+ write_output(%zmm3, %zmm2, %zmm1, %zmm0, %zmm6, %zmm7, %zmm4, %zmm5,
+ %zmm9, %zmm8, %zmm11, %zmm10, %zmm12, %zmm13, %zmm14,
+ %zmm15, %rax);
+
+ FRAME_END
+ RET;
+SYM_FUNC_END(aria_gfni_avx512_decrypt_64way)
+
+SYM_FUNC_START_LOCAL(__aria_gfni_avx512_ctr_gen_keystream_64way)
+ /* input:
+ * %rdi: ctx
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: keystream
+ * %r8: iv (big endian, 128bit)
+ */
+
+ FRAME_BEGIN
+
+ vbroadcasti64x2 .Lbswap128_mask (%rip), %zmm19;
+ vmovdqa64 .Lcounter0123_lo (%rip), %zmm21;
+ vbroadcasti64x2 .Lcounter4444_lo (%rip), %zmm22;
+ vbroadcasti64x2 .Lcounter8888_lo (%rip), %zmm23;
+ vbroadcasti64x2 .Lcounter16161616_lo (%rip), %zmm24;
+ vbroadcasti64x2 .Lcounter1111_hi (%rip), %zmm25;
+
+ /* load IV and byteswap */
+ movq 8(%r8), %r11;
+ movq (%r8), %r10;
+ bswapq %r11;
+ bswapq %r10;
+ vbroadcasti64x2 (%r8), %zmm20;
+ vpshufb %zmm19, %zmm20, %zmm20;
+
+	/* check whether we need to handle 64-bit overflow and carry */
+ cmpq $(0xffffffffffffffff - 64), %r11;
+ ja .Lload_ctr_carry;
+
+ /* construct IVs */
+ vpaddq %zmm21, %zmm20, %zmm0; /* +0:+1:+2:+3 */
+ vpaddq %zmm22, %zmm0, %zmm1; /* +4:+5:+6:+7 */
+ vpaddq %zmm23, %zmm0, %zmm2; /* +8:+9:+10:+11 */
+ vpaddq %zmm23, %zmm1, %zmm3; /* +12:+13:+14:+15 */
+ vpaddq %zmm24, %zmm0, %zmm4; /* +16... */
+ vpaddq %zmm24, %zmm1, %zmm5; /* +20... */
+ vpaddq %zmm24, %zmm2, %zmm6; /* +24... */
+ vpaddq %zmm24, %zmm3, %zmm7; /* +28... */
+ vpaddq %zmm24, %zmm4, %zmm8; /* +32... */
+ vpaddq %zmm24, %zmm5, %zmm9; /* +36... */
+ vpaddq %zmm24, %zmm6, %zmm10; /* +40... */
+ vpaddq %zmm24, %zmm7, %zmm11; /* +44... */
+ vpaddq %zmm24, %zmm8, %zmm12; /* +48... */
+ vpaddq %zmm24, %zmm9, %zmm13; /* +52... */
+ vpaddq %zmm24, %zmm10, %zmm14; /* +56... */
+ vpaddq %zmm24, %zmm11, %zmm15; /* +60... */
+ jmp .Lload_ctr_done;
+
+.Lload_ctr_carry:
+ /* construct IVs */
+ add_le128(%zmm0, %zmm20, %zmm21, %zmm25); /* +0:+1:+2:+3 */
+ add_le128(%zmm1, %zmm0, %zmm22, %zmm25); /* +4:+5:+6:+7 */
+ add_le128(%zmm2, %zmm0, %zmm23, %zmm25); /* +8:+9:+10:+11 */
+ add_le128(%zmm3, %zmm1, %zmm23, %zmm25); /* +12:+13:+14:+15 */
+ add_le128(%zmm4, %zmm0, %zmm24, %zmm25); /* +16... */
+ add_le128(%zmm5, %zmm1, %zmm24, %zmm25); /* +20... */
+ add_le128(%zmm6, %zmm2, %zmm24, %zmm25); /* +24... */
+ add_le128(%zmm7, %zmm3, %zmm24, %zmm25); /* +28... */
+ add_le128(%zmm8, %zmm4, %zmm24, %zmm25); /* +32... */
+ add_le128(%zmm9, %zmm5, %zmm24, %zmm25); /* +36... */
+ add_le128(%zmm10, %zmm6, %zmm24, %zmm25); /* +40... */
+ add_le128(%zmm11, %zmm7, %zmm24, %zmm25); /* +44... */
+ add_le128(%zmm12, %zmm8, %zmm24, %zmm25); /* +48... */
+ add_le128(%zmm13, %zmm9, %zmm24, %zmm25); /* +52... */
+ add_le128(%zmm14, %zmm10, %zmm24, %zmm25); /* +56... */
+ add_le128(%zmm15, %zmm11, %zmm24, %zmm25); /* +60... */
+
+.Lload_ctr_done:
+ /* Byte-swap IVs and update counter. */
+ addq $64, %r11;
+ adcq $0, %r10;
+ vpshufb %zmm19, %zmm15, %zmm15;
+ vpshufb %zmm19, %zmm14, %zmm14;
+ vpshufb %zmm19, %zmm13, %zmm13;
+ vpshufb %zmm19, %zmm12, %zmm12;
+ vpshufb %zmm19, %zmm11, %zmm11;
+ vpshufb %zmm19, %zmm10, %zmm10;
+ vpshufb %zmm19, %zmm9, %zmm9;
+ vpshufb %zmm19, %zmm8, %zmm8;
+ bswapq %r11;
+ bswapq %r10;
+ vpshufb %zmm19, %zmm7, %zmm7;
+ vpshufb %zmm19, %zmm6, %zmm6;
+ vpshufb %zmm19, %zmm5, %zmm5;
+ vpshufb %zmm19, %zmm4, %zmm4;
+ vpshufb %zmm19, %zmm3, %zmm3;
+ vpshufb %zmm19, %zmm2, %zmm2;
+ vpshufb %zmm19, %zmm1, %zmm1;
+ vpshufb %zmm19, %zmm0, %zmm0;
+ movq %r11, 8(%r8);
+ movq %r10, (%r8);
+
+ FRAME_END
+ RET;
+SYM_FUNC_END(__aria_gfni_avx512_ctr_gen_keystream_64way)
+
+SYM_TYPED_FUNC_START(aria_gfni_avx512_ctr_crypt_64way)
+ /* input:
+ * %rdi: ctx
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: keystream
+ * %r8: iv (big endian, 128bit)
+ */
+ FRAME_BEGIN
+
+ call __aria_gfni_avx512_ctr_gen_keystream_64way
+
+ leaq (%rsi), %r10;
+ leaq (%rdx), %r11;
+ leaq (%rcx), %rsi;
+ leaq (%rcx), %rdx;
+ leaq ARIA_CTX_enc_key(CTX), %r9;
+
+ call __aria_gfni_avx512_crypt_64way;
+
+ vpxorq (0 * 64)(%r11), %zmm3, %zmm3;
+ vpxorq (1 * 64)(%r11), %zmm2, %zmm2;
+ vpxorq (2 * 64)(%r11), %zmm1, %zmm1;
+ vpxorq (3 * 64)(%r11), %zmm0, %zmm0;
+ vpxorq (4 * 64)(%r11), %zmm6, %zmm6;
+ vpxorq (5 * 64)(%r11), %zmm7, %zmm7;
+ vpxorq (6 * 64)(%r11), %zmm4, %zmm4;
+ vpxorq (7 * 64)(%r11), %zmm5, %zmm5;
+ vpxorq (8 * 64)(%r11), %zmm9, %zmm9;
+ vpxorq (9 * 64)(%r11), %zmm8, %zmm8;
+ vpxorq (10 * 64)(%r11), %zmm11, %zmm11;
+ vpxorq (11 * 64)(%r11), %zmm10, %zmm10;
+ vpxorq (12 * 64)(%r11), %zmm12, %zmm12;
+ vpxorq (13 * 64)(%r11), %zmm13, %zmm13;
+ vpxorq (14 * 64)(%r11), %zmm14, %zmm14;
+ vpxorq (15 * 64)(%r11), %zmm15, %zmm15;
+ write_output(%zmm3, %zmm2, %zmm1, %zmm0, %zmm6, %zmm7, %zmm4, %zmm5,
+ %zmm9, %zmm8, %zmm11, %zmm10, %zmm12, %zmm13, %zmm14,
+ %zmm15, %r10);
+
+ FRAME_END
+ RET;
+SYM_FUNC_END(aria_gfni_avx512_ctr_crypt_64way)
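In the keystream generator above, the 128-bit IV is a big-endian counter. The fast path builds the 64 per-block IVs with vpaddq using constants whose high quadwords are zero; the .Lload_ctr_carry path switches to add_le128() when the low 64 bits could wrap within the next 64 increments, and the write-back always uses an add/adc pair. A scalar C sketch of the same bookkeeping (an illustration, not kernel code):

#include <stdint.h>
#include <string.h>

/* Hedged sketch of the counter handling in
 * __aria_gfni_avx512_ctr_gen_keystream_64way: 64 blocks are produced per
 * call, so the SIMD fast path is only safe when the low 64 bits of the
 * big-endian counter cannot wrap while adding 0..63.
 */
static int aria_ctr64_advance(uint8_t iv[16])
{
	uint64_t hi, lo;
	int need_carry_path;

	memcpy(&hi, iv, 8);
	memcpy(&lo, iv + 8, 8);
	hi = __builtin_bswap64(hi);		/* IV is stored big endian */
	lo = __builtin_bswap64(lo);

	/* cmpq $(0xffffffffffffffff - 64), %r11; ja .Lload_ctr_carry */
	need_carry_path = lo > UINT64_MAX - 64;

	lo += 64;				/* addq $64, %r11 */
	if (lo < 64)				/* adcq $0, %r10 */
		hi++;

	hi = __builtin_bswap64(hi);
	lo = __builtin_bswap64(lo);
	memcpy(iv, &hi, 8);
	memcpy(iv + 8, &lo, 8);

	return need_carry_path;
}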
diff --git a/arch/x86/crypto/aria_aesni_avx2_glue.c b/arch/x86/crypto/aria_aesni_avx2_glue.c
new file mode 100644
index 000000000000..87a11804fc77
--- /dev/null
+++ b/arch/x86/crypto/aria_aesni_avx2_glue.c
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Glue Code for the AVX2/AES-NI/GFNI assembler implementation of the ARIA Cipher
+ *
+ * Copyright (c) 2022 Taehee Yoo <[email protected]>
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/aria.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include "ecb_cbc_helpers.h"
+#include "aria-avx.h"
+
+asmlinkage void aria_aesni_avx2_encrypt_32way(const void *ctx, u8 *dst,
+ const u8 *src);
+EXPORT_SYMBOL_GPL(aria_aesni_avx2_encrypt_32way);
+asmlinkage void aria_aesni_avx2_decrypt_32way(const void *ctx, u8 *dst,
+ const u8 *src);
+EXPORT_SYMBOL_GPL(aria_aesni_avx2_decrypt_32way);
+asmlinkage void aria_aesni_avx2_ctr_crypt_32way(const void *ctx, u8 *dst,
+ const u8 *src,
+ u8 *keystream, u8 *iv);
+EXPORT_SYMBOL_GPL(aria_aesni_avx2_ctr_crypt_32way);
+#ifdef CONFIG_AS_GFNI
+asmlinkage void aria_aesni_avx2_gfni_encrypt_32way(const void *ctx, u8 *dst,
+ const u8 *src);
+EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_encrypt_32way);
+asmlinkage void aria_aesni_avx2_gfni_decrypt_32way(const void *ctx, u8 *dst,
+ const u8 *src);
+EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_decrypt_32way);
+asmlinkage void aria_aesni_avx2_gfni_ctr_crypt_32way(const void *ctx, u8 *dst,
+ const u8 *src,
+ u8 *keystream, u8 *iv);
+EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_ctr_crypt_32way);
+#endif /* CONFIG_AS_GFNI */
+
+static struct aria_avx_ops aria_ops;
+
+struct aria_avx2_request_ctx {
+ u8 keystream[ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE];
+};
+
+static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey)
+{
+ ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
+ ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_encrypt_32way);
+ ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_encrypt_16way);
+ ECB_BLOCK(1, aria_encrypt);
+ ECB_WALK_END();
+}
+
+static int ecb_do_decrypt(struct skcipher_request *req, const u32 *rkey)
+{
+ ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
+ ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_decrypt_32way);
+ ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_decrypt_16way);
+ ECB_BLOCK(1, aria_decrypt);
+ ECB_WALK_END();
+}
+
+static int aria_avx2_ecb_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return ecb_do_encrypt(req, ctx->enc_key[0]);
+}
+
+static int aria_avx2_ecb_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return ecb_do_decrypt(req, ctx->dec_key[0]);
+}
+
+static int aria_avx2_set_key(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ return aria_set_key(&tfm->base, key, keylen);
+}
+
+static int aria_avx2_ctr_encrypt(struct skcipher_request *req)
+{
+ struct aria_avx2_request_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ while (nbytes >= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE) {
+ kernel_fpu_begin();
+ aria_ops.aria_ctr_crypt_32way(ctx, dst, src,
+ &req_ctx->keystream[0],
+ walk.iv);
+ kernel_fpu_end();
+ dst += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
+ src += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
+ nbytes -= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
+ }
+
+ while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) {
+ kernel_fpu_begin();
+ aria_ops.aria_ctr_crypt_16way(ctx, dst, src,
+ &req_ctx->keystream[0],
+ walk.iv);
+ kernel_fpu_end();
+ dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
+ src += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
+ nbytes -= ARIA_AESNI_PARALLEL_BLOCK_SIZE;
+ }
+
+ while (nbytes >= ARIA_BLOCK_SIZE) {
+ memcpy(&req_ctx->keystream[0], walk.iv, ARIA_BLOCK_SIZE);
+ crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
+
+ aria_encrypt(ctx, &req_ctx->keystream[0],
+ &req_ctx->keystream[0]);
+
+ crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
+ ARIA_BLOCK_SIZE);
+ dst += ARIA_BLOCK_SIZE;
+ src += ARIA_BLOCK_SIZE;
+ nbytes -= ARIA_BLOCK_SIZE;
+ }
+
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ memcpy(&req_ctx->keystream[0], walk.iv,
+ ARIA_BLOCK_SIZE);
+ crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
+
+ aria_encrypt(ctx, &req_ctx->keystream[0],
+ &req_ctx->keystream[0]);
+
+ crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
+ nbytes);
+ dst += nbytes;
+ src += nbytes;
+ nbytes = 0;
+ }
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+
+static int aria_avx2_init_tfm(struct crypto_skcipher *tfm)
+{
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct aria_avx2_request_ctx));
+
+ return 0;
+}
+
+static struct skcipher_alg aria_algs[] = {
+ {
+ .base.cra_name = "__ecb(aria)",
+ .base.cra_driver_name = "__ecb-aria-avx2",
+ .base.cra_priority = 500,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = ARIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct aria_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = ARIA_MIN_KEY_SIZE,
+ .max_keysize = ARIA_MAX_KEY_SIZE,
+ .setkey = aria_avx2_set_key,
+ .encrypt = aria_avx2_ecb_encrypt,
+ .decrypt = aria_avx2_ecb_decrypt,
+ }, {
+ .base.cra_name = "__ctr(aria)",
+ .base.cra_driver_name = "__ctr-aria-avx2",
+ .base.cra_priority = 500,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL |
+ CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct aria_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = ARIA_MIN_KEY_SIZE,
+ .max_keysize = ARIA_MAX_KEY_SIZE,
+ .ivsize = ARIA_BLOCK_SIZE,
+ .chunksize = ARIA_BLOCK_SIZE,
+ .setkey = aria_avx2_set_key,
+ .encrypt = aria_avx2_ctr_encrypt,
+ .decrypt = aria_avx2_ctr_encrypt,
+ .init = aria_avx2_init_tfm,
+ }
+};
+
+static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)];
+
+static int __init aria_avx2_init(void)
+{
+ const char *feature_name;
+
+ if (!boot_cpu_has(X86_FEATURE_AVX) ||
+ !boot_cpu_has(X86_FEATURE_AVX2) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+ pr_info("AVX2 or AES-NI instructions are not detected.\n");
+ return -ENODEV;
+ }
+
+ if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+ &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
+ return -ENODEV;
+ }
+
+ if (boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) {
+ aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way;
+ aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way;
+ aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way;
+ aria_ops.aria_encrypt_32way = aria_aesni_avx2_gfni_encrypt_32way;
+ aria_ops.aria_decrypt_32way = aria_aesni_avx2_gfni_decrypt_32way;
+ aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_gfni_ctr_crypt_32way;
+ } else {
+ aria_ops.aria_encrypt_16way = aria_aesni_avx_encrypt_16way;
+ aria_ops.aria_decrypt_16way = aria_aesni_avx_decrypt_16way;
+ aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_ctr_crypt_16way;
+ aria_ops.aria_encrypt_32way = aria_aesni_avx2_encrypt_32way;
+ aria_ops.aria_decrypt_32way = aria_aesni_avx2_decrypt_32way;
+ aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_ctr_crypt_32way;
+ }
+
+ return simd_register_skciphers_compat(aria_algs,
+ ARRAY_SIZE(aria_algs),
+ aria_simd_algs);
+}
+
+static void __exit aria_avx2_exit(void)
+{
+ simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs),
+ aria_simd_algs);
+}
+
+module_init(aria_avx2_init);
+module_exit(aria_avx2_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Taehee Yoo <[email protected]>");
+MODULE_DESCRIPTION("ARIA Cipher Algorithm, AVX2/AES-NI/GFNI optimized");
+MODULE_ALIAS_CRYPTO("aria");
+MODULE_ALIAS_CRYPTO("aria-aesni-avx2");
diff --git a/arch/x86/crypto/aria_aesni_avx_glue.c b/arch/x86/crypto/aria_aesni_avx_glue.c
index c561ea4fefa5..4e1516b76669 100644
--- a/arch/x86/crypto/aria_aesni_avx_glue.c
+++ b/arch/x86/crypto/aria_aesni_avx_glue.c
@@ -18,21 +18,33 @@
asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
+EXPORT_SYMBOL_GPL(aria_aesni_avx_encrypt_16way);
asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
+EXPORT_SYMBOL_GPL(aria_aesni_avx_decrypt_16way);
asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
+EXPORT_SYMBOL_GPL(aria_aesni_avx_ctr_crypt_16way);
+#ifdef CONFIG_AS_GFNI
asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
+EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_encrypt_16way);
asmlinkage void aria_aesni_avx_gfni_decrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
+EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_decrypt_16way);
asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
+EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_ctr_crypt_16way);
+#endif /* CONFIG_AS_GFNI */
static struct aria_avx_ops aria_ops;
+struct aria_avx_request_ctx {
+ u8 keystream[ARIA_AESNI_PARALLEL_BLOCK_SIZE];
+};
+
static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey)
{
ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
@@ -73,6 +85,7 @@ static int aria_avx_set_key(struct crypto_skcipher *tfm, const u8 *key,
static int aria_avx_ctr_encrypt(struct skcipher_request *req)
{
+ struct aria_avx_request_ctx *req_ctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
@@ -86,10 +99,9 @@ static int aria_avx_ctr_encrypt(struct skcipher_request *req)
u8 *dst = walk.dst.virt.addr;
while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) {
- u8 keystream[ARIA_AESNI_PARALLEL_BLOCK_SIZE];
-
kernel_fpu_begin();
- aria_ops.aria_ctr_crypt_16way(ctx, dst, src, keystream,
+ aria_ops.aria_ctr_crypt_16way(ctx, dst, src,
+ &req_ctx->keystream[0],
walk.iv);
kernel_fpu_end();
dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
@@ -98,28 +110,29 @@ static int aria_avx_ctr_encrypt(struct skcipher_request *req)
}
while (nbytes >= ARIA_BLOCK_SIZE) {
- u8 keystream[ARIA_BLOCK_SIZE];
-
- memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE);
+ memcpy(&req_ctx->keystream[0], walk.iv, ARIA_BLOCK_SIZE);
crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
- aria_encrypt(ctx, keystream, keystream);
+ aria_encrypt(ctx, &req_ctx->keystream[0],
+ &req_ctx->keystream[0]);
- crypto_xor_cpy(dst, src, keystream, ARIA_BLOCK_SIZE);
+ crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
+ ARIA_BLOCK_SIZE);
dst += ARIA_BLOCK_SIZE;
src += ARIA_BLOCK_SIZE;
nbytes -= ARIA_BLOCK_SIZE;
}
if (walk.nbytes == walk.total && nbytes > 0) {
- u8 keystream[ARIA_BLOCK_SIZE];
-
- memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE);
+ memcpy(&req_ctx->keystream[0], walk.iv,
+ ARIA_BLOCK_SIZE);
crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
- aria_encrypt(ctx, keystream, keystream);
+ aria_encrypt(ctx, &req_ctx->keystream[0],
+ &req_ctx->keystream[0]);
- crypto_xor_cpy(dst, src, keystream, nbytes);
+ crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
+ nbytes);
dst += nbytes;
src += nbytes;
nbytes = 0;
@@ -130,6 +143,13 @@ static int aria_avx_ctr_encrypt(struct skcipher_request *req)
return err;
}
+static int aria_avx_init_tfm(struct crypto_skcipher *tfm)
+{
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct aria_avx_request_ctx));
+
+ return 0;
+}
+
static struct skcipher_alg aria_algs[] = {
{
.base.cra_name = "__ecb(aria)",
@@ -160,6 +180,7 @@ static struct skcipher_alg aria_algs[] = {
.setkey = aria_avx_set_key,
.encrypt = aria_avx_ctr_encrypt,
.decrypt = aria_avx_ctr_encrypt,
+ .init = aria_avx_init_tfm,
}
};
@@ -182,7 +203,7 @@ static int __init aria_avx_init(void)
return -ENODEV;
}
- if (boot_cpu_has(X86_FEATURE_GFNI)) {
+ if (boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) {
aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way;
aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way;
aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way;
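The change to aria_avx_ctr_encrypt() above replaces the on-stack keystream arrays with a buffer in the skcipher request context, sized by the new aria_avx_init_tfm() via crypto_skcipher_set_reqsize(); the wider AVX2/AVX512 variants need 512- and 1024-byte keystream buffers, which is more than is comfortable on the kernel stack. Callers do not change, since request allocation already accounts for the registered reqsize, roughly as in this hedged sketch:

#include <crypto/skcipher.h>
#include <linux/errno.h>
#include <linux/gfp.h>

/* Hedged sketch (not part of this patch): skcipher_request_alloc() sizes
 * the request using the reqsize registered in ->init, and the driver
 * later retrieves its per-request keystream area with
 * skcipher_request_ctx(), as the CTR code above does.
 */
static int aria_ctr_request_example(struct crypto_skcipher *tfm)
{
	struct skcipher_request *req = skcipher_request_alloc(tfm, GFP_KERNEL);

	if (!req)
		return -ENOMEM;
	/* set callback, src/dst scatterlists and IV, then call
	 * crypto_skcipher_encrypt(req);
	 */
	skcipher_request_free(req);
	return 0;
}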
diff --git a/arch/x86/crypto/aria_gfni_avx512_glue.c b/arch/x86/crypto/aria_gfni_avx512_glue.c
new file mode 100644
index 000000000000..f4a2208d2638
--- /dev/null
+++ b/arch/x86/crypto/aria_gfni_avx512_glue.c
@@ -0,0 +1,250 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Glue Code for the AVX512/GFNI assembler implementation of the ARIA Cipher
+ *
+ * Copyright (c) 2022 Taehee Yoo <[email protected]>
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/aria.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include "ecb_cbc_helpers.h"
+#include "aria-avx.h"
+
+asmlinkage void aria_gfni_avx512_encrypt_64way(const void *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void aria_gfni_avx512_decrypt_64way(const void *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void aria_gfni_avx512_ctr_crypt_64way(const void *ctx, u8 *dst,
+ const u8 *src,
+ u8 *keystream, u8 *iv);
+
+static struct aria_avx_ops aria_ops;
+
+struct aria_avx512_request_ctx {
+ u8 keystream[ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE];
+};
+
+static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey)
+{
+ ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
+ ECB_BLOCK(ARIA_GFNI_AVX512_PARALLEL_BLOCKS, aria_ops.aria_encrypt_64way);
+ ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_encrypt_32way);
+ ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_encrypt_16way);
+ ECB_BLOCK(1, aria_encrypt);
+ ECB_WALK_END();
+}
+
+static int ecb_do_decrypt(struct skcipher_request *req, const u32 *rkey)
+{
+ ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
+ ECB_BLOCK(ARIA_GFNI_AVX512_PARALLEL_BLOCKS, aria_ops.aria_decrypt_64way);
+ ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_decrypt_32way);
+ ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_decrypt_16way);
+ ECB_BLOCK(1, aria_decrypt);
+ ECB_WALK_END();
+}
+
+static int aria_avx512_ecb_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return ecb_do_encrypt(req, ctx->enc_key[0]);
+}
+
+static int aria_avx512_ecb_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return ecb_do_decrypt(req, ctx->dec_key[0]);
+}
+
+static int aria_avx512_set_key(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ return aria_set_key(&tfm->base, key, keylen);
+}
+
+static int aria_avx512_ctr_encrypt(struct skcipher_request *req)
+{
+ struct aria_avx512_request_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ while (nbytes >= ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE) {
+ kernel_fpu_begin();
+ aria_ops.aria_ctr_crypt_64way(ctx, dst, src,
+ &req_ctx->keystream[0],
+ walk.iv);
+ kernel_fpu_end();
+ dst += ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE;
+ src += ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE;
+ nbytes -= ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE;
+ }
+
+ while (nbytes >= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE) {
+ kernel_fpu_begin();
+ aria_ops.aria_ctr_crypt_32way(ctx, dst, src,
+ &req_ctx->keystream[0],
+ walk.iv);
+ kernel_fpu_end();
+ dst += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
+ src += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
+ nbytes -= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
+ }
+
+ while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) {
+ kernel_fpu_begin();
+ aria_ops.aria_ctr_crypt_16way(ctx, dst, src,
+ &req_ctx->keystream[0],
+ walk.iv);
+ kernel_fpu_end();
+ dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
+ src += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
+ nbytes -= ARIA_AESNI_PARALLEL_BLOCK_SIZE;
+ }
+
+ while (nbytes >= ARIA_BLOCK_SIZE) {
+ memcpy(&req_ctx->keystream[0], walk.iv,
+ ARIA_BLOCK_SIZE);
+ crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
+
+ aria_encrypt(ctx, &req_ctx->keystream[0],
+ &req_ctx->keystream[0]);
+
+ crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
+ ARIA_BLOCK_SIZE);
+ dst += ARIA_BLOCK_SIZE;
+ src += ARIA_BLOCK_SIZE;
+ nbytes -= ARIA_BLOCK_SIZE;
+ }
+
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ memcpy(&req_ctx->keystream[0], walk.iv,
+ ARIA_BLOCK_SIZE);
+ crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
+
+ aria_encrypt(ctx, &req_ctx->keystream[0],
+ &req_ctx->keystream[0]);
+
+ crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
+ nbytes);
+ dst += nbytes;
+ src += nbytes;
+ nbytes = 0;
+ }
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+
+static int aria_avx512_init_tfm(struct crypto_skcipher *tfm)
+{
+ crypto_skcipher_set_reqsize(tfm,
+ sizeof(struct aria_avx512_request_ctx));
+
+ return 0;
+}
+
+static struct skcipher_alg aria_algs[] = {
+ {
+ .base.cra_name = "__ecb(aria)",
+ .base.cra_driver_name = "__ecb-aria-avx512",
+ .base.cra_priority = 600,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = ARIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct aria_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = ARIA_MIN_KEY_SIZE,
+ .max_keysize = ARIA_MAX_KEY_SIZE,
+ .setkey = aria_avx512_set_key,
+ .encrypt = aria_avx512_ecb_encrypt,
+ .decrypt = aria_avx512_ecb_decrypt,
+ }, {
+ .base.cra_name = "__ctr(aria)",
+ .base.cra_driver_name = "__ctr-aria-avx512",
+ .base.cra_priority = 600,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL |
+ CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct aria_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = ARIA_MIN_KEY_SIZE,
+ .max_keysize = ARIA_MAX_KEY_SIZE,
+ .ivsize = ARIA_BLOCK_SIZE,
+ .chunksize = ARIA_BLOCK_SIZE,
+ .setkey = aria_avx512_set_key,
+ .encrypt = aria_avx512_ctr_encrypt,
+ .decrypt = aria_avx512_ctr_encrypt,
+ .init = aria_avx512_init_tfm,
+ }
+};
+
+static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)];
+
+static int __init aria_avx512_init(void)
+{
+ const char *feature_name;
+
+ if (!boot_cpu_has(X86_FEATURE_AVX) ||
+ !boot_cpu_has(X86_FEATURE_AVX2) ||
+ !boot_cpu_has(X86_FEATURE_AVX512F) ||
+ !boot_cpu_has(X86_FEATURE_AVX512VL) ||
+ !boot_cpu_has(X86_FEATURE_GFNI) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+ pr_info("AVX512/GFNI instructions are not detected.\n");
+ return -ENODEV;
+ }
+
+ if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
+ XFEATURE_MASK_AVX512, &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
+ return -ENODEV;
+ }
+
+ aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way;
+ aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way;
+ aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way;
+ aria_ops.aria_encrypt_32way = aria_aesni_avx2_gfni_encrypt_32way;
+ aria_ops.aria_decrypt_32way = aria_aesni_avx2_gfni_decrypt_32way;
+ aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_gfni_ctr_crypt_32way;
+ aria_ops.aria_encrypt_64way = aria_gfni_avx512_encrypt_64way;
+ aria_ops.aria_decrypt_64way = aria_gfni_avx512_decrypt_64way;
+ aria_ops.aria_ctr_crypt_64way = aria_gfni_avx512_ctr_crypt_64way;
+
+ return simd_register_skciphers_compat(aria_algs,
+ ARRAY_SIZE(aria_algs),
+ aria_simd_algs);
+}
+
+static void __exit aria_avx512_exit(void)
+{
+ simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs),
+ aria_simd_algs);
+}
+
+module_init(aria_avx512_init);
+module_exit(aria_avx512_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Taehee Yoo <[email protected]>");
+MODULE_DESCRIPTION("ARIA Cipher Algorithm, AVX512/GFNI optimized");
+MODULE_ALIAS_CRYPTO("aria");
+MODULE_ALIAS_CRYPTO("aria-gfni-avx512");
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 4a43e072d2d1..e88c8e4f013c 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -6,7 +6,6 @@
*/
#include <linux/linkage.h>
-#include <linux/cfi_types.h>
.file "blowfish-x86_64-asm.S"
.text
@@ -100,16 +99,11 @@
bswapq RX0; \
movq RX0, (RIO);
-#define xor_block() \
- bswapq RX0; \
- xorq RX0, (RIO);
-
-SYM_FUNC_START(__blowfish_enc_blk)
+SYM_FUNC_START(blowfish_enc_blk)
/* input:
* %rdi: ctx
* %rsi: dst
* %rdx: src
- * %rcx: bool, if true: xor output
*/
movq %r12, %r11;
@@ -130,19 +124,13 @@ SYM_FUNC_START(__blowfish_enc_blk)
add_roundkey_enc(16);
movq %r11, %r12;
-
movq %r10, RIO;
- test %cl, %cl;
- jnz .L__enc_xor;
write_block();
RET;
-.L__enc_xor:
- xor_block();
- RET;
-SYM_FUNC_END(__blowfish_enc_blk)
+SYM_FUNC_END(blowfish_enc_blk)
-SYM_TYPED_FUNC_START(blowfish_dec_blk)
+SYM_FUNC_START(blowfish_dec_blk)
/* input:
* %rdi: ctx
* %rsi: dst
@@ -272,28 +260,26 @@ SYM_FUNC_END(blowfish_dec_blk)
movq RX3, 24(RIO);
#define xor_block4() \
- bswapq RX0; \
- xorq RX0, (RIO); \
+ movq (RIO), RT0; \
+ bswapq RT0; \
+ xorq RT0, RX1; \
\
- bswapq RX1; \
- xorq RX1, 8(RIO); \
+ movq 8(RIO), RT2; \
+ bswapq RT2; \
+ xorq RT2, RX2; \
\
- bswapq RX2; \
- xorq RX2, 16(RIO); \
- \
- bswapq RX3; \
- xorq RX3, 24(RIO);
+ movq 16(RIO), RT3; \
+ bswapq RT3; \
+ xorq RT3, RX3;
-SYM_FUNC_START(__blowfish_enc_blk_4way)
+SYM_FUNC_START(blowfish_enc_blk_4way)
/* input:
* %rdi: ctx
* %rsi: dst
* %rdx: src
- * %rcx: bool, if true: xor output
*/
pushq %r12;
pushq %rbx;
- pushq %rcx;
movq %rdi, CTX
movq %rsi, %r11;
@@ -313,37 +299,28 @@ SYM_FUNC_START(__blowfish_enc_blk_4way)
round_enc4(14);
add_preloaded_roundkey4();
- popq %r12;
movq %r11, RIO;
-
- test %r12b, %r12b;
- jnz .L__enc_xor4;
-
write_block4();
popq %rbx;
popq %r12;
RET;
+SYM_FUNC_END(blowfish_enc_blk_4way)
-.L__enc_xor4:
- xor_block4();
-
- popq %rbx;
- popq %r12;
- RET;
-SYM_FUNC_END(__blowfish_enc_blk_4way)
-
-SYM_TYPED_FUNC_START(blowfish_dec_blk_4way)
+SYM_FUNC_START(__blowfish_dec_blk_4way)
/* input:
* %rdi: ctx
* %rsi: dst
* %rdx: src
+ * %rcx: cbc (bool)
*/
pushq %r12;
pushq %rbx;
+ pushq %rcx;
+ pushq %rdx;
movq %rdi, CTX;
- movq %rsi, %r11
+ movq %rsi, %r11;
movq %rdx, RIO;
preload_roundkey_dec(17);
@@ -359,6 +336,14 @@ SYM_TYPED_FUNC_START(blowfish_dec_blk_4way)
round_dec4(3);
add_preloaded_roundkey4();
+ popq RIO;
+ popq %r12;
+ testq %r12, %r12;
+ jz .L_no_cbc_xor;
+
+ xor_block4();
+
+.L_no_cbc_xor:
movq %r11, RIO;
write_block4();
@@ -366,4 +351,4 @@ SYM_TYPED_FUNC_START(blowfish_dec_blk_4way)
popq %r12;
RET;
-SYM_FUNC_END(blowfish_dec_blk_4way)
+SYM_FUNC_END(__blowfish_dec_blk_4way)
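With this change the 4-way decrypt takes a cbc flag: when it is set, xor_block4() reloads ciphertext blocks 0..2 from the source buffer and XORs them into the decryptions of blocks 1..3, doing the CBC chaining for those blocks in assembly. The first block of each batch is still XORed with the previous IV by the CBC_DEC_BLOCK() helper in the glue code. Per 4-block batch the effect is roughly (a hedged sketch, not the register-level code):

#include <stdint.h>

/* Hedged sketch of the cbc=true path of __blowfish_dec_blk_4way():
 * plain[] holds the raw block decryptions D_K(C[i]); chaining for
 * blocks 1..3 uses the preceding ciphertext block, while plain[0] is
 * XORed with the IV (or the previous batch's last ciphertext) by the
 * caller.
 */
static void cbc_dec_chain4(uint64_t plain[4], const uint64_t cipher[4])
{
	plain[3] ^= cipher[2];
	plain[2] ^= cipher[1];
	plain[1] ^= cipher[0];
	/* plain[0] ^= iv happens in CBC_DEC_BLOCK() */
}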
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 019c64c1340a..552f2df0643f 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -16,26 +16,28 @@
#include <linux/module.h>
#include <linux/types.h>
+#include "ecb_cbc_helpers.h"
+
/* regular block cipher functions */
-asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
- bool xor);
+asmlinkage void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
/* 4-way parallel cipher functions */
-asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
+asmlinkage void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
+asmlinkage void __blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
+ const u8 *src, bool cbc);
-static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
+static inline void blowfish_dec_ecb_4way(struct bf_ctx *ctx, u8 *dst,
+ const u8 *src)
{
- __blowfish_enc_blk(ctx, dst, src, false);
+ return __blowfish_dec_blk_4way(ctx, dst, src, false);
}
-static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src)
+static inline void blowfish_dec_cbc_4way(struct bf_ctx *ctx, u8 *dst,
+ const u8 *src)
{
- __blowfish_enc_blk_4way(ctx, dst, src, false);
+ return __blowfish_dec_blk_4way(ctx, dst, src, true);
}
static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
@@ -54,183 +56,35 @@ static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm,
return blowfish_setkey(&tfm->base, key, keylen);
}
-static int ecb_crypt(struct skcipher_request *req,
- void (*fn)(struct bf_ctx *, u8 *, const u8 *),
- void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
-{
- unsigned int bsize = BF_BLOCK_SIZE;
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while ((nbytes = walk.nbytes)) {
- u8 *wsrc = walk.src.virt.addr;
- u8 *wdst = walk.dst.virt.addr;
-
- /* Process four block batch */
- if (nbytes >= bsize * 4) {
- do {
- fn_4way(ctx, wdst, wsrc);
-
- wsrc += bsize * 4;
- wdst += bsize * 4;
- nbytes -= bsize * 4;
- } while (nbytes >= bsize * 4);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- fn(ctx, wdst, wsrc);
-
- wsrc += bsize;
- wdst += bsize;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
static int ecb_encrypt(struct skcipher_request *req)
{
- return ecb_crypt(req, blowfish_enc_blk, blowfish_enc_blk_4way);
+ ECB_WALK_START(req, BF_BLOCK_SIZE, -1);
+ ECB_BLOCK(4, blowfish_enc_blk_4way);
+ ECB_BLOCK(1, blowfish_enc_blk);
+ ECB_WALK_END();
}
static int ecb_decrypt(struct skcipher_request *req)
{
- return ecb_crypt(req, blowfish_dec_blk, blowfish_dec_blk_4way);
-}
-
-static unsigned int __cbc_encrypt(struct bf_ctx *ctx,
- struct skcipher_walk *walk)
-{
- unsigned int bsize = BF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 *iv = (u64 *)walk->iv;
-
- do {
- *dst = *src ^ *iv;
- blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- *(u64 *)walk->iv = *iv;
- return nbytes;
+ ECB_WALK_START(req, BF_BLOCK_SIZE, -1);
+ ECB_BLOCK(4, blowfish_dec_ecb_4way);
+ ECB_BLOCK(1, blowfish_dec_blk);
+ ECB_WALK_END();
}
static int cbc_encrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while (walk.nbytes) {
- nbytes = __cbc_encrypt(ctx, &walk);
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
-static unsigned int __cbc_decrypt(struct bf_ctx *ctx,
- struct skcipher_walk *walk)
-{
- unsigned int bsize = BF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 ivs[4 - 1];
- u64 last_iv;
-
- /* Start of the last block. */
- src += nbytes / bsize - 1;
- dst += nbytes / bsize - 1;
-
- last_iv = *src;
-
- /* Process four block batch */
- if (nbytes >= bsize * 4) {
- do {
- nbytes -= bsize * 4 - bsize;
- src -= 4 - 1;
- dst -= 4 - 1;
-
- ivs[0] = src[0];
- ivs[1] = src[1];
- ivs[2] = src[2];
-
- blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
-
- dst[1] ^= ivs[0];
- dst[2] ^= ivs[1];
- dst[3] ^= ivs[2];
-
- nbytes -= bsize;
- if (nbytes < bsize)
- goto done;
-
- *dst ^= *(src - 1);
- src -= 1;
- dst -= 1;
- } while (nbytes >= bsize * 4);
- }
-
- /* Handle leftovers */
- for (;;) {
- blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- break;
-
- *dst ^= *(src - 1);
- src -= 1;
- dst -= 1;
- }
-
-done:
- *dst ^= *(u64 *)walk->iv;
- *(u64 *)walk->iv = last_iv;
-
- return nbytes;
+ CBC_WALK_START(req, BF_BLOCK_SIZE, -1);
+ CBC_ENC_BLOCK(blowfish_enc_blk);
+ CBC_WALK_END();
}
static int cbc_decrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while (walk.nbytes) {
- nbytes = __cbc_decrypt(ctx, &walk);
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
+ CBC_WALK_START(req, BF_BLOCK_SIZE, -1);
+ CBC_DEC_BLOCK(4, blowfish_dec_cbc_4way);
+ CBC_DEC_BLOCK(1, blowfish_dec_blk);
+ CBC_WALK_END();
}
static struct crypto_alg bf_cipher_alg = {
diff --git a/arch/x86/crypto/ecb_cbc_helpers.h b/arch/x86/crypto/ecb_cbc_helpers.h
index eaa15c7b29d6..11955bd01af1 100644
--- a/arch/x86/crypto/ecb_cbc_helpers.h
+++ b/arch/x86/crypto/ecb_cbc_helpers.h
@@ -13,13 +13,14 @@
#define ECB_WALK_START(req, bsize, fpu_blocks) do { \
void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); \
+ const int __fpu_blocks = (fpu_blocks); \
const int __bsize = (bsize); \
struct skcipher_walk walk; \
int err = skcipher_walk_virt(&walk, (req), false); \
while (walk.nbytes > 0) { \
unsigned int nbytes = walk.nbytes; \
- bool do_fpu = (fpu_blocks) != -1 && \
- nbytes >= (fpu_blocks) * __bsize; \
+ bool do_fpu = __fpu_blocks != -1 && \
+ nbytes >= __fpu_blocks * __bsize; \
const u8 *src = walk.src.virt.addr; \
u8 *dst = walk.dst.virt.addr; \
u8 __maybe_unused buf[(bsize)]; \
@@ -35,7 +36,12 @@
} while (0)
#define ECB_BLOCK(blocks, func) do { \
- while (nbytes >= (blocks) * __bsize) { \
+ const int __blocks = (blocks); \
+ if (do_fpu && __blocks < __fpu_blocks) { \
+ kernel_fpu_end(); \
+ do_fpu = false; \
+ } \
+ while (nbytes >= __blocks * __bsize) { \
(func)(ctx, dst, src); \
ECB_WALK_ADVANCE(blocks); \
} \
@@ -53,7 +59,12 @@
} while (0)
#define CBC_DEC_BLOCK(blocks, func) do { \
- while (nbytes >= (blocks) * __bsize) { \
+ const int __blocks = (blocks); \
+ if (do_fpu && __blocks < __fpu_blocks) { \
+ kernel_fpu_end(); \
+ do_fpu = false; \
+ } \
+ while (nbytes >= __blocks * __bsize) { \
const u8 *__iv = src + ((blocks) - 1) * __bsize; \
if (dst == src) \
__iv = memcpy(buf, __iv, __bsize); \
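The new __fpu_blocks bookkeeping lets a walk that started wide drop out of the FPU section as soon as it falls back to a pass narrower than the threshold, so the scalar one-block tail no longer runs between kernel_fpu_begin() and kernel_fpu_end(). Expanded by hand for a 16-block threshold, the behaviour is roughly this (a hedged sketch, not the macro output):

#include <linux/types.h>
#include <asm/fpu/api.h>

/* Hedged sketch of ECB_WALK_START()/ECB_BLOCK() with fpu_blocks == 16:
 * the FPU section covers only the wide SIMD pass and is closed before
 * any narrower pass runs.
 */
static void ecb_walk_sketch(void *ctx, u8 *dst, const u8 *src,
			    unsigned int nbytes, unsigned int bsize,
			    void (*enc16)(void *, u8 *, const u8 *),
			    void (*enc1)(void *, u8 *, const u8 *))
{
	bool do_fpu = nbytes >= 16 * bsize;

	if (do_fpu)
		kernel_fpu_begin();

	while (nbytes >= 16 * bsize) {		/* ECB_BLOCK(16, ...) */
		enc16(ctx, dst, src);
		dst += 16 * bsize; src += 16 * bsize; nbytes -= 16 * bsize;
	}

	if (do_fpu) {				/* 1 < __fpu_blocks */
		kernel_fpu_end();
		do_fpu = false;
	}

	while (nbytes >= bsize) {		/* ECB_BLOCK(1, ...) */
		enc1(ctx, dst, src);
		dst += bsize; src += bsize; nbytes -= bsize;
	}
}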
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 2bf871899920..257ed9446f3e 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -4,7 +4,7 @@
* instructions. This file contains accelerated part of ghash
* implementation. More information about PCLMULQDQ can be found at:
*
- * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
+ * https://www.intel.com/content/dam/develop/external/us/en/documents/clmul-wp-rev-2-02-2014-04-20.pdf
*
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <[email protected]>
@@ -88,7 +88,7 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
RET
SYM_FUNC_END(__clmul_gf128mul_ble)
-/* void clmul_ghash_mul(char *dst, const u128 *shash) */
+/* void clmul_ghash_mul(char *dst, const le128 *shash) */
SYM_FUNC_START(clmul_ghash_mul)
FRAME_BEGIN
movups (%rdi), DATA
@@ -104,7 +104,7 @@ SYM_FUNC_END(clmul_ghash_mul)
/*
* void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
- * const u128 *shash);
+ * const le128 *shash);
*/
SYM_FUNC_START(clmul_ghash_update)
FRAME_BEGIN
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 1f1a95f3dd0c..700ecaee9a08 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -19,21 +19,22 @@
#include <crypto/internal/simd.h>
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
+#include <asm/unaligned.h>
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
-void clmul_ghash_mul(char *dst, const u128 *shash);
+void clmul_ghash_mul(char *dst, const le128 *shash);
void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
- const u128 *shash);
+ const le128 *shash);
struct ghash_async_ctx {
struct cryptd_ahash *cryptd_tfm;
};
struct ghash_ctx {
- u128 shash;
+ le128 shash;
};
struct ghash_desc_ctx {
@@ -54,22 +55,40 @@ static int ghash_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
- be128 *x = (be128 *)key;
u64 a, b;
if (keylen != GHASH_BLOCK_SIZE)
return -EINVAL;
- /* perform multiplication by 'x' in GF(2^128) */
- a = be64_to_cpu(x->a);
- b = be64_to_cpu(x->b);
-
- ctx->shash.a = (b << 1) | (a >> 63);
- ctx->shash.b = (a << 1) | (b >> 63);
-
+ /*
+ * GHASH maps bits to polynomial coefficients backwards, which makes it
+ * hard to implement. But it can be shown that the GHASH multiplication
+ *
+ * D * K (mod x^128 + x^7 + x^2 + x + 1)
+ *
+ * (where D is a data block and K is the key) is equivalent to:
+ *
+ * bitreflect(D) * bitreflect(K) * x^(-127)
+ * (mod x^128 + x^127 + x^126 + x^121 + 1)
+ *
+ * So, the code below precomputes:
+ *
+ * bitreflect(K) * x^(-127) (mod x^128 + x^127 + x^126 + x^121 + 1)
+ *
+ * ... but in Montgomery form (so that Montgomery multiplication can be
+ * used), i.e. with an extra x^128 factor, which means actually:
+ *
+ * bitreflect(K) * x (mod x^128 + x^127 + x^126 + x^121 + 1)
+ *
+ * The within-a-byte part of bitreflect() cancels out GHASH's built-in
+ * reflection, and thus bitreflect() is actually a byteswap.
+ */
+ a = get_unaligned_be64(key);
+ b = get_unaligned_be64(key + 8);
+ ctx->shash.a = cpu_to_le64((a << 1) | (b >> 63));
+ ctx->shash.b = cpu_to_le64((b << 1) | (a >> 63));
if (a >> 63)
- ctx->shash.b ^= ((u64)0xc2) << 56;
-
+ ctx->shash.a ^= cpu_to_le64((u64)0xc2 << 56);
return 0;
}
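The comment block explains why the setkey routine stores the byteswapped key multiplied by x modulo x^128 + x^127 + x^126 + x^121 + 1 (Montgomery form for the reflected GHASH field). The shift-and-fold it performs on the two big-endian halves a and b can be written out on plain integers as follows (an illustration of the arithmetic above, not a replacement for it):

#include <stdint.h>

/* Hedged sketch of the precomputation in ghash_setkey(): a and b are the
 * two 64-bit halves of the key loaded big endian; the result is the key
 * "doubled" in the reflected field, with 0xc2 << 56 as the reduction
 * constant for this bit ordering.
 */
static void ghash_key_times_x(uint64_t a, uint64_t b, uint64_t out[2])
{
	out[0] = (a << 1) | (b >> 63);		/* becomes shash.a */
	out[1] = (b << 1) | (a >> 63);		/* becomes shash.b */
	if (a >> 63)				/* top bit of a was set */
		out[0] ^= (uint64_t)0xc2 << 56;	/* fold the reduction back in */
}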
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 15739a2c0983..7ecd2aeeeffc 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -782,7 +782,7 @@ _ASM_NOKPROBE(common_interrupt_return)
/*
* Reload gs selector with exception handling
- * edi: new selector
+ * di: new selector
*
* Is in entry.text as it shouldn't be instrumented.
*/
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 8f80de627c60..b1a98fa38828 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -12,6 +12,7 @@
#include <asm/special_insns.h>
#include <asm/preempt.h>
#include <asm/asm.h>
+#include <asm/gsseg.h>
#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);
diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index 407beebadaf4..4d4a47a3a8ab 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -9,7 +9,6 @@
*/
#include <linux/compiler.h>
-#include <linux/uaccess.h>
#include <asm/byteorder.h>
/**
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1a85e1fb0922..ce0c8f7d3218 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -32,6 +32,7 @@ enum cpuid_leafs
CPUID_8000_0007_EBX,
CPUID_7_EDX,
CPUID_8000_001F_EAX,
+ CPUID_8000_0021_EAX,
};
#define X86_CAP_FMT_NUM "%d:%d"
@@ -94,8 +95,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 20))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 21))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -118,8 +120,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 20))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 21))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 49973061b5bd..389ea336258f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 20 /* N 32-bit words worth of info */
+#define NCAPINTS 21 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -97,7 +97,7 @@
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
+/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
@@ -315,6 +315,7 @@
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */
#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */
+#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
@@ -429,6 +430,13 @@
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
+/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
+#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */
+#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */
+#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */
+#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index ca97442e8d49..66eb5e1ac4fb 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -148,9 +148,14 @@ static __always_inline void local_db_restore(unsigned long dr7)
}
#ifdef CONFIG_CPU_SUP_AMD
-extern void set_dr_addr_mask(unsigned long mask, int dr);
+extern void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr);
+extern unsigned long amd_get_dr_addr_mask(unsigned int dr);
#else
-static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
+static inline void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr) { }
+static inline unsigned long amd_get_dr_addr_mask(unsigned int dr)
+{
+ return 0;
+}
#endif
#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index c44b56f7ffba..5dfa4fb76f4b 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -124,6 +124,7 @@
#define DISABLED_MASK17 0
#define DISABLED_MASK18 0
#define DISABLED_MASK19 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20)
+#define DISABLED_MASK20 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/gsseg.h b/arch/x86/include/asm/gsseg.h
new file mode 100644
index 000000000000..ab6a595cea70
--- /dev/null
+++ b/arch/x86/include/asm/gsseg.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_X86_GSSEG_H
+#define _ASM_X86_GSSEG_H
+
+#include <linux/types.h>
+
+#include <asm/asm.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
+#include <asm/processor.h>
+#include <asm/nops.h>
+
+#ifdef CONFIG_X86_64
+
+extern asmlinkage void asm_load_gs_index(u16 selector);
+
+/* Replace with "lkgs %di" once binutils supports the LKGS instruction */
+#define LKGS_DI _ASM_BYTES(0xf2,0x0f,0x00,0xf7)
+
+static inline void native_lkgs(unsigned int selector)
+{
+ u16 sel = selector;
+ asm_inline volatile("1: " LKGS_DI
+ _ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k[sel])
+ : [sel] "+D" (sel));
+}
+
+static inline void native_load_gs_index(unsigned int selector)
+{
+ if (cpu_feature_enabled(X86_FEATURE_LKGS)) {
+ native_lkgs(selector);
+ } else {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ asm_load_gs_index(selector);
+ local_irq_restore(flags);
+ }
+}
+
+#endif /* CONFIG_X86_64 */
+
+static inline void __init lkgs_init(void)
+{
+#ifdef CONFIG_PARAVIRT_XXL
+#ifdef CONFIG_X86_64
+ if (cpu_feature_enabled(X86_FEATURE_LKGS))
+ pv_ops.cpu.load_gs_index = native_lkgs;
+#endif
+#endif
+}
+
+#ifndef CONFIG_PARAVIRT_XXL
+
+static inline void load_gs_index(unsigned int selector)
+{
+#ifdef CONFIG_X86_64
+ native_load_gs_index(selector);
+#else
+ loadsegment(gs, selector);
+#endif
+}
+
+#endif /* CONFIG_PARAVIRT_XXL */
+
+#endif /* _ASM_X86_GSSEG_H */
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 08e822bd7aa6..566ac26239ba 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -116,6 +116,9 @@
/* Recommend using the newer ExProcessorMasks interface */
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
+/* Indicates that the hypervisor is nested within a Hyper-V partition. */
+#define HV_X64_HYPERV_NESTED BIT(12)
+
/* Recommend using enlightened VMCS */
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
@@ -225,6 +228,17 @@ enum hv_isolation_type {
#define HV_REGISTER_SINT15 0x4000009F
/*
+ * Define synthetic interrupt controller model specific registers for
+ * nested hypervisor.
+ */
+#define HV_REGISTER_NESTED_SCONTROL 0x40001080
+#define HV_REGISTER_NESTED_SVERSION 0x40001081
+#define HV_REGISTER_NESTED_SIEFP 0x40001082
+#define HV_REGISTER_NESTED_SIMP 0x40001083
+#define HV_REGISTER_NESTED_EOM 0x40001084
+#define HV_REGISTER_NESTED_SINT0 0x40001090
+
+/*
* Synthetic Timer MSRs. Four timers per vcpu.
*/
#define HV_REGISTER_STIMER0_CONFIG 0x400000B0
@@ -368,7 +382,8 @@ struct hv_nested_enlightenments_control {
__u32 reserved:31;
} features;
struct {
- __u32 reserved;
+ __u32 inter_partition_comm:1;
+ __u32 reserved:31;
} hypercallControls;
} __packed;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index b8d40ddeab00..e01aa74a6de7 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -12,6 +12,7 @@
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/debugreg.h>
+#include <asm/gsseg.h>
extern atomic64_t last_mm_ctx_id;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 6d502f3efb0f..4c4c0ec3b62e 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -72,10 +72,16 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return hv_status;
}
+/* Hypercall to the L0 hypervisor */
+static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output)
+{
+ return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output);
+}
+
/* Fast hypercall with 8 bytes of input and no output */
-static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ u64 hv_status;
#ifdef CONFIG_X86_64
{
@@ -103,10 +109,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
return hv_status;
}
+static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+ return _hv_do_fast_hypercall8(control, input1);
+}
+
+static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
+
+ return _hv_do_fast_hypercall8(control, input1);
+}
+
/* Fast hypercall with 16 bytes of input */
-static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ u64 hv_status;
#ifdef CONFIG_X86_64
{
@@ -137,6 +157,20 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
return hv_status;
}
+static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+ return _hv_do_fast_hypercall16(control, input1, input2);
+}
+
+static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
+
+ return _hv_do_fast_hypercall16(control, input1, input2);
+}
+
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
@@ -190,36 +224,20 @@ extern bool hv_isolation_type_snp(void);
static inline bool hv_is_synic_reg(unsigned int reg)
{
- if ((reg >= HV_REGISTER_SCONTROL) &&
- (reg <= HV_REGISTER_SINT15))
- return true;
- return false;
+ return (reg >= HV_REGISTER_SCONTROL) &&
+ (reg <= HV_REGISTER_SINT15);
}
-static inline u64 hv_get_register(unsigned int reg)
+static inline bool hv_is_sint_reg(unsigned int reg)
{
- u64 value;
-
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
- hv_ghcb_msr_read(reg, &value);
- else
- rdmsrl(reg, value);
- return value;
+ return (reg >= HV_REGISTER_SINT0) &&
+ (reg <= HV_REGISTER_SINT15);
}
-static inline void hv_set_register(unsigned int reg, u64 value)
-{
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
- hv_ghcb_msr_write(reg, value);
-
- /* Write proxy bit via wrmsl instruction */
- if (reg >= HV_REGISTER_SINT0 &&
- reg <= HV_REGISTER_SINT15)
- wrmsrl(reg, value | 1 << 20);
- } else {
- wrmsrl(reg, value);
- }
-}
+u64 hv_get_register(unsigned int reg);
+void hv_set_register(unsigned int reg, u64 value);
+u64 hv_get_non_nested_register(unsigned int reg);
+void hv_set_non_nested_register(unsigned int reg, u64 value);
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
@@ -239,6 +257,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
}
static inline void hv_set_register(unsigned int reg, u64 value) { }
static inline u64 hv_get_register(unsigned int reg) { return 0; }
+static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { }
+static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
bool visible)
{
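A hedged sketch of the control word the new fast nested hypercall helpers build; HVCALL_NOTIFY_LONG_SPIN_WAIT is used purely as an example call code and the helper name below is hypothetical.

static inline u64 example_nested_fast_control(void)
{
	/* Same composition hv_do_fast_nested_hypercall8() performs above. */
	return (u64)HVCALL_NOTIFY_LONG_SPIN_WAIT |
	       HV_HYPERCALL_FAST_BIT |	/* 8-byte register-based fast form */
	       HV_HYPERCALL_NESTED;	/* route the call to the L0 hypervisor */
}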
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 28d334fa8588..ad35355ee43e 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -25,6 +25,7 @@
#define _EFER_SVME 12 /* Enable virtualization */
#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */
#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
+#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */
#define EFER_SCE (1<<_EFER_SCE)
#define EFER_LME (1<<_EFER_LME)
@@ -33,6 +34,7 @@
#define EFER_SVME (1<<_EFER_SVME)
#define EFER_LMSLE (1<<_EFER_LMSLE)
#define EFER_FFXSR (1<<_EFER_FFXSR)
+#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
/* Intel MSRs. Some also available on other CPUs */
@@ -49,6 +51,10 @@
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+/* A mask for bits which the kernel toggles when controlling mitigations */
+#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
+ | SPEC_CTRL_RRSBA_DIS_S)
+
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index aff774775c67..7ba1726b71c7 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -98,6 +98,7 @@
#define REQUIRED_MASK17 0
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20)
+#define REQUIRED_MASK20 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index f358a23f228d..de48d1389936 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -120,17 +120,6 @@ static __always_inline void native_wbinvd(void)
asm volatile("wbinvd": : :"memory");
}
-extern asmlinkage void asm_load_gs_index(unsigned int selector);
-
-static inline void native_load_gs_index(unsigned int selector)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- asm_load_gs_index(selector);
- local_irq_restore(flags);
-}
-
static inline unsigned long __read_cr4(void)
{
return native_read_cr4();
@@ -184,16 +173,6 @@ static __always_inline void wbinvd(void)
native_wbinvd();
}
-
-static inline void load_gs_index(unsigned int selector)
-{
-#ifdef CONFIG_X86_64
- native_load_gs_index(selector);
-#else
- loadsegment(gs, selector);
-#endif
-}
-
#endif /* CONFIG_PARAVIRT_XXL */
static __always_inline void clflush(volatile void *__p)
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 82c783da16a8..ef9e951415c5 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -7,6 +7,7 @@
#define COMPILE_OFFSETS
#include <linux/crypto.h>
+#include <crypto/aria.h>
#include <linux/sched.h>
#include <linux/stddef.h>
#include <linux/hardirq.h>
@@ -111,5 +112,12 @@ static void __used common(void)
#ifdef CONFIG_CALL_DEPTH_TRACKING
OFFSET(X86_call_depth, pcpu_hot, call_depth);
#endif
+#if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64)
+ /* Offset for fields in aria_ctx */
+ BLANK();
+ OFFSET(ARIA_CTX_enc_key, aria_ctx, enc_key);
+ OFFSET(ARIA_CTX_dec_key, aria_ctx, dec_key);
+ OFFSET(ARIA_CTX_rounds, aria_ctx, rounds);
+#endif
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f769d6d08b43..380753b14cab 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -956,7 +956,7 @@ static void init_amd(struct cpuinfo_x86 *c)
init_amd_cacheinfo(c);
- if (cpu_has(c, X86_FEATURE_XMM2)) {
+ if (!cpu_has(c, X86_FEATURE_LFENCE_RDTSC) && cpu_has(c, X86_FEATURE_XMM2)) {
/*
* Use LFENCE for execution serialization. On families which
* don't have that MSR, LFENCE is already serializing.
@@ -1158,24 +1158,43 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
return false;
}
-void set_dr_addr_mask(unsigned long mask, int dr)
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long[4], amd_dr_addr_mask);
+
+static unsigned int amd_msr_dr_addr_masks[] = {
+ MSR_F16H_DR0_ADDR_MASK,
+ MSR_F16H_DR1_ADDR_MASK,
+ MSR_F16H_DR1_ADDR_MASK + 1,
+ MSR_F16H_DR1_ADDR_MASK + 2
+};
+
+void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr)
{
- if (!boot_cpu_has(X86_FEATURE_BPEXT))
+ int cpu = smp_processor_id();
+
+ if (!cpu_feature_enabled(X86_FEATURE_BPEXT))
return;
- switch (dr) {
- case 0:
- wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);
- break;
- case 1:
- case 2:
- case 3:
- wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
- break;
- default:
- break;
- }
+ if (WARN_ON_ONCE(dr >= ARRAY_SIZE(amd_msr_dr_addr_masks)))
+ return;
+
+ if (per_cpu(amd_dr_addr_mask, cpu)[dr] == mask)
+ return;
+
+ wrmsr(amd_msr_dr_addr_masks[dr], mask, 0);
+ per_cpu(amd_dr_addr_mask, cpu)[dr] = mask;
+}
+
+unsigned long amd_get_dr_addr_mask(unsigned int dr)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_BPEXT))
+ return 0;
+
+ if (WARN_ON_ONCE(dr >= ARRAY_SIZE(amd_msr_dr_addr_masks)))
+ return 0;
+
+ return per_cpu(amd_dr_addr_mask[dr], smp_processor_id());
}
+EXPORT_SYMBOL_GPL(amd_get_dr_addr_mask);
u32 amd_get_highest_perf(void)
{
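Illustration only, not part of the patch: a hypothetical call site for the reworked interface. Because the mask is now cached per CPU, the second call below does not issue a WRMSR.

static void example_dr0_addr_mask(void)
{
	/* Assumes X86_FEATURE_BPEXT; mirrors what the hw_breakpoint code
	 * further below now does via amd_set_dr_addr_mask(). */
	amd_set_dr_addr_mask(GENMASK(11, 0), 0);	/* ignore low 12 address bits of DR0 */
	amd_set_dr_addr_mask(GENMASK(11, 0), 0);	/* no WRMSR: cached value unchanged */
	WARN_ON(amd_get_dr_addr_mask(0) != GENMASK(11, 0));
}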
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 85168740f76a..cf81848b72f4 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -33,6 +33,7 @@
#include <asm/e820/api.h>
#include <asm/hypervisor.h>
#include <asm/tlbflush.h>
+#include <asm/cpu.h>
#include "cpu.h"
@@ -144,9 +145,17 @@ void __init check_bugs(void)
* have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
* init code as it is not enumerated and depends on the family.
*/
- if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ if (cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) {
rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ /*
+ * A previously running kernel (kexec) may have some controls
+ * turned ON. Clear them and let the mitigations setup below
+ * rediscover them based on configuration.
+ */
+ x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
+ }
+
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
@@ -1229,9 +1238,9 @@ static const char * const spectre_v2_strings[] = {
[SPECTRE_V2_NONE] = "Vulnerable",
[SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
[SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
- [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
- [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
- [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
+ [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS",
+ [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE",
+ [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines",
[SPECTRE_V2_IBRS] = "Mitigation: IBRS",
};
@@ -1300,7 +1309,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
!boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
- pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n",
+ pr_err("%s selected but CPU doesn't have Enhanced or Automatic IBRS. Switching to AUTO select\n",
mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
}
@@ -1486,8 +1495,12 @@ static void __init spectre_v2_select_mitigation(void)
pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
if (spectre_v2_in_ibrs_mode(mode)) {
- x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
- update_spec_ctrl(x86_spec_ctrl_base);
+ if (boot_cpu_has(X86_FEATURE_AUTOIBRS)) {
+ msr_set_bit(MSR_EFER, _EFER_AUTOIBRS);
+ } else {
+ x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ }
}
switch (mode) {
@@ -1571,8 +1584,8 @@ static void __init spectre_v2_select_mitigation(void)
/*
* Retpoline protects the kernel, but doesn't protect firmware. IBRS
* and Enhanced IBRS protect firmware too, so enable IBRS around
- * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
- * enabled.
+ * firmware calls only when IBRS / Enhanced / Automatic IBRS aren't
+ * otherwise enabled.
*
* Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
* the user might select retpoline on the kernel command line and if
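A worked example, with illustrative values only, of the kexec clean-up added at the top of check_bugs() above: if the previous kernel left IBRS and SSBD set in SPEC_CTRL, masking with SPEC_CTRL_MITIGATIONS_MASK clears them so the mitigation selection re-derives the correct state.

	u64 inherited = SPEC_CTRL_IBRS | SPEC_CTRL_SSBD;	/* left behind by a kexec'd kernel */
	u64 base = inherited & ~SPEC_CTRL_MITIGATIONS_MASK;	/* -> 0; mitigations re-select below */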
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 65ceabb2e114..a394bbba7a4b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1093,6 +1093,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
if (c->extended_cpuid_level >= 0x8000001f)
c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f);
+ if (c->extended_cpuid_level >= 0x80000021)
+ c->x86_capability[CPUID_8000_0021_EAX] = cpuid_eax(0x80000021);
+
init_scattered_cpuid_features(c);
init_speculation_control(c);
@@ -1226,8 +1229,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
/* Zhaoxin Family 7 */
VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
@@ -1340,8 +1343,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
- if (ia32_cap & ARCH_CAP_IBRS_ALL)
+ /*
+ * AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature
+ * flag and protect from vendor-specific bugs via the whitelist.
+ */
+ if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) {
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
+ if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
+ !(ia32_cap & ARCH_CAP_PBRSB_NO))
+ setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
+ }
if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
!(ia32_cap & ARCH_CAP_MDS_NO)) {
@@ -1403,11 +1414,6 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_RETBLEED);
}
- if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
- !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
- !(ia32_cap & ARCH_CAP_PBRSB_NO))
- setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
-
if (cpu_matches(cpu_vuln_blacklist, SMT_RSB))
setup_force_cpu_bug(X86_BUG_SMT_RSB);
@@ -1687,9 +1693,7 @@ void check_null_seg_clears_base(struct cpuinfo_x86 *c)
if (!IS_ENABLED(CONFIG_X86_64))
return;
- /* Zen3 CPUs advertise Null Selector Clears Base in CPUID. */
- if (c->extended_cpuid_level >= 0x80000021 &&
- cpuid_eax(0x80000021) & BIT(6))
+ if (cpu_has(c, X86_FEATURE_NULL_SEL_CLR_BASE))
return;
/*
@@ -1964,6 +1968,7 @@ void __init identify_boot_cpu(void)
setup_cr_pinning();
tsx_init();
+ lkgs_init();
}
void identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 7c9b5893c30a..57a5349e6954 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -83,6 +83,4 @@ unsigned int aperfmperf_get_khz(int cpu);
extern void x86_spec_ctrl_setup_ap(void);
extern void update_srbds_msr(void);
-extern u64 x86_read_arch_cap_msr(void);
-
#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 46668e255421..f924a76c6923 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -37,9 +37,76 @@
/* Is Linux running as the root partition? */
bool hv_root_partition;
+/* Is Linux running on a nested Microsoft Hypervisor? */
+bool hv_nested;
struct ms_hyperv_info ms_hyperv;
#if IS_ENABLED(CONFIG_HYPERV)
+static inline unsigned int hv_get_nested_reg(unsigned int reg)
+{
+ if (hv_is_sint_reg(reg))
+ return reg - HV_REGISTER_SINT0 + HV_REGISTER_NESTED_SINT0;
+
+ switch (reg) {
+ case HV_REGISTER_SIMP:
+ return HV_REGISTER_NESTED_SIMP;
+ case HV_REGISTER_SIEFP:
+ return HV_REGISTER_NESTED_SIEFP;
+ case HV_REGISTER_SVERSION:
+ return HV_REGISTER_NESTED_SVERSION;
+ case HV_REGISTER_SCONTROL:
+ return HV_REGISTER_NESTED_SCONTROL;
+ case HV_REGISTER_EOM:
+ return HV_REGISTER_NESTED_EOM;
+ default:
+ return reg;
+ }
+}
+
+u64 hv_get_non_nested_register(unsigned int reg)
+{
+ u64 value;
+
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
+ hv_ghcb_msr_read(reg, &value);
+ else
+ rdmsrl(reg, value);
+ return value;
+}
+EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
+
+void hv_set_non_nested_register(unsigned int reg, u64 value)
+{
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
+ hv_ghcb_msr_write(reg, value);
+
+ /* Write proxy bit via wrmsrl instruction */
+ if (hv_is_sint_reg(reg))
+ wrmsrl(reg, value | 1 << 20);
+ } else {
+ wrmsrl(reg, value);
+ }
+}
+EXPORT_SYMBOL_GPL(hv_set_non_nested_register);
+
+u64 hv_get_register(unsigned int reg)
+{
+ if (hv_nested)
+ reg = hv_get_nested_reg(reg);
+
+ return hv_get_non_nested_register(reg);
+}
+EXPORT_SYMBOL_GPL(hv_get_register);
+
+void hv_set_register(unsigned int reg, u64 value)
+{
+ if (hv_nested)
+ reg = hv_get_nested_reg(reg);
+
+ hv_set_non_nested_register(reg, value);
+}
+EXPORT_SYMBOL_GPL(hv_set_register);
+
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
@@ -301,6 +368,11 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: running as root partition\n");
}
+ if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) {
+ hv_nested = true;
+ pr_info("Hyper-V: running on a nested hypervisor\n");
+ }
+
/*
* Extract host information.
*/
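A worked example of the remapping above, using the MSR numbers added to hyperv-tlfs.h earlier in this patch (SINT0 = 0x40000090, NESTED_SINT0 = 0x40001090):

/* With hv_nested set: */
hv_get_register(HV_REGISTER_SINT3);
/* -> hv_get_nested_reg() maps SINT3 to
 *    HV_REGISTER_SINT3 - HV_REGISTER_SINT0 + HV_REGISTER_NESTED_SINT0
 *    = 0x40000093 - 0x40000090 + 0x40001090 = 0x40001093,
 *    which hv_get_non_nested_register() then reads with rdmsrl(). */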
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
index 8009c8346d8f..b31ee4f1657a 100644
--- a/arch/x86/kernel/cpu/tsx.c
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -11,6 +11,7 @@
#include <linux/cpufeature.h>
#include <asm/cmdline.h>
+#include <asm/cpu.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index bbb0f737aab1..b01644c949b2 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -127,7 +127,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
set_debugreg(*dr7, 7);
if (info->mask)
- set_dr_addr_mask(info->mask, i);
+ amd_set_dr_addr_mask(info->mask, i);
return 0;
}
@@ -166,7 +166,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
set_debugreg(dr7, 7);
if (info->mask)
- set_dr_addr_mask(0, i);
+ amd_set_dr_addr_mask(0, i);
/*
* Ensure the write to cpu_dr7 is after we've set the DR7 register.
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index e57e07b0edb6..57b0037d0a99 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -46,8 +46,8 @@ unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
/* This function only handles jump-optimized kprobe */
if (kp && kprobe_optimized(kp)) {
op = container_of(kp, struct optimized_kprobe, kp);
- /* If op->list is not empty, op is under optimizing */
- if (list_empty(&op->list))
+ /* If op is optimized or under unoptimizing */
+ if (list_empty(&op->list) || optprobe_queued_unopt(op))
goto found;
}
}
@@ -353,7 +353,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op)
for (i = 1; i < op->optinsn.size; i++) {
p = get_kprobe(op->kp.addr + i);
- if (p && !kprobe_disabled(p))
+ if (p && !kprobe_disarmed(p))
return -EEXIST;
}
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 705fb2a41d7d..84ad0e61ba6e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -129,22 +129,27 @@ int apply_relocate(Elf32_Shdr *sechdrs,
return 0;
}
#else /*X86_64*/
-static int __apply_relocate_add(Elf64_Shdr *sechdrs,
+static int __write_relocate_add(Elf64_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
unsigned int relsec,
struct module *me,
- void *(*write)(void *dest, const void *src, size_t len))
+ void *(*write)(void *dest, const void *src, size_t len),
+ bool apply)
{
unsigned int i;
Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
Elf64_Sym *sym;
void *loc;
u64 val;
+ u64 zero = 0ULL;
- DEBUGP("Applying relocate section %u to %u\n",
+ DEBUGP("%s relocate section %u to %u\n",
+ apply ? "Applying" : "Clearing",
relsec, sechdrs[relsec].sh_info);
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+ size_t size;
+
/* This is where to make the change */
loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ rel[i].r_offset;
@@ -162,56 +167,53 @@ static int __apply_relocate_add(Elf64_Shdr *sechdrs,
switch (ELF64_R_TYPE(rel[i].r_info)) {
case R_X86_64_NONE:
- break;
+ continue; /* nothing to write */
case R_X86_64_64:
- if (*(u64 *)loc != 0)
- goto invalid_relocation;
- write(loc, &val, 8);
+ size = 8;
break;
case R_X86_64_32:
- if (*(u32 *)loc != 0)
- goto invalid_relocation;
- write(loc, &val, 4);
- if (val != *(u32 *)loc)
+ if (val != *(u32 *)&val)
goto overflow;
+ size = 4;
break;
case R_X86_64_32S:
- if (*(s32 *)loc != 0)
- goto invalid_relocation;
- write(loc, &val, 4);
- if ((s64)val != *(s32 *)loc)
+ if ((s64)val != *(s32 *)&val)
goto overflow;
+ size = 4;
break;
case R_X86_64_PC32:
case R_X86_64_PLT32:
- if (*(u32 *)loc != 0)
- goto invalid_relocation;
val -= (u64)loc;
- write(loc, &val, 4);
-#if 0
- if ((s64)val != *(s32 *)loc)
- goto overflow;
-#endif
+ size = 4;
break;
case R_X86_64_PC64:
- if (*(u64 *)loc != 0)
- goto invalid_relocation;
val -= (u64)loc;
- write(loc, &val, 8);
+ size = 8;
break;
default:
pr_err("%s: Unknown rela relocation: %llu\n",
me->name, ELF64_R_TYPE(rel[i].r_info));
return -ENOEXEC;
}
+
+ if (apply) {
+ if (memcmp(loc, &zero, size)) {
+ pr_err("x86/modules: Invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n",
+ (int)ELF64_R_TYPE(rel[i].r_info), loc, val);
+ return -ENOEXEC;
+ }
+ write(loc, &val, size);
+ } else {
+ if (memcmp(loc, &val, size)) {
+ pr_warn("x86/modules: Invalid relocation target, existing value does not match expected value for type %d, loc %p, val %Lx\n",
+ (int)ELF64_R_TYPE(rel[i].r_info), loc, val);
+ return -ENOEXEC;
+ }
+ write(loc, &zero, size);
+ }
}
return 0;
-invalid_relocation:
- pr_err("x86/modules: Skipping invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n",
- (int)ELF64_R_TYPE(rel[i].r_info), loc, val);
- return -ENOEXEC;
-
overflow:
pr_err("overflow in relocation type %d val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), val);
@@ -220,11 +222,12 @@ overflow:
return -ENOEXEC;
}
-int apply_relocate_add(Elf64_Shdr *sechdrs,
- const char *strtab,
- unsigned int symindex,
- unsigned int relsec,
- struct module *me)
+static int write_relocate_add(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me,
+ bool apply)
{
int ret;
bool early = me->state == MODULE_STATE_UNFORMED;
@@ -235,8 +238,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
mutex_lock(&text_mutex);
}
- ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
- write);
+ ret = __write_relocate_add(sechdrs, strtab, symindex, relsec, me,
+ write, apply);
if (!early) {
text_poke_sync();
@@ -246,6 +249,26 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
return ret;
}
+int apply_relocate_add(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ return write_relocate_add(sechdrs, strtab, symindex, relsec, me, true);
+}
+
+#ifdef CONFIG_LIVEPATCH
+void clear_relocate_add(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ write_relocate_add(sechdrs, strtab, symindex, relsec, me, false);
+}
+#endif
+
#endif
int module_finalize(const Elf_Ehdr *hdr,
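Illustration only: a hypothetical livepatch-side pairing of the two entry points. apply_relocate_add() still requires the targets to be zero before writing, while the new clear_relocate_add() verifies the current value matches the relocation before zeroing it again.

static int example_klp_write_relocs(Elf64_Shdr *sechdrs, const char *strtab,
				    unsigned int symindex, unsigned int relsec,
				    struct module *me, bool load)
{
	if (load)
		return apply_relocate_add(sechdrs, strtab, symindex, relsec, me);

	clear_relocate_add(sechdrs, strtab, symindex, relsec, me);
	return 0;
}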
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index cec0bfa3bc04..c315b18ec7c8 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -69,6 +69,15 @@ struct nmi_stats {
unsigned int unknown;
unsigned int external;
unsigned int swallow;
+ unsigned long recv_jiffies;
+ unsigned long idt_seq;
+ unsigned long idt_nmi_seq;
+ unsigned long idt_ignored;
+ atomic_long_t idt_calls;
+ unsigned long idt_seq_snap;
+ unsigned long idt_nmi_seq_snap;
+ unsigned long idt_ignored_snap;
+ long idt_calls_snap;
};
static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
@@ -479,12 +488,15 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7);
DEFINE_IDTENTRY_RAW(exc_nmi)
{
irqentry_state_t irq_state;
+ struct nmi_stats *nsp = this_cpu_ptr(&nmi_stats);
/*
* Re-enable NMIs right here when running as an SEV-ES guest. This might
* cause nested NMIs, but those can be handled safely.
*/
sev_es_nmi_complete();
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
+ arch_atomic_long_inc(&nsp->idt_calls);
if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
return;
@@ -495,6 +507,11 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
}
this_cpu_write(nmi_state, NMI_EXECUTING);
this_cpu_write(nmi_cr2, read_cr2());
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
+ WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
+ WARN_ON_ONCE(!(nsp->idt_seq & 0x1));
+ WRITE_ONCE(nsp->recv_jiffies, jiffies);
+ }
nmi_restart:
/*
@@ -509,8 +526,19 @@ nmi_restart:
inc_irq_stat(__nmi_count);
- if (!ignore_nmis)
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) {
+ WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1);
+ } else if (!ignore_nmis) {
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
+ WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
+ WARN_ON_ONCE(!(nsp->idt_nmi_seq & 0x1));
+ }
default_do_nmi(regs);
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
+ WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
+ WARN_ON_ONCE(nsp->idt_nmi_seq & 0x1);
+ }
+ }
irqentry_nmi_exit(regs, irq_state);
@@ -525,6 +553,11 @@ nmi_restart:
if (user_mode(regs))
mds_user_clear_cpu_buffers();
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
+ WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
+ WARN_ON_ONCE(nsp->idt_seq & 0x1);
+ WRITE_ONCE(nsp->recv_jiffies, jiffies);
+ }
}
#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
@@ -537,6 +570,79 @@ DEFINE_IDTENTRY_RAW(exc_nmi_noist)
EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
#endif
+#ifdef CONFIG_NMI_CHECK_CPU
+
+static char *nmi_check_stall_msg[] = {
+/* */
+/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */
+/* | +------ cpu_is_offline(cpu) */
+/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */
+/* | | | NMI handler has been invoked. */
+/* | | | */
+/* V V V */
+/* 0 0 0 */ "NMIs are not reaching exc_nmi() handler",
+/* 0 0 1 */ "exc_nmi() handler is ignoring NMIs",
+/* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi() handler",
+/* 0 1 1 */ "CPU is offline and exc_nmi() handler is legitimately ignoring NMIs",
+/* 1 0 0 */ "CPU is in exc_nmi() handler and no further NMIs are reaching handler",
+/* 1 0 1 */ "CPU is in exc_nmi() handler which is legitimately ignoring NMIs",
+/* 1 1 0 */ "CPU is offline in exc_nmi() handler and no more NMIs are reaching exc_nmi() handler",
+/* 1 1 1 */ "CPU is offline in exc_nmi() handler which is legitimately ignoring NMIs",
+};
+
+void nmi_backtrace_stall_snap(const struct cpumask *btp)
+{
+ int cpu;
+ struct nmi_stats *nsp;
+
+ for_each_cpu(cpu, btp) {
+ nsp = per_cpu_ptr(&nmi_stats, cpu);
+ nsp->idt_seq_snap = READ_ONCE(nsp->idt_seq);
+ nsp->idt_nmi_seq_snap = READ_ONCE(nsp->idt_nmi_seq);
+ nsp->idt_ignored_snap = READ_ONCE(nsp->idt_ignored);
+ nsp->idt_calls_snap = atomic_long_read(&nsp->idt_calls);
+ }
+}
+
+void nmi_backtrace_stall_check(const struct cpumask *btp)
+{
+ int cpu;
+ int idx;
+ unsigned long nmi_seq;
+ unsigned long j = jiffies;
+ char *modp;
+ char *msgp;
+ char *msghp;
+ struct nmi_stats *nsp;
+
+ for_each_cpu(cpu, btp) {
+ nsp = per_cpu_ptr(&nmi_stats, cpu);
+ modp = "";
+ msghp = "";
+ nmi_seq = READ_ONCE(nsp->idt_nmi_seq);
+ if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) {
+ msgp = "CPU entered NMI handler function, but has not exited";
+ } else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) {
+ msgp = "CPU is handling NMIs";
+ } else {
+ idx = ((nsp->idt_seq_snap & 0x1) << 2) |
+ (cpu_is_offline(cpu) << 1) |
+ (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls));
+ msgp = nmi_check_stall_msg[idx];
+ if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
+ modp = ", but OK because ignore_nmis was set";
+ if (nmi_seq & ~0x1)
+ msghp = " (CPU currently in NMI handler function)";
+ else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
+ msghp = " (CPU exited one NMI handler function)";
+ }
+ pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n",
+ __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies));
+ }
+}
+
+#endif
+
void stop_nmi(void)
{
ignore_nmis++;
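A worked example of the stall-message lookup above: suppose the snapshot caught the CPU inside exc_nmi() (idt_seq_snap is odd), the CPU is online, and idt_calls has advanced since the snapshot.

/*
 * idx = (1 << 2)	idt_seq_snap & 0x1: in the NMI handler at snapshot time
 *     | (0 << 1)	cpu_is_offline(cpu) == 0
 *     | 1		idt_calls advanced since the snapshot
 *     = 5  ->  "CPU is in exc_nmi() handler which is legitimately ignoring NMIs",
 * and since idx is odd, modp may append ", but OK because ignore_nmis was set".
 */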
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 5bf4f0b2f35d..42e182868873 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -32,6 +32,7 @@
#include <asm/special_insns.h>
#include <asm/tlb.h>
#include <asm/io_bitmap.h>
+#include <asm/gsseg.h>
/*
* nop stub, which must not clobber anything *including the stack* to
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index f042dcdf1f16..9027fc088f97 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -31,6 +31,7 @@
#include <asm/sigframe.h>
#include <asm/sighandling.h>
#include <asm/smap.h>
+#include <asm/gsseg.h>
#ifdef CONFIG_IA32_EMULATION
#include <asm/ia32_unistd.h>
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 3c883e064242..3ffbab0081f4 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -12,6 +12,7 @@
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/proto.h>
+#include <asm/gsseg.h>
#include "tls.h"
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 596061c1610e..7f1b585f9a67 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -741,6 +741,27 @@ void kvm_set_cpu_caps(void)
0 /* SME */ | F(SEV) | 0 /* VM_PAGE_FLUSH */ | F(SEV_ES) |
F(SME_COHERENT));
+ kvm_cpu_cap_mask(CPUID_8000_0021_EAX,
+ F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ |
+ F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */
+ );
+
+ /*
+ * Synthesize "LFENCE is serializing" into the AMD-defined entry in
+ * KVM's supported CPUID if the feature is reported as supported by the
+ * kernel. LFENCE_RDTSC was a Linux-defined synthetic feature long
+ * before AMD joined the bandwagon, e.g. LFENCE is serializing on most
+ * CPUs that support SSE2. On CPUs that don't support AMD's leaf,
+ * kvm_cpu_cap_mask() will unfortunately drop the flag due to ANDing
+ * the mask with the raw host CPUID, and reporting support in AMD's
+ * leaf can make it easier for userspace to detect the feature.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
+ kvm_cpu_cap_set(X86_FEATURE_LFENCE_RDTSC);
+ if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
+ kvm_cpu_cap_set(X86_FEATURE_NULL_SEL_CLR_BASE);
+ kvm_cpu_cap_set(X86_FEATURE_NO_SMM_CTL_MSR);
+
kvm_cpu_cap_mask(CPUID_C000_0001_EDX,
F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
@@ -1222,25 +1243,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
break;
case 0x80000021:
entry->ebx = entry->ecx = entry->edx = 0;
- /*
- * Pass down these bits:
- * EAX 0 NNDBP, Processor ignores nested data breakpoints
- * EAX 2 LAS, LFENCE always serializing
- * EAX 6 NSCB, Null selector clear base
- *
- * Other defined bits are for MSRs that KVM does not expose:
- * EAX 3 SPCL, SMM page configuration lock
- * EAX 13 PCMSR, Prefetch control MSR
- *
- * KVM doesn't support SMM_CTL.
- * EAX 9 SMM_CTL MSR is not supported
- */
- entry->eax &= BIT(0) | BIT(2) | BIT(6);
- entry->eax |= BIT(9);
- if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC))
- entry->eax |= BIT(2);
- if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
- entry->eax |= BIT(6);
+ cpuid_entry_override(entry, CPUID_8000_0021_EAX);
break;
/*Add support for Centaur's CPUID instruction*/
case 0xC0000000:
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index 042d0aca3c92..81f4e9ce0c77 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -68,6 +68,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX},
[CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
[CPUID_7_1_EDX] = { 7, 1, CPUID_EDX},
+ [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX},
};
/*
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9a194aa1a75a..60c7c880266b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4969,6 +4969,9 @@ static __init int svm_hardware_setup(void)
tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
+ if (boot_cpu_has(X86_FEATURE_AUTOIBRS))
+ kvm_enable_efer_bits(EFER_AUTOIBRS);
+
/* Check for pause filtering support */
if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
pause_filter_count = 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a2c299d47e69..0735fbc9ba8c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1689,6 +1689,9 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
+ if (efer & EFER_AUTOIBRS && !guest_cpuid_has(vcpu, X86_FEATURE_AUTOIBRS))
+ return false;
+
if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
return false;
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index d12d1358f96d..5168ee0360b2 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1047,6 +1047,7 @@ GrpTable: Grp6
3: LTR Ew
4: VERR Ew
5: VERW Ew
+6: LKGS Ew (F2)
EndTable
GrpTable: Grp7
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b808be77635e..1056bbf55b17 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1003,6 +1003,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
u8 b2 = 0, b3 = 0;
u8 *start_of_ldx;
s64 jmp_offset;
+ s16 insn_off;
u8 jmp_cond;
u8 *func;
int nops;
@@ -1369,57 +1370,52 @@ st: if (is_imm8(insn->off))
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
case BPF_LDX | BPF_MEM | BPF_DW:
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ insn_off = insn->off;
+
if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
- /* Though the verifier prevents negative insn->off in BPF_PROBE_MEM
- * add abs(insn->off) to the limit to make sure that negative
- * offset won't be an issue.
- * insn->off is s16, so it won't affect valid pointers.
+ /* Conservatively check that src_reg + insn->off is a kernel address:
+ * src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE
+ * src_reg is used as scratch for src_reg += insn->off and restored
+ * after emit_ldx if necessary
*/
- u64 limit = TASK_SIZE_MAX + PAGE_SIZE + abs(insn->off);
- u8 *end_of_jmp1, *end_of_jmp2;
- /* Conservatively check that src_reg + insn->off is a kernel address:
- * 1. src_reg + insn->off >= limit
- * 2. src_reg + insn->off doesn't become small positive.
- * Cannot do src_reg + insn->off >= limit in one branch,
- * since it needs two spare registers, but JIT has only one.
+ u64 limit = TASK_SIZE_MAX + PAGE_SIZE;
+ u8 *end_of_jmp;
+
+ /* At end of these emitted checks, insn->off will have been added
+ * to src_reg, so no need to do relative load with insn->off offset
*/
+ insn_off = 0;
/* movabsq r11, limit */
EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG));
EMIT((u32)limit, 4);
EMIT(limit >> 32, 4);
+
+ if (insn->off) {
+ /* add src_reg, insn->off */
+ maybe_emit_1mod(&prog, src_reg, true);
+ EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off);
+ }
+
/* cmp src_reg, r11 */
maybe_emit_mod(&prog, src_reg, AUX_REG, true);
EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG));
- /* if unsigned '<' goto end_of_jmp2 */
- EMIT2(X86_JB, 0);
- end_of_jmp1 = prog;
-
- /* mov r11, src_reg */
- emit_mov_reg(&prog, true, AUX_REG, src_reg);
- /* add r11, insn->off */
- maybe_emit_1mod(&prog, AUX_REG, true);
- EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
- /* jmp if not carry to start_of_ldx
- * Otherwise ERR_PTR(-EINVAL) + 128 will be the user addr
- * that has to be rejected.
- */
- EMIT2(0x73 /* JNC */, 0);
- end_of_jmp2 = prog;
+
+ /* if unsigned '>=', goto load */
+ EMIT2(X86_JAE, 0);
+ end_of_jmp = prog;
/* xor dst_reg, dst_reg */
emit_mov_imm32(&prog, false, dst_reg, 0);
/* jmp byte_after_ldx */
EMIT2(0xEB, 0);
- /* populate jmp_offset for JB above to jump to xor dst_reg */
- end_of_jmp1[-1] = end_of_jmp2 - end_of_jmp1;
- /* populate jmp_offset for JNC above to jump to start_of_ldx */
+ /* populate jmp_offset for JAE above to jump to start_of_ldx */
start_of_ldx = prog;
- end_of_jmp2[-1] = start_of_ldx - end_of_jmp2;
+ end_of_jmp[-1] = start_of_ldx - end_of_jmp;
}
- emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off);
if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
struct exception_table_entry *ex;
u8 *_insn = image + proglen + (start_of_ldx - temp);
@@ -1428,6 +1424,18 @@ st: if (is_imm8(insn->off))
/* populate jmp_offset for JMP above */
start_of_ldx[-1] = prog - start_of_ldx;
+ if (insn->off && src_reg != dst_reg) {
+ /* sub src_reg, insn->off
+ * Restore src_reg after "add src_reg, insn->off" in prev
+ * if statement. But if src_reg == dst_reg, emit_ldx
+ * above already clobbered src_reg, so no need to restore.
+ * If add src_reg, insn->off was unnecessary, no need to
+ * restore either.
+ */
+ maybe_emit_1mod(&prog, src_reg, true);
+ EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off);
+ }
+
if (!bpf_prog->aux->extable)
break;
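In C terms, the sequence the JIT now emits for BPF_PROBE_MEM loads is roughly the following sketch (the comparison is unsigned and the load width follows BPF_SIZE(insn->code)):

	src += insn->off;			/* only emitted when insn->off != 0 */
	if ((u64)src >= TASK_SIZE_MAX + PAGE_SIZE)
		dst = *(u64 *)src;		/* emit_ldx() with insn_off == 0 */
	else
		dst = 0;			/* likely-user address: zero dst instead of loading */
	if (insn->off && src_reg != dst_reg)
		src -= insn->off;		/* restore src_reg after its scratch use */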
@@ -1849,62 +1857,59 @@ emit_jmp:
return proglen;
}
-static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
+static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
int stack_size)
{
- int i, j, arg_size, nr_regs;
+ int i, j, arg_size;
+ bool next_same_struct = false;
+
/* Store function arguments to stack.
* For a function that accepts two pointers the sequence will be:
* mov QWORD PTR [rbp-0x10],rdi
* mov QWORD PTR [rbp-0x8],rsi
*/
- for (i = 0, j = 0; i < min(nr_args, 6); i++) {
- if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) {
- nr_regs = (m->arg_size[i] + 7) / 8;
+ for (i = 0, j = 0; i < min(nr_regs, 6); i++) {
+ /* The arg_size is at most 16 bytes, enforced by the verifier. */
+ arg_size = m->arg_size[j];
+ if (arg_size > 8) {
arg_size = 8;
- } else {
- nr_regs = 1;
- arg_size = m->arg_size[i];
+ next_same_struct = !next_same_struct;
}
- while (nr_regs) {
- emit_stx(prog, bytes_to_bpf_size(arg_size),
- BPF_REG_FP,
- j == 5 ? X86_REG_R9 : BPF_REG_1 + j,
- -(stack_size - j * 8));
- nr_regs--;
- j++;
- }
+ emit_stx(prog, bytes_to_bpf_size(arg_size),
+ BPF_REG_FP,
+ i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+ -(stack_size - i * 8));
+
+ j = next_same_struct ? j : j + 1;
}
}
-static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
+static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
int stack_size)
{
- int i, j, arg_size, nr_regs;
+ int i, j, arg_size;
+ bool next_same_struct = false;
/* Restore function arguments from stack.
* For a function that accepts two pointers the sequence will be:
* EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
* EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
*/
- for (i = 0, j = 0; i < min(nr_args, 6); i++) {
- if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) {
- nr_regs = (m->arg_size[i] + 7) / 8;
+ for (i = 0, j = 0; i < min(nr_regs, 6); i++) {
+ /* The arg_size is at most 16 bytes, enforced by the verifier. */
+ arg_size = m->arg_size[j];
+ if (arg_size > 8) {
arg_size = 8;
- } else {
- nr_regs = 1;
- arg_size = m->arg_size[i];
+ next_same_struct = !next_same_struct;
}
- while (nr_regs) {
- emit_ldx(prog, bytes_to_bpf_size(arg_size),
- j == 5 ? X86_REG_R9 : BPF_REG_1 + j,
- BPF_REG_FP,
- -(stack_size - j * 8));
- nr_regs--;
- j++;
- }
+ emit_ldx(prog, bytes_to_bpf_size(arg_size),
+ i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+ BPF_REG_FP,
+ -(stack_size - i * 8));
+
+ j = next_same_struct ? j : j + 1;
}
}
@@ -2130,8 +2135,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
struct bpf_tramp_links *tlinks,
void *func_addr)
{
- int ret, i, nr_args = m->nr_args, extra_nregs = 0;
- int regs_off, ip_off, args_off, stack_size = nr_args * 8, run_ctx_off;
+ int i, ret, nr_regs = m->nr_args, stack_size = 0;
+ int regs_off, nregs_off, ip_off, run_ctx_off;
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -2140,17 +2145,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
u8 *prog;
bool save_ret;
- /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
- if (nr_args > 6)
- return -ENOTSUPP;
-
- for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
+ /* extra registers for struct arguments */
+ for (i = 0; i < m->nr_args; i++)
if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
- extra_nregs += (m->arg_size[i] + 7) / 8 - 1;
- }
- if (nr_args + extra_nregs > 6)
+ nr_regs += (m->arg_size[i] + 7) / 8 - 1;
+
+ /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
+ if (nr_regs > 6)
return -ENOTSUPP;
- stack_size += extra_nregs * 8;
/* Generated trampoline stack layout:
*
@@ -2164,7 +2166,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
* [ ... ]
* RBP - regs_off [ reg_arg1 ] program's ctx pointer
*
- * RBP - args_off [ arg regs count ] always
+ * RBP - nregs_off [ regs count ] always
*
* RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
*
@@ -2176,11 +2178,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (save_ret)
stack_size += 8;
+ stack_size += nr_regs * 8;
regs_off = stack_size;
- /* args count */
+ /* regs count */
stack_size += 8;
- args_off = stack_size;
+ nregs_off = stack_size;
if (flags & BPF_TRAMP_F_IP_ARG)
stack_size += 8; /* room for IP address argument */
@@ -2213,11 +2216,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
EMIT1(0x53); /* push rbx */
/* Store number of argument registers of the traced function:
- * mov rax, nr_args + extra_nregs
- * mov QWORD PTR [rbp - args_off], rax
+ * mov rax, nr_regs
+ * mov QWORD PTR [rbp - nregs_off], rax
*/
- emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_args + extra_nregs);
- emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -args_off);
+ emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_regs);
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -nregs_off);
if (flags & BPF_TRAMP_F_IP_ARG) {
/* Store IP address of the traced function:
@@ -2228,7 +2231,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
}
- save_regs(m, &prog, nr_args, regs_off);
+ save_regs(m, &prog, nr_regs, regs_off);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
/* arg1: mov rdi, im */
@@ -2258,7 +2261,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
}
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- restore_regs(m, &prog, nr_args, regs_off);
+ restore_regs(m, &prog, nr_regs, regs_off);
if (flags & BPF_TRAMP_F_ORIG_STACK) {
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
@@ -2299,7 +2302,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
}
if (flags & BPF_TRAMP_F_RESTORE_REGS)
- restore_regs(m, &prog, nr_args, regs_off);
+ restore_regs(m, &prog, nr_regs, regs_off);
/* This needs to be done regardless. If there were fmod_ret programs,
* the return value is only updated on the stack and still needs to be
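A worked example of the nr_regs accounting above, for a hypothetical traced function void f(struct { u64 a, b; } s, int x):

/*
 * m->nr_args = 2; arg 0 is a 16-byte struct passed in two registers, so
 *   nr_regs = 2 + (16 + 7)/8 - 1 = 3   (within the 6-register limit)
 * save_regs() then spills rdi and rsi (the two struct halves) and edx (x),
 * advancing the m->arg_size[] index only after the second struct half.
 */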
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5cacd4890bab..bb59cc6ddb2d 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -276,6 +276,7 @@ static void __init xen_init_capabilities(void)
setup_clear_cpu_cap(X86_FEATURE_ACC);
setup_clear_cpu_cap(X86_FEATURE_X2APIC);
setup_clear_cpu_cap(X86_FEATURE_SME);
+ setup_clear_cpu_cap(X86_FEATURE_LKGS);
/*
* Xen PV would need some work to support PCID: CR3 handling as well
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8db26f10fb1d..c2be3efb2ba0 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -934,12 +934,8 @@ void xen_enable_syscall(void)
static void __init xen_pvmmu_arch_setup(void)
{
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
- HYPERVISOR_vm_assist(VMASST_CMD_enable,
- VMASST_TYPE_pae_extended_cr3);
-
if (register_callback(CALLBACKTYPE_event,
xen_asm_exc_xen_hypervisor_callback) ||
register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index bd02f9d50107..22fb982ff971 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -21,6 +21,8 @@ void xen_smp_send_reschedule(int cpu);
void xen_smp_send_call_function_ipi(const struct cpumask *mask);
void xen_smp_send_call_function_single_ipi(int cpu);
+void __noreturn xen_cpu_bringup_again(unsigned long stack);
+
struct xen_common_irq {
int irq;
char *name;
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 6175f2c5c822..a9cf8c8fa074 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -381,21 +381,12 @@ static void xen_pv_cpu_die(unsigned int cpu)
}
}
-static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
+static void __noreturn xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
{
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL);
- cpu_bringup();
- /*
- * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
- * clears certain data that the cpu_idle loop (which called us
- * and that we return from) expects. The only way to get that
- * data back is to call:
- */
- tick_nohz_idle_enter();
- tick_nohz_idle_stop_tick_protected();
-
- cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE);
+ xen_cpu_bringup_again((unsigned long)task_pt_regs(current));
+ BUG();
}
#else /* !CONFIG_HOTPLUG_CPU */
@@ -409,7 +400,7 @@ static void xen_pv_cpu_die(unsigned int cpu)
BUG();
}
-static void xen_pv_play_dead(void)
+static void __noreturn xen_pv_play_dead(void)
{
BUG();
}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 6b8836deb738..1d597364b49d 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -482,15 +482,51 @@ static void xen_setup_vsyscall_time_info(void)
xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
}
+/*
+ * Check if it is possible to safely use the tsc as a clocksource. This is
+ * only true if the hypervisor notifies the guest that its tsc is invariant,
+ * the tsc is stable, and the tsc instruction will never be emulated.
+ */
+static int __init xen_tsc_safe_clocksource(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ if (!(boot_cpu_has(X86_FEATURE_CONSTANT_TSC)))
+ return 0;
+
+ if (!(boot_cpu_has(X86_FEATURE_NONSTOP_TSC)))
+ return 0;
+
+ if (check_tsc_unstable())
+ return 0;
+
+ /* Leaf 4, sub-leaf 0 (0x40000x03) */
+ cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx);
+
+ /* tsc_mode = no_emulate (2) */
+ if (ebx != 2)
+ return 0;
+
+ return 1;
+}
+
static void __init xen_time_init(void)
{
struct pvclock_vcpu_time_info *pvti;
int cpu = smp_processor_id();
struct timespec64 tp;
- /* As Dom0 is never moved, no penalty on using TSC there */
+ /*
+ * As Dom0 is never moved, no penalty on using TSC there.
+ *
+ * If it is possible for the guest to determine that the tsc is a safe
+ * clocksource, then set xen_clocksource rating below that of the tsc
+ * so that the system prefers tsc instead.
+ */
if (xen_initial_domain())
xen_clocksource.rating = 275;
+ else if (xen_tsc_safe_clocksource())
+ xen_clocksource.rating = 299;
clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
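For context, stated here as an assumption rather than something in the patch: the native TSC clocksource registers with rating 300, so the 299 above keeps xen_clocksource available as a fallback while letting the raw TSC win selection whenever xen_tsc_safe_clocksource() returns 1.

/* Resulting preference, ratings only (300 for the tsc clocksource is an
 * assumption; the rest comes from the hunk above):
 *   tsc                 300  <- preferred when the guest's TSC is safe
 *   xen, tsc-safe guest 299
 *   xen, Dom0           275
 */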
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index ffaa62167f6e..e36ea4268bd2 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -76,6 +76,13 @@ SYM_CODE_START(asm_cpu_bringup_and_idle)
call cpu_bringup_and_idle
SYM_CODE_END(asm_cpu_bringup_and_idle)
+
+SYM_CODE_START(xen_cpu_bringup_again)
+ UNWIND_HINT_FUNC
+ mov %rdi, %rsp
+ UNWIND_HINT_REGS
+ call cpu_bringup_and_idle
+SYM_CODE_END(xen_cpu_bringup_again)
.popsection
#endif
#endif